lemmy/server/src/apub/fetcher.rs

159 lines
5.6 KiB
Rust
Raw Normal View History

2020-04-17 13:46:08 +00:00
use crate::api::site::SearchResponse;
2020-04-03 16:32:09 +00:00
use crate::apub::*;
use crate::db::community::{Community, CommunityForm};
2020-04-17 13:46:08 +00:00
use crate::db::community_view::CommunityView;
use crate::db::post::{Post, PostForm};
2020-04-17 13:46:08 +00:00
use crate::db::post_view::PostView;
2020-04-07 21:02:32 +00:00
use crate::db::user::{UserForm, User_};
2020-04-17 13:46:08 +00:00
use crate::db::user_view::UserView;
use crate::db::{Crud, SearchType};
use crate::routes::nodeinfo::{NodeInfo, NodeInfoWellKnown};
2020-04-17 14:39:03 +00:00
use activitystreams::collection::OrderedCollection;
2020-03-19 01:16:17 +00:00
use activitystreams::object::Page;
use activitystreams::BaseBox;
2020-04-08 11:23:59 +00:00
use diesel::result::Error::NotFound;
use diesel::PgConnection;
use failure::Error;
2020-03-28 19:41:42 +00:00
use isahc::prelude::*;
use serde::Deserialize;
use std::time::Duration;
2020-04-08 12:37:05 +00:00
use url::Url;
2020-04-17 15:33:55 +00:00
// Fetch nodeinfo metadata from a remote instance.
2020-04-17 14:39:03 +00:00
fn _fetch_node_info(domain: &str) -> Result<NodeInfo, Error> {
2020-04-08 12:37:05 +00:00
let well_known_uri = Url::parse(&format!(
2020-03-18 21:09:00 +00:00
"{}://{}/.well-known/nodeinfo",
get_apub_protocol_string(),
2020-04-17 14:39:03 +00:00
domain
2020-04-08 12:37:05 +00:00
))?;
2020-03-18 21:09:00 +00:00
let well_known = fetch_remote_object::<NodeInfoWellKnown>(&well_known_uri)?;
Ok(fetch_remote_object::<NodeInfo>(&well_known.links.href)?)
}
2020-03-18 21:09:00 +00:00
2020-04-17 13:46:08 +00:00
// TODO: move these to db
/// Insert the community described by `community_form`, or update the existing row if a
/// community with the same `actor_id` is already stored.
///
/// Any database error other than "not found" during the lookup is returned to the caller.
fn upsert_community(
    community_form: &CommunityForm,
    conn: &PgConnection,
) -> Result<Community, Error> {
    let lookup = Community::read_from_actor_id(conn, &community_form.actor_id);
    let community = match lookup {
        // Already known locally: overwrite it with the fresh data.
        Ok(found) => Community::update(conn, found.id, community_form)?,
        // Never seen before: create it.
        Err(NotFound) => Community::create(conn, community_form)?,
        Err(other) => return Err(other.into()),
    };
    Ok(community)
}
/// Insert the user described by `user_form`, or update the existing row if a user with the
/// same `actor_id` is already stored.
///
/// Any database error other than "not found" during the lookup is returned to the caller.
fn upsert_user(user_form: &UserForm, conn: &PgConnection) -> Result<User_, Error> {
    match User_::read_from_apub_id(conn, &user_form.actor_id) {
        // Already known locally: overwrite it with the fresh data.
        Ok(known) => Ok(User_::update(conn, known.id, user_form)?),
        // Never seen before: create it.
        Err(NotFound) => Ok(User_::create(conn, user_form)?),
        Err(other) => Err(other.into()),
    }
}
fn upsert_post(post_form: &PostForm, conn: &PgConnection) -> Result<Post, Error> {
let existing = Post::read_from_apub_id(conn, &post_form.ap_id);
match existing {
Err(NotFound {}) => Ok(Post::create(conn, &post_form)?),
Ok(p) => Ok(Post::update(conn, p.id, &post_form)?),
Err(e) => Err(Error::from(e)),
}
}
2020-04-17 15:33:55 +00:00
/// Fetch any type of ActivityPub object, handling things like HTTP headers, deserialisation,
/// timeouts etc.
/// TODO: add an optional param last_updated and only fetch if its too old
2020-04-08 12:37:05 +00:00
pub fn fetch_remote_object<Response>(url: &Url) -> Result<Response, Error>
where
Response: for<'de> Deserialize<'de>,
{
2020-04-17 17:34:18 +00:00
if !is_apub_id_valid(&url.to_string()) {
return Err(format_err!("Activitypub uri invalid or blocked: {}", url));
2020-03-18 21:09:00 +00:00
}
// TODO: this function should return a future
let timeout = Duration::from_secs(60);
2020-04-08 12:37:05 +00:00
let text = Request::get(url.as_str())
.header("Accept", APUB_JSON_CONTENT_TYPE)
.connect_timeout(timeout)
.timeout(timeout)
.body(())?
.send()?
.text()?;
2020-03-20 00:42:07 +00:00
let res: Response = serde_json::from_str(&text)?;
Ok(res)
}
2020-04-17 15:33:55 +00:00
/// The types of ActivityPub objects that can be fetched directly by searching for their ID.
2020-04-17 13:46:08 +00:00
#[serde(untagged)]
2020-04-17 17:34:18 +00:00
#[derive(serde::Deserialize, Debug)]
2020-04-17 13:46:08 +00:00
pub enum SearchAcceptedObjects {
Person(Box<PersonExt>),
Group(Box<GroupExt>),
Page(Box<Page>),
}
2020-04-17 15:33:55 +00:00
/// Attempt to parse the query as URL, and fetch an ActivityPub object from it.
///
/// Some working examples for use with the docker/federation/ setup:
/// http://lemmy_alpha:8540/federation/c/main
/// http://lemmy_alpha:8540/federation/u/lemmy_alpha
/// http://lemmy_alpha:8540/federation/p/3
2020-04-17 13:46:08 +00:00
pub fn search_by_apub_id(query: &str, conn: &PgConnection) -> Result<SearchResponse, Error> {
let query_url = Url::parse(&query)?;
let mut response = SearchResponse {
type_: SearchType::All.to_string(),
comments: vec![],
posts: vec![],
communities: vec![],
users: vec![],
};
match fetch_remote_object::<SearchAcceptedObjects>(&query_url)? {
SearchAcceptedObjects::Person(p) => {
let u = upsert_user(&UserForm::from_person(&p)?, conn)?;
response.users = vec![UserView::read(conn, u.id)?];
}
SearchAcceptedObjects::Group(g) => {
let c = upsert_community(&CommunityForm::from_group(&g, conn)?, conn)?;
2020-04-17 14:39:03 +00:00
fetch_community_outbox(&c, conn)?;
2020-04-17 13:46:08 +00:00
response.communities = vec![CommunityView::read(conn, c.id, None)?];
}
SearchAcceptedObjects::Page(p) => {
let p = upsert_post(&PostForm::from_page(&p, conn)?, conn)?;
response.posts = vec![PostView::read(conn, p.id, None)?];
}
}
Ok(response)
}
2020-04-17 15:33:55 +00:00
/// Fetch all posts in the outbox of the given user, and insert them into the database.
2020-04-17 14:39:03 +00:00
fn fetch_community_outbox(community: &Community, conn: &PgConnection) -> Result<Vec<Post>, Error> {
2020-04-13 13:06:41 +00:00
let outbox_url = Url::parse(&community.get_outbox_url())?;
2020-04-08 16:39:45 +00:00
let outbox = fetch_remote_object::<OrderedCollection>(&outbox_url)?;
let items = outbox.collection_props.get_many_items_base_boxes();
2020-04-08 12:08:33 +00:00
Ok(
items
.unwrap()
.map(|obox: &BaseBox| -> Result<PostForm, Error> {
let page = obox.clone().to_concrete::<Page>()?;
PostForm::from_page(&page, conn)
})
2020-04-17 13:46:08 +00:00
.map(|pf| upsert_post(&pf?, conn))
2020-04-08 12:08:33 +00:00
.collect::<Result<Vec<Post>, Error>>()?,
)
}
2020-04-17 15:33:55 +00:00
/// Fetch a user, insert/update it in the database and return the user.
2020-04-08 12:37:05 +00:00
pub fn fetch_remote_user(apub_id: &Url, conn: &PgConnection) -> Result<User_, Error> {
2020-04-07 21:02:32 +00:00
let person = fetch_remote_object::<PersonExt>(apub_id)?;
let uf = UserForm::from_person(&person)?;
2020-04-17 13:46:08 +00:00
upsert_user(&uf, conn)
2020-04-07 21:02:32 +00:00
}
2020-04-17 13:46:08 +00:00
2020-04-17 15:33:55 +00:00
/// Fetch a community, insert/update it in the database and return the community.
2020-04-09 19:04:31 +00:00
pub fn fetch_remote_community(apub_id: &Url, conn: &PgConnection) -> Result<Community, Error> {
let group = fetch_remote_object::<GroupExt>(apub_id)?;
let cf = CommunityForm::from_group(&group, conn)?;
2020-04-17 13:46:08 +00:00
upsert_community(&cf, conn)
2020-04-09 19:04:31 +00:00
}