lemmy/server/src/apub/fetcher.rs

288 lines
9.9 KiB
Rust
Raw Normal View History

2020-05-16 14:04:08 +00:00
use activitystreams::object::Note;
use actix_web::Result;
use diesel::{result::Error::NotFound, PgConnection};
use failure::{Error, _core::fmt::Debug};
use isahc::prelude::*;
use log::debug;
use serde::Deserialize;
use std::time::Duration;
use url::Url;
use crate::{
api::site::SearchResponse,
db::{
comment::{Comment, CommentForm},
comment_view::CommentView,
community::{Community, CommunityForm, CommunityModerator, CommunityModeratorForm},
community_view::CommunityView,
post::{Post, PostForm},
post_view::PostView,
user::{UserForm, User_},
Crud,
Joinable,
SearchType,
},
naive_now,
routes::nodeinfo::{NodeInfo, NodeInfoWellKnown},
};
use crate::{
apub::{
get_apub_protocol_string,
is_apub_id_valid,
FromApub,
GroupExt,
PageExt,
PersonExt,
APUB_JSON_CONTENT_TYPE,
},
db::user_view::UserView,
};
2020-04-17 15:33:55 +00:00
// Fetch nodeinfo metadata from a remote instance.
2020-04-17 14:39:03 +00:00
fn _fetch_node_info(domain: &str) -> Result<NodeInfo, Error> {
2020-04-08 12:37:05 +00:00
let well_known_uri = Url::parse(&format!(
2020-03-18 21:09:00 +00:00
"{}://{}/.well-known/nodeinfo",
get_apub_protocol_string(),
2020-04-17 14:39:03 +00:00
domain
2020-04-08 12:37:05 +00:00
))?;
2020-03-18 21:09:00 +00:00
let well_known = fetch_remote_object::<NodeInfoWellKnown>(&well_known_uri)?;
Ok(fetch_remote_object::<NodeInfo>(&well_known.links.href)?)
}
2020-03-18 21:09:00 +00:00
2020-04-17 15:33:55 +00:00
/// Fetch any type of ActivityPub object, handling things like HTTP headers, deserialisation,
/// timeouts etc.
2020-04-08 12:37:05 +00:00
pub fn fetch_remote_object<Response>(url: &Url) -> Result<Response, Error>
where
Response: for<'de> Deserialize<'de>,
{
if !is_apub_id_valid(&url) {
2020-04-17 17:34:18 +00:00
return Err(format_err!("Activitypub uri invalid or blocked: {}", url));
2020-03-18 21:09:00 +00:00
}
// TODO: this function should return a future
let timeout = Duration::from_secs(60);
2020-04-08 12:37:05 +00:00
let text = Request::get(url.as_str())
2020-04-23 11:42:09 +00:00
.header("Accept", APUB_JSON_CONTENT_TYPE)
.connect_timeout(timeout)
.timeout(timeout)
.body(())?
.send()?
.text()?;
2020-03-20 00:42:07 +00:00
let res: Response = serde_json::from_str(&text)?;
Ok(res)
}
2020-04-17 15:33:55 +00:00
/// The types of ActivityPub objects that can be fetched directly by searching for their ID.
2020-04-17 13:46:08 +00:00
#[serde(untagged)]
2020-04-17 17:34:18 +00:00
#[derive(serde::Deserialize, Debug)]
2020-04-17 13:46:08 +00:00
pub enum SearchAcceptedObjects {
Person(Box<PersonExt>),
Group(Box<GroupExt>),
2020-05-05 00:04:48 +00:00
Page(Box<PageExt>),
2020-05-13 17:21:32 +00:00
Comment(Box<Note>),
2020-04-17 13:46:08 +00:00
}
2020-04-17 15:33:55 +00:00
/// Attempt to parse the query as URL, and fetch an ActivityPub object from it.
///
/// Some working examples for use with the docker/federation/ setup:
/// http://lemmy_alpha:8540/c/main, or !main@lemmy_alpha:8540
/// http://lemmy_alpha:8540/u/lemmy_alpha, or @lemmy_alpha@lemmy_alpha:8540
2020-05-13 17:21:32 +00:00
/// http://lemmy_alpha:8540/post/3
/// http://lemmy_alpha:8540/comment/2
2020-04-17 13:46:08 +00:00
pub fn search_by_apub_id(query: &str, conn: &PgConnection) -> Result<SearchResponse, Error> {
// Parse the shorthand query url
let query_url = if query.contains('@') {
debug!("{}", query);
let split = query.split('@').collect::<Vec<&str>>();
// User type will look like ['', username, instance]
// Community will look like [!community, instance]
let (name, instance) = if split.len() == 3 {
(format!("/u/{}", split[1]), split[2])
} else if split.len() == 2 {
if split[0].contains('!') {
let split2 = split[0].split('!').collect::<Vec<&str>>();
(format!("/c/{}", split2[1]), split[1])
} else {
return Err(format_err!("Invalid search query: {}", query));
}
} else {
return Err(format_err!("Invalid search query: {}", query));
};
let url = format!("{}://{}{}", get_apub_protocol_string(), instance, name);
Url::parse(&url)?
} else {
Url::parse(&query)?
};
2020-04-17 13:46:08 +00:00
let mut response = SearchResponse {
type_: SearchType::All.to_string(),
comments: vec![],
posts: vec![],
communities: vec![],
users: vec![],
};
match fetch_remote_object::<SearchAcceptedObjects>(&query_url)? {
SearchAcceptedObjects::Person(p) => {
2020-05-18 16:15:26 +00:00
let user_uri = p.inner.object_props.get_id().unwrap().to_string();
2020-04-24 19:55:54 +00:00
let user = get_or_fetch_and_upsert_remote_user(&user_uri, &conn)?;
2020-04-24 14:04:36 +00:00
response.users = vec![UserView::read(conn, user.id)?];
2020-04-17 13:46:08 +00:00
}
SearchAcceptedObjects::Group(g) => {
2020-05-18 16:15:26 +00:00
let community_uri = g.inner.object_props.get_id().unwrap().to_string();
2020-04-24 19:55:54 +00:00
let community = get_or_fetch_and_upsert_remote_community(&community_uri, &conn)?;
// TODO Maybe at some point in the future, fetch all the history of a community
2020-04-24 14:04:36 +00:00
// fetch_community_outbox(&c, conn)?;
response.communities = vec![CommunityView::read(conn, community.id, None)?];
2020-04-17 13:46:08 +00:00
}
2020-04-25 15:49:15 +00:00
SearchAcceptedObjects::Page(p) => {
let p = upsert_post(&PostForm::from_apub(&p, conn)?, conn)?;
response.posts = vec![PostView::read(conn, p.id, None)?];
}
2020-05-13 17:21:32 +00:00
SearchAcceptedObjects::Comment(c) => {
let post_url = c
.object_props
.get_many_in_reply_to_xsd_any_uris()
.unwrap()
.next()
.unwrap()
.to_string();
// TODO: also fetch parent comments if any
let post = fetch_remote_object(&Url::parse(&post_url)?)?;
upsert_post(&PostForm::from_apub(&post, conn)?, conn)?;
let c = upsert_comment(&CommentForm::from_apub(&c, conn)?, conn)?;
response.comments = vec![CommentView::read(conn, c.id, None)?];
}
2020-04-17 13:46:08 +00:00
}
Ok(response)
}
2020-04-24 14:04:36 +00:00
/// Check if a remote user exists, create if not found, if its too old update it.Fetch a user, insert/update it in the database and return the user.
2020-04-24 19:55:54 +00:00
pub fn get_or_fetch_and_upsert_remote_user(
apub_id: &str,
conn: &PgConnection,
) -> Result<User_, Error> {
2020-04-24 14:04:36 +00:00
match User_::read_from_actor_id(&conn, &apub_id) {
Ok(u) => {
// If its older than a day, re-fetch it
2020-05-03 14:00:59 +00:00
if !u.local
&& u
.last_refreshed_at
// TODO it won't pick up new avatars, summaries etc until a day after.
// Both user and community need an "update" action pushed to other servers
// to fix this
2020-05-03 14:00:59 +00:00
.lt(&(naive_now() - chrono::Duration::days(1)))
2020-04-24 19:55:54 +00:00
{
2020-04-24 14:04:36 +00:00
debug!("Fetching and updating from remote user: {}", apub_id);
let person = fetch_remote_object::<PersonExt>(&Url::parse(apub_id)?)?;
2020-04-24 19:55:54 +00:00
let mut uf = UserForm::from_apub(&person, &conn)?;
2020-04-24 14:04:36 +00:00
uf.last_refreshed_at = Some(naive_now());
Ok(User_::update(&conn, u.id, &uf)?)
} else {
Ok(u)
}
2020-04-24 19:55:54 +00:00
}
2020-04-24 14:04:36 +00:00
Err(NotFound {}) => {
debug!("Fetching and creating remote user: {}", apub_id);
let person = fetch_remote_object::<PersonExt>(&Url::parse(apub_id)?)?;
2020-04-24 19:55:54 +00:00
let uf = UserForm::from_apub(&person, &conn)?;
2020-04-24 14:04:36 +00:00
Ok(User_::create(conn, &uf)?)
}
Err(e) => Err(Error::from(e)),
}
2020-04-07 21:02:32 +00:00
}
2020-04-17 13:46:08 +00:00
2020-04-24 14:04:36 +00:00
/// Check if a remote community exists, create if not found, if its too old update it.Fetch a community, insert/update it in the database and return the community.
2020-04-24 19:55:54 +00:00
pub fn get_or_fetch_and_upsert_remote_community(
apub_id: &str,
conn: &PgConnection,
) -> Result<Community, Error> {
2020-04-24 14:04:36 +00:00
match Community::read_from_actor_id(&conn, &apub_id) {
Ok(c) => {
// If its older than a day, re-fetch it
2020-05-03 14:00:59 +00:00
if !c.local
&& c
.last_refreshed_at
.lt(&(naive_now() - chrono::Duration::days(1)))
2020-04-24 19:55:54 +00:00
{
2020-04-24 14:04:36 +00:00
debug!("Fetching and updating from remote community: {}", apub_id);
let group = fetch_remote_object::<GroupExt>(&Url::parse(apub_id)?)?;
2020-04-24 19:55:54 +00:00
let mut cf = CommunityForm::from_apub(&group, conn)?;
2020-04-24 14:04:36 +00:00
cf.last_refreshed_at = Some(naive_now());
Ok(Community::update(&conn, c.id, &cf)?)
} else {
Ok(c)
}
2020-04-24 19:55:54 +00:00
}
2020-04-24 14:04:36 +00:00
Err(NotFound {}) => {
debug!("Fetching and creating remote community: {}", apub_id);
let group = fetch_remote_object::<GroupExt>(&Url::parse(apub_id)?)?;
2020-04-24 19:55:54 +00:00
let cf = CommunityForm::from_apub(&group, conn)?;
2020-05-03 14:00:59 +00:00
let community = Community::create(conn, &cf)?;
// Also add the community moderators too
let creator_and_moderator_uris = group
2020-05-18 16:15:26 +00:00
.inner
2020-05-03 14:00:59 +00:00
.object_props
.get_many_attributed_to_xsd_any_uris()
.unwrap();
let creator_and_moderators = creator_and_moderator_uris
.map(|c| get_or_fetch_and_upsert_remote_user(&c.to_string(), &conn).unwrap())
.collect::<Vec<User_>>();
for mod_ in creator_and_moderators {
let community_moderator_form = CommunityModeratorForm {
community_id: community.id,
user_id: mod_.id,
};
CommunityModerator::join(&conn, &community_moderator_form)?;
}
Ok(community)
2020-04-24 14:04:36 +00:00
}
Err(e) => Err(Error::from(e)),
}
2020-04-09 19:04:31 +00:00
}
2020-04-24 19:55:54 +00:00
2020-04-25 15:49:15 +00:00
fn upsert_post(post_form: &PostForm, conn: &PgConnection) -> Result<Post, Error> {
let existing = Post::read_from_apub_id(conn, &post_form.ap_id);
match existing {
Err(NotFound {}) => Ok(Post::create(conn, &post_form)?),
Ok(p) => Ok(Post::update(conn, p.id, &post_form)?),
Err(e) => Err(Error::from(e)),
}
}
2020-05-13 17:21:32 +00:00
fn upsert_comment(comment_form: &CommentForm, conn: &PgConnection) -> Result<Comment, Error> {
let existing = Comment::read_from_apub_id(conn, &comment_form.ap_id);
match existing {
Err(NotFound {}) => Ok(Comment::create(conn, &comment_form)?),
Ok(p) => Ok(Comment::update(conn, p.id, &comment_form)?),
Err(e) => Err(Error::from(e)),
}
}
2020-04-24 19:55:54 +00:00
// TODO It should not be fetching data from a community outbox.
// All posts, comments, comment likes, etc. should be posted to our community_inbox.
// The only data we should be periodically fetching (if it hasn't been fetched in the last day,
// maybe) is community and user actors.
//
// Fetch all posts in the outbox of the given community, and insert them into the database.
// fn fetch_community_outbox(community: &Community, conn: &PgConnection) -> Result<Vec<Post>, Error> {
// let outbox_url = Url::parse(&community.get_outbox_url())?;
// let outbox = fetch_remote_object::<OrderedCollection>(&outbox_url)?;
// let items = outbox.collection_props.get_many_items_base_boxes();
// Ok(
// items
// .unwrap()
// .map(|obox: &BaseBox| -> Result<PostForm, Error> {
// let page = obox.clone().to_concrete::<Page>()?;
// PostForm::from_page(&page, conn)
// })
// .map(|pf| upsert_post(&pf?, conn))
// .collect::<Result<Vec<Post>, Error>>()?,
// )
// }