lemmy/server/src/apub/puller.rs

143 lines
4.7 KiB
Rust
Raw Normal View History

2020-04-03 16:32:09 +00:00
use crate::apub::*;
use crate::db::community::{Community, CommunityForm};
use crate::db::post::{Post, PostForm};
2020-04-07 21:02:32 +00:00
use crate::db::user::{UserForm, User_};
use crate::db::Crud;
use crate::routes::nodeinfo::{NodeInfo, NodeInfoWellKnown};
use crate::settings::Settings;
2020-03-19 01:16:17 +00:00
use activitystreams::collection::{OrderedCollection, UnorderedCollection};
use activitystreams::object::Page;
use activitystreams::BaseBox;
2020-04-08 11:23:59 +00:00
use diesel::result::Error::NotFound;
use diesel::PgConnection;
use failure::Error;
2020-03-28 19:41:42 +00:00
use isahc::prelude::*;
2020-04-08 12:08:33 +00:00
use log::warn;
use serde::Deserialize;
use std::time::Duration;
2020-04-08 12:37:05 +00:00
use url::Url;
fn fetch_node_info(domain: &str) -> Result<NodeInfo, Error> {
2020-04-08 12:37:05 +00:00
let well_known_uri = Url::parse(&format!(
2020-03-18 21:09:00 +00:00
"{}://{}/.well-known/nodeinfo",
get_apub_protocol_string(),
domain
2020-04-08 12:37:05 +00:00
))?;
2020-03-18 21:09:00 +00:00
let well_known = fetch_remote_object::<NodeInfoWellKnown>(&well_known_uri)?;
Ok(fetch_remote_object::<NodeInfo>(&well_known.links.href)?)
}
2020-03-18 21:09:00 +00:00
2020-04-07 21:02:32 +00:00
fn fetch_communities_from_instance(
2020-04-08 12:37:05 +00:00
community_list: &Url,
2020-04-07 21:02:32 +00:00
conn: &PgConnection,
2020-04-08 12:08:33 +00:00
) -> Result<Vec<Community>, Error> {
2020-04-08 12:37:05 +00:00
fetch_remote_object::<UnorderedCollection>(community_list)?
2020-04-08 12:08:33 +00:00
.collection_props
.get_many_items_base_boxes()
.unwrap()
.map(|b| -> Result<CommunityForm, Error> {
let group = b.to_owned().to_concrete::<GroupExt>()?;
Ok(CommunityForm::from_group(&group, conn)?)
})
.map(
|cf: Result<CommunityForm, Error>| -> Result<Community, Error> {
let cf2 = cf?;
let existing = Community::read_from_actor_id(conn, &cf2.actor_id);
match existing {
Err(NotFound {}) => Ok(Community::create(conn, &cf2)?),
Ok(c) => Ok(Community::update(conn, c.id, &cf2)?),
Err(e) => Err(Error::from(e)),
}
},
)
.collect()
}
2020-04-07 21:02:32 +00:00
// TODO: add an optional param last_updated and only fetch if it's too old
2020-04-08 12:37:05 +00:00
pub fn fetch_remote_object<Response>(url: &Url) -> Result<Response, Error>
where
Response: for<'de> Deserialize<'de>,
{
2020-04-08 12:37:05 +00:00
if Settings::get().federation.tls_enabled && url.scheme() != "https" {
return Err(format_err!("Activitypub uri is insecure: {}", url));
2020-03-18 21:09:00 +00:00
}
// TODO: this function should return a future
let timeout = Duration::from_secs(60);
2020-04-08 12:37:05 +00:00
let text = Request::get(url.as_str())
.header("Accept", APUB_JSON_CONTENT_TYPE)
.connect_timeout(timeout)
.timeout(timeout)
.body(())?
.send()?
.text()?;
2020-03-20 00:42:07 +00:00
let res: Response = serde_json::from_str(&text)?;
Ok(res)
}
2020-04-07 21:02:32 +00:00
fn fetch_remote_community_posts(
instance: &str,
2020-04-08 12:08:33 +00:00
community: &Community,
2020-04-07 21:02:32 +00:00
conn: &PgConnection,
2020-04-08 12:08:33 +00:00
) -> Result<Vec<Post>, Error> {
2020-04-08 12:37:05 +00:00
let endpoint = Url::parse(&format!(
"http://{}/federation/c/{}",
instance, community.name
))?;
2020-04-08 12:08:33 +00:00
let group = fetch_remote_object::<GroupExt>(&endpoint)?;
2020-04-08 12:37:05 +00:00
let outbox_uri = Url::parse(&group.extension.get_outbox().to_string())?;
2020-04-08 12:08:33 +00:00
// TODO: outbox url etc should be stored in local db
2020-04-08 12:37:05 +00:00
let outbox = fetch_remote_object::<OrderedCollection>(&outbox_uri)?;
let items = outbox.collection_props.get_many_items_base_boxes();
2020-04-08 12:08:33 +00:00
Ok(
items
.unwrap()
.map(|obox: &BaseBox| -> Result<PostForm, Error> {
let page = obox.clone().to_concrete::<Page>()?;
PostForm::from_page(&page, conn)
})
.map(|pf: Result<PostForm, Error>| -> Result<Post, Error> {
let mut pf2 = pf?;
pf2.community_id = community.id;
let existing = Post::read_from_apub_id(conn, &pf2.ap_id);
match existing {
Err(NotFound {}) => Ok(Post::create(conn, &pf2)?),
Ok(p) => Ok(Post::update(conn, p.id, &pf2)?),
Err(e) => Err(Error::from(e)),
}
})
.collect::<Result<Vec<Post>, Error>>()?,
)
}
2020-04-08 12:37:05 +00:00
pub fn fetch_remote_user(apub_id: &Url, conn: &PgConnection) -> Result<User_, Error> {
2020-04-07 21:02:32 +00:00
let person = fetch_remote_object::<PersonExt>(apub_id)?;
let uf = UserForm::from_person(&person)?;
let existing = User_::read_from_apub_id(conn, &uf.actor_id);
Ok(match existing {
2020-04-08 11:23:59 +00:00
Err(NotFound {}) => User_::create(conn, &uf)?,
2020-04-07 21:02:32 +00:00
Ok(u) => User_::update(conn, u.id, &uf)?,
2020-04-08 11:23:59 +00:00
Err(e) => return Err(Error::from(e)),
2020-04-07 21:02:32 +00:00
})
}
// TODO: in the future, this should only be done when an instance is followed for the first time;
// after that, we should rely on the inbox, and fetch on demand when needed
pub fn fetch_all(conn: &PgConnection) -> Result<(), Error> {
for instance in &get_following_instances() {
2020-04-08 12:08:33 +00:00
let node_info = fetch_node_info(instance)?;
2020-04-08 12:37:05 +00:00
if let Some(community_list) = node_info.metadata.community_list_url {
let communities = fetch_communities_from_instance(&community_list, conn)?;
2020-04-08 12:08:33 +00:00
for c in communities {
fetch_remote_community_posts(instance, &c, conn)?;
}
2020-04-08 12:08:33 +00:00
} else {
warn!(
"{} is not a Lemmy instance, federation is not supported",
instance
);
}
}
Ok(())
}