lemmy/server/src/apub/puller.rs

119 lines
4 KiB
Rust
Raw Normal View History

2020-04-03 16:32:09 +00:00
use crate::apub::*;
use crate::db::community::{Community, CommunityForm};
use crate::db::post::{Post, PostForm};
use crate::db::Crud;
use crate::routes::nodeinfo::{NodeInfo, NodeInfoWellKnown};
use crate::settings::Settings;
2020-03-19 01:16:17 +00:00
use activitystreams::collection::{OrderedCollection, UnorderedCollection};
use activitystreams::object::Page;
use activitystreams::BaseBox;
use diesel::PgConnection;
use failure::Error;
2020-03-28 19:41:42 +00:00
use isahc::prelude::*;
use serde::Deserialize;
use std::time::Duration;
fn fetch_node_info(domain: &str) -> Result<NodeInfo, Error> {
2020-03-18 21:09:00 +00:00
let well_known_uri = format!(
"{}://{}/.well-known/nodeinfo",
get_apub_protocol_string(),
domain
);
let well_known = fetch_remote_object::<NodeInfoWellKnown>(&well_known_uri)?;
Ok(fetch_remote_object::<NodeInfo>(&well_known.links.href)?)
}
2020-03-18 21:09:00 +00:00
fn fetch_communities_from_instance(domain: &str) -> Result<Vec<CommunityForm>, Error> {
let node_info = fetch_node_info(domain)?;
if let Some(community_list_url) = node_info.metadata.community_list_url {
let collection = fetch_remote_object::<UnorderedCollection>(&community_list_url)?;
let object_boxes = collection
.collection_props
.get_many_items_base_boxes()
.unwrap();
let communities: Result<Vec<CommunityForm>, Error> = object_boxes
.map(|c| {
let group = c.to_owned().to_concrete::<GroupExt>()?;
CommunityForm::from_group(&group)
})
.collect();
Ok(communities?)
} else {
Err(format_err!(
"{} is not a Lemmy instance, federation is not supported",
domain
))
}
}
pub fn fetch_remote_object<Response>(uri: &str) -> Result<Response, Error>
where
Response: for<'de> Deserialize<'de>,
{
if Settings::get().federation.tls_enabled && !uri.starts_with("https://") {
2020-03-18 21:09:00 +00:00
return Err(format_err!("Activitypub uri is insecure: {}", uri));
}
// TODO: should cache responses here when we are in production
// TODO: this function should return a future
let timeout = Duration::from_secs(60);
let text = Request::get(uri)
.header("Accept", APUB_JSON_CONTENT_TYPE)
.connect_timeout(timeout)
.timeout(timeout)
.body(())?
.send()?
.text()?;
2020-03-20 00:42:07 +00:00
let res: Response = serde_json::from_str(&text)?;
Ok(res)
}
fn fetch_remote_community_posts(instance: &str, community: &str) -> Result<Vec<PostForm>, Error> {
let endpoint = format!("http://{}/federation/c/{}", instance, community);
let community = fetch_remote_object::<GroupExt>(&endpoint)?;
2020-03-19 01:16:17 +00:00
let outbox_uri = &community.extension.get_outbox().to_string();
let outbox = fetch_remote_object::<OrderedCollection>(outbox_uri)?;
let items = outbox.collection_props.get_many_items_base_boxes();
let posts = items
.unwrap()
.map(|obox: &BaseBox| {
let page = obox.clone().to_concrete::<Page>().unwrap();
PostForm::from_page(&page)
})
.collect::<Result<Vec<PostForm>, Error>>()?;
Ok(posts)
}
/// Pulls every community, and every post of each community, from all followed
/// instances and stores them in the local database.
///
/// A community or post whose lookup by actor id / apub id succeeds is
/// updated; when the lookup returns any error, a new row is created instead.
/// The first failing fetch or database write aborts the whole run and is
/// returned to the caller.
// TODO: in the future, this should only be done when an instance is followed for the first time
// after that, we should rely on the inbox, and fetch on demand when needed
pub fn fetch_all(conn: &PgConnection) -> Result<(), Error> {
  let instances = get_following_instances();
  for instance in &instances {
    let community_forms = fetch_communities_from_instance(instance)?;
    for community_form in &community_forms {
      // TODO: should make sure that this is actually a `NotFound` error
      let community_id = match Community::read_from_actor_id(conn, &community_form.actor_id) {
        Ok(found) => Community::update(conn, found.id, community_form)?.id,
        Err(_) => Community::create(conn, community_form)?.id,
      };

      let post_forms = fetch_remote_community_posts(instance, &community_form.name)?;
      for mut post_form in post_forms {
        // Remote posts are attached to the local id of their community.
        post_form.community_id = community_id;
        // TODO: should make sure that this is actually a `NotFound` error
        if let Ok(found) = Post::read_from_apub_id(conn, &post_form.ap_id) {
          Post::update(conn, found.id, &post_form)?;
        } else {
          Post::create(conn, &post_form)?;
        }
      }
    }
  }
  Ok(())
}