2021-11-16 17:03:09 +00:00
|
|
|
use crate::fetcher::post_or_comment::PostOrComment;
|
2023-11-21 09:26:12 +00:00
|
|
|
use activitypub_federation::{
|
|
|
|
config::{Data, UrlVerifier},
|
|
|
|
error::Error as ActivityPubError,
|
|
|
|
};
|
2022-10-28 13:38:22 +00:00
|
|
|
use async_trait::async_trait;
|
2022-11-28 14:29:33 +00:00
|
|
|
use lemmy_api_common::context::LemmyContext;
|
2022-10-27 09:24:07 +00:00
|
|
|
use lemmy_db_schema::{
|
2023-07-14 15:17:06 +00:00
|
|
|
source::{activity::ReceivedActivity, instance::Instance, local_site::LocalSite},
|
2023-07-11 13:09:59 +00:00
|
|
|
utils::{ActualDbPool, DbPool},
|
2022-06-22 20:24:54 +00:00
|
|
|
};
|
2024-04-03 21:38:31 +00:00
|
|
|
use lemmy_utils::{
|
|
|
|
error::{LemmyError, LemmyErrorType, LemmyResult},
|
2024-04-09 14:10:20 +00:00
|
|
|
CACHE_DURATION_FEDERATION,
|
2024-04-03 21:38:31 +00:00
|
|
|
};
|
2023-07-05 15:08:02 +00:00
|
|
|
use moka::future::Cache;
|
2024-01-05 14:42:46 +00:00
|
|
|
use serde_json::Value;
|
2024-07-30 14:11:39 +00:00
|
|
|
use std::sync::{Arc, LazyLock};
|
2022-11-26 20:47:13 +00:00
|
|
|
use url::Url;
|
2021-11-16 17:03:09 +00:00
|
|
|
|
2020-10-12 14:10:09 +00:00
|
|
|
pub mod activities;
|
2023-09-09 16:25:03 +00:00
|
|
|
pub mod activity_lists;
|
2022-11-28 14:29:33 +00:00
|
|
|
pub mod api;
|
2021-10-27 16:03:07 +00:00
|
|
|
pub(crate) mod collections;
|
2020-04-10 11:37:35 +00:00
|
|
|
pub mod fetcher;
|
2021-07-17 16:20:44 +00:00
|
|
|
pub mod http;
|
2021-11-15 22:54:25 +00:00
|
|
|
pub(crate) mod mentions;
|
2020-10-12 14:10:09 +00:00
|
|
|
pub mod objects;
|
2021-10-29 10:32:42 +00:00
|
|
|
pub mod protocol;
|
2020-04-24 14:04:36 +00:00
|
|
|
|
2024-05-15 03:03:43 +00:00
|
|
|
/// Upper bound on the number of outgoing HTTP requests made while fetching a single object.
///
/// The limit has to be generous enough to fetch a new community together with its posts,
/// moderators and featured posts.
pub const FEDERATION_HTTP_FETCH_LIMIT: u32 = 100;
|
2022-11-21 16:44:34 +00:00
|
|
|
|
2024-01-05 14:42:46 +00:00
|
|
|
/// Only include a basic context to save space and bandwidth. The main context is hosted statically
|
|
|
|
/// on join-lemmy.org. Include activitystreams explicitly for better compat, but this could
|
|
|
|
/// theoretically also be moved.
|
2024-07-30 14:11:39 +00:00
|
|
|
pub static FEDERATION_CONTEXT: LazyLock<Value> = LazyLock::new(|| {
|
2024-01-05 14:42:46 +00:00
|
|
|
Value::Array(vec![
|
|
|
|
Value::String("https://join-lemmy.org/context.json".to_string()),
|
|
|
|
Value::String("https://www.w3.org/ns/activitystreams".to_string()),
|
|
|
|
])
|
2022-06-02 14:33:41 +00:00
|
|
|
});
|
|
|
|
|
2022-10-28 13:38:22 +00:00
|
|
|
#[derive(Clone)]
|
2023-07-11 13:09:59 +00:00
|
|
|
pub struct VerifyUrlData(pub ActualDbPool);
|
2022-10-28 13:38:22 +00:00
|
|
|
|
|
|
|
#[async_trait]
|
|
|
|
impl UrlVerifier for VerifyUrlData {
|
2023-11-21 09:26:12 +00:00
|
|
|
async fn verify(&self, url: &Url) -> Result<(), ActivityPubError> {
|
2023-07-11 13:09:59 +00:00
|
|
|
let local_site_data = local_site_data_cached(&mut (&self.0).into())
|
2022-10-28 13:38:22 +00:00
|
|
|
.await
|
|
|
|
.expect("read local site data");
|
2023-07-25 17:26:54 +00:00
|
|
|
check_apub_id_valid(url, &local_site_data).map_err(|err| match err {
|
|
|
|
LemmyError {
|
|
|
|
error_type: LemmyErrorType::FederationDisabled,
|
|
|
|
..
|
2023-11-21 09:26:12 +00:00
|
|
|
} => ActivityPubError::Other("Federation disabled".into()),
|
2023-07-25 17:26:54 +00:00
|
|
|
LemmyError {
|
2023-09-06 17:29:15 +00:00
|
|
|
error_type: LemmyErrorType::DomainBlocked(domain),
|
2023-07-25 17:26:54 +00:00
|
|
|
..
|
2023-11-21 09:26:12 +00:00
|
|
|
} => ActivityPubError::Other(format!("Domain {domain:?} is blocked")),
|
2023-07-25 17:26:54 +00:00
|
|
|
LemmyError {
|
2023-09-06 17:29:15 +00:00
|
|
|
error_type: LemmyErrorType::DomainNotInAllowList(domain),
|
2023-07-25 17:26:54 +00:00
|
|
|
..
|
2023-11-21 09:26:12 +00:00
|
|
|
} => ActivityPubError::Other(format!("Domain {domain:?} is not in allowlist")),
|
|
|
|
_ => ActivityPubError::Other("Failed validating apub id".into()),
|
2023-07-25 17:26:54 +00:00
|
|
|
})?;
|
2023-03-21 15:03:05 +00:00
|
|
|
Ok(())
|
2022-10-28 13:38:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-19 14:29:35 +00:00
|
|
|
/// Checks if the ID is allowed for sending or receiving.
|
|
|
|
///
|
|
|
|
/// In particular, it checks for:
|
|
|
|
/// - federation being enabled (if its disabled, only local URLs are allowed)
|
|
|
|
/// - the correct scheme (either http or https)
|
|
|
|
/// - URL being in the allowlist (if it is active)
|
|
|
|
/// - URL not being in the blocklist (if it is active)
|
2023-03-21 15:03:05 +00:00
|
|
|
#[tracing::instrument(skip(local_site_data))]
|
2024-04-10 14:14:11 +00:00
|
|
|
fn check_apub_id_valid(apub_id: &Url, local_site_data: &LocalSiteData) -> LemmyResult<()> {
|
2024-04-30 10:33:37 +00:00
|
|
|
let domain = apub_id
|
|
|
|
.domain()
|
|
|
|
.ok_or(LemmyErrorType::UrlWithoutDomain)?
|
|
|
|
.to_string();
|
2020-08-18 13:12:03 +00:00
|
|
|
|
2022-10-27 09:24:07 +00:00
|
|
|
if !local_site_data
|
|
|
|
.local_site
|
|
|
|
.as_ref()
|
|
|
|
.map(|l| l.federation_enabled)
|
|
|
|
.unwrap_or(true)
|
|
|
|
{
|
2023-08-31 13:01:08 +00:00
|
|
|
Err(LemmyErrorType::FederationDisabled)?
|
2020-10-22 16:12:43 +00:00
|
|
|
}
|
|
|
|
|
2023-04-21 21:41:03 +00:00
|
|
|
if local_site_data
|
|
|
|
.blocked_instances
|
|
|
|
.iter()
|
2023-08-22 15:10:21 +00:00
|
|
|
.any(|i| domain.to_lowercase().eq(&i.domain.to_lowercase()))
|
2023-04-21 21:41:03 +00:00
|
|
|
{
|
2023-08-31 13:01:08 +00:00
|
|
|
Err(LemmyErrorType::DomainBlocked(domain.clone()))?
|
2021-04-21 13:36:07 +00:00
|
|
|
}
|
2021-03-01 17:24:11 +00:00
|
|
|
|
2023-04-21 21:41:03 +00:00
|
|
|
// Only check this if there are instances in the allowlist
|
|
|
|
if !local_site_data.allowed_instances.is_empty()
|
|
|
|
&& !local_site_data
|
|
|
|
.allowed_instances
|
|
|
|
.iter()
|
2023-08-22 15:10:21 +00:00
|
|
|
.any(|i| domain.to_lowercase().eq(&i.domain.to_lowercase()))
|
2023-04-21 21:41:03 +00:00
|
|
|
{
|
2023-08-31 13:01:08 +00:00
|
|
|
Err(LemmyErrorType::DomainNotInAllowList(domain))?
|
2022-06-02 14:33:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2022-10-27 09:24:07 +00:00
|
|
|
#[derive(Clone)]
|
|
|
|
pub(crate) struct LocalSiteData {
|
|
|
|
local_site: Option<LocalSite>,
|
2023-04-21 21:41:03 +00:00
|
|
|
allowed_instances: Vec<Instance>,
|
|
|
|
blocked_instances: Vec<Instance>,
|
2022-10-27 09:24:07 +00:00
|
|
|
}
|
|
|
|
|
2023-07-11 13:09:59 +00:00
|
|
|
pub(crate) async fn local_site_data_cached(
|
|
|
|
pool: &mut DbPool<'_>,
|
|
|
|
) -> LemmyResult<Arc<LocalSiteData>> {
|
2024-04-03 21:38:31 +00:00
|
|
|
// All incoming and outgoing federation actions read the blocklist/allowlist and slur filters
|
|
|
|
// multiple times. This causes a huge number of database reads if we hit the db directly. So we
|
|
|
|
// cache these values for a short time, which will already make a huge difference and ensures that
|
|
|
|
// changes take effect quickly.
|
2024-07-30 14:11:39 +00:00
|
|
|
static CACHE: LazyLock<Cache<(), Arc<LocalSiteData>>> = LazyLock::new(|| {
|
2023-07-05 15:08:02 +00:00
|
|
|
Cache::builder()
|
|
|
|
.max_capacity(1)
|
2024-04-09 14:10:20 +00:00
|
|
|
.time_to_live(CACHE_DURATION_FEDERATION)
|
2023-07-05 15:08:02 +00:00
|
|
|
.build()
|
|
|
|
});
|
|
|
|
Ok(
|
|
|
|
CACHE
|
|
|
|
.try_get_with((), async {
|
2023-07-11 13:09:59 +00:00
|
|
|
let (local_site, allowed_instances, blocked_instances) =
|
|
|
|
lemmy_db_schema::try_join_with_pool!(pool => (
|
|
|
|
// LocalSite may be missing
|
|
|
|
|pool| async {
|
|
|
|
Ok(LocalSite::read(pool).await.ok())
|
|
|
|
},
|
|
|
|
Instance::allowlist,
|
|
|
|
Instance::blocklist
|
|
|
|
))?;
|
2023-07-05 15:08:02 +00:00
|
|
|
|
|
|
|
Ok::<_, diesel::result::Error>(Arc::new(LocalSiteData {
|
2023-07-11 13:09:59 +00:00
|
|
|
local_site,
|
|
|
|
allowed_instances,
|
|
|
|
blocked_instances,
|
2023-07-05 15:08:02 +00:00
|
|
|
}))
|
|
|
|
})
|
|
|
|
.await?,
|
|
|
|
)
|
2022-10-27 09:24:07 +00:00
|
|
|
}
|
|
|
|
|
2023-07-05 15:08:02 +00:00
|
|
|
pub(crate) async fn check_apub_id_valid_with_strictness(
|
2022-06-02 14:33:41 +00:00
|
|
|
apub_id: &Url,
|
|
|
|
is_strict: bool,
|
2023-07-05 15:08:02 +00:00
|
|
|
context: &LemmyContext,
|
2024-04-10 14:14:11 +00:00
|
|
|
) -> LemmyResult<()> {
|
2024-04-30 10:33:37 +00:00
|
|
|
let domain = apub_id
|
|
|
|
.domain()
|
|
|
|
.ok_or(LemmyErrorType::UrlWithoutDomain)?
|
|
|
|
.to_string();
|
2023-07-05 15:08:02 +00:00
|
|
|
let local_instance = context
|
|
|
|
.settings()
|
2022-06-02 14:33:41 +00:00
|
|
|
.get_hostname_without_port()
|
|
|
|
.expect("local hostname is valid");
|
|
|
|
if domain == local_instance {
|
|
|
|
return Ok(());
|
|
|
|
}
|
2023-07-05 15:08:02 +00:00
|
|
|
|
2023-07-11 13:09:59 +00:00
|
|
|
let local_site_data = local_site_data_cached(&mut context.pool()).await?;
|
2023-07-25 17:26:54 +00:00
|
|
|
check_apub_id_valid(apub_id, &local_site_data)?;
|
2022-06-02 14:33:41 +00:00
|
|
|
|
2023-04-21 21:41:03 +00:00
|
|
|
// Only check allowlist if this is a community, and there are instances in the allowlist
|
|
|
|
if is_strict && !local_site_data.allowed_instances.is_empty() {
|
|
|
|
// need to allow this explicitly because apub receive might contain objects from our local
|
|
|
|
// instance.
|
|
|
|
let mut allowed_and_local = local_site_data
|
|
|
|
.allowed_instances
|
|
|
|
.iter()
|
|
|
|
.map(|i| i.domain.clone())
|
|
|
|
.collect::<Vec<String>>();
|
2023-07-05 15:08:02 +00:00
|
|
|
let local_instance = context
|
|
|
|
.settings()
|
2023-04-21 21:41:03 +00:00
|
|
|
.get_hostname_without_port()
|
|
|
|
.expect("local hostname is valid");
|
|
|
|
allowed_and_local.push(local_instance);
|
|
|
|
|
2024-04-30 10:33:37 +00:00
|
|
|
let domain = apub_id
|
|
|
|
.domain()
|
|
|
|
.ok_or(LemmyErrorType::UrlWithoutDomain)?
|
|
|
|
.to_string();
|
2023-04-21 21:41:03 +00:00
|
|
|
if !allowed_and_local.contains(&domain) {
|
2023-08-31 13:01:08 +00:00
|
|
|
Err(LemmyErrorType::FederationDisabledByStrictAllowList)?
|
2020-08-13 20:26:49 +00:00
|
|
|
}
|
2020-04-17 17:34:18 +00:00
|
|
|
}
|
2021-04-21 13:36:07 +00:00
|
|
|
Ok(())
|
2020-04-17 17:34:18 +00:00
|
|
|
}
|
2020-04-24 19:55:54 +00:00
|
|
|
|
2023-07-14 15:17:06 +00:00
|
|
|
/// Store received activities in the database.
|
2023-03-21 15:03:05 +00:00
|
|
|
///
|
2024-04-17 12:35:54 +00:00
|
|
|
/// This ensures that the same activity doesn't get received and processed more than once, which
|
2023-07-14 15:17:06 +00:00
|
|
|
/// would be a waste of resources.
|
|
|
|
#[tracing::instrument(skip(data))]
|
2024-04-10 14:14:11 +00:00
|
|
|
async fn insert_received_activity(ap_id: &Url, data: &Data<LemmyContext>) -> LemmyResult<()> {
|
2023-07-14 15:17:06 +00:00
|
|
|
ReceivedActivity::create(&mut data.pool(), &ap_id.clone().into()).await?;
|
2023-03-21 15:03:05 +00:00
|
|
|
Ok(())
|
2022-06-02 14:33:41 +00:00
|
|
|
}
|