lemmy/crates/federate/src/util.rs
phiresky 375d9a2a3c
Persistent, performant, reliable federation queue (#3605)
* persistent activity queue

* fixes

* fixes

* make federation workers function callable from outside

* log federation instances

* dead instance detection not needed here

* taplo fmt

* split federate bin/lib

* minor fix

* better logging

* log

* create struct to hold cancellable task for readability

* use boxfuture for readability

* reset submodule

* fix

* fix lint

* swap

* remove json column, use separate array columns instead

* some review comments

* make worker a struct for readability

* minor readability

* add local filter to community follower view

* remove separate lemmy_federate entry point

* fix remaining duration

* address review comments mostly

* fix lint

* upgrade actitypub-fed to simpler interface

* fix sql format

* increase delays a bit

* fixes after merge

* remove selectable

* fix instance selectable

* add comment

* start federation based on latest id at the time

* rename federate process args

* dead instances in one query

* filter follow+report activities by local

* remove synchronous federation

remove activity sender queue

* lint

* fix federation tests by waiting for results to change

* fix fed test

* fix comment report

* wait some more

* Apply suggestions from code review

Co-authored-by: SorteKanin <sortekanin@gmail.com>

* fix most remaining tests

* wait until private messages

* fix community tests

* fix community tests

* move arg parse

* use instance_id instead of domain in federation_queue_state table

---------

Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
Co-authored-by: SorteKanin <sortekanin@gmail.com>
2023-09-09 12:25:03 -04:00

198 lines
6.3 KiB
Rust

use anyhow::{anyhow, Context, Result};
use diesel::{
prelude::*,
sql_types::{Bool, Int8},
};
use diesel_async::RunQueryDsl;
use lemmy_apub::{
activity_lists::SharedInboxActivities,
fetcher::{site_or_community_or_user::SiteOrCommunityOrUser, user_or_community::UserOrCommunity},
};
use lemmy_db_schema::{
source::{
activity::{ActorType, SentActivity},
community::Community,
person::Person,
site::Site,
},
traits::ApubActor,
utils::{get_conn, DbPool},
};
use moka::future::Cache;
use once_cell::sync::Lazy;
use reqwest::Url;
use serde_json::Value;
use std::{
future::Future,
pin::Pin,
sync::{Arc, RwLock},
time::Duration,
};
use tokio::{task::JoinHandle, time::sleep};
use tokio_util::sync::CancellationToken;
pub struct CancellableTask<R: Send + 'static> {
f: Pin<Box<dyn Future<Output = Result<R, anyhow::Error>> + Send + 'static>>,
ended: Arc<RwLock<bool>>,
}
impl<R: Send + 'static> CancellableTask<R> {
/// spawn a task but with graceful shutdown
pub fn spawn<F>(
timeout: Duration,
task: impl FnOnce(CancellationToken) -> F,
) -> CancellableTask<R>
where
F: Future<Output = Result<R>> + Send + 'static,
{
let stop = CancellationToken::new();
let task = task(stop.clone());
let ended = Arc::new(RwLock::new(false));
let ended_write = ended.clone();
let task: JoinHandle<Result<R>> = tokio::spawn(async move {
match task.await {
Ok(o) => Ok(o),
Err(e) => {
*ended_write.write().expect("poisoned") = true;
Err(e)
}
}
});
let abort = task.abort_handle();
CancellableTask {
ended,
f: Box::pin(async move {
stop.cancel();
tokio::select! {
r = task => {
Ok(r.context("could not join")??)
},
_ = sleep(timeout) => {
abort.abort();
tracing::warn!("Graceful shutdown timed out, aborting task");
Err(anyhow!("task aborted due to timeout"))
}
}
}),
}
}
/// cancel the cancel signal, wait for timeout for the task to stop gracefully, otherwise abort it
pub async fn cancel(self) -> Result<R, anyhow::Error> {
self.f.await
}
pub fn has_ended(&self) -> bool {
*self.ended.read().expect("poisoned")
}
}
/// assuming apub priv key and ids are immutable, then we don't need to have TTL
/// TODO: capacity should be configurable maybe based on memory use
pub(crate) async fn get_actor_cached(
pool: &mut DbPool<'_>,
actor_type: ActorType,
actor_apub_id: &Url,
) -> Result<Arc<SiteOrCommunityOrUser>> {
static CACHE: Lazy<Cache<Url, Arc<SiteOrCommunityOrUser>>> =
Lazy::new(|| Cache::builder().max_capacity(10000).build());
CACHE
.try_get_with(actor_apub_id.clone(), async {
let url = actor_apub_id.clone().into();
let person = match actor_type {
ActorType::Site => SiteOrCommunityOrUser::Site(
Site::read_from_apub_id(pool, &url)
.await?
.context("apub site not found")?
.into(),
),
ActorType::Community => SiteOrCommunityOrUser::UserOrCommunity(UserOrCommunity::Community(
Community::read_from_apub_id(pool, &url)
.await?
.context("apub community not found")?
.into(),
)),
ActorType::Person => SiteOrCommunityOrUser::UserOrCommunity(UserOrCommunity::User(
Person::read_from_apub_id(pool, &url)
.await?
.context("apub person not found")?
.into(),
)),
};
Result::<_, anyhow::Error>::Ok(Arc::new(person))
})
.await
.map_err(|e| anyhow::anyhow!("err getting actor {actor_type:?} {actor_apub_id}: {e:?}"))
}
/// this should maybe be a newtype like all the other PersonId CommunityId etc.
pub(crate) type ActivityId = i64;
type CachedActivityInfo = Option<Arc<(SentActivity, SharedInboxActivities)>>;
/// activities are immutable so cache does not need to have TTL
/// May return None if the corresponding id does not exist or is a received activity.
/// Holes in serials are expected behaviour in postgresql
/// todo: cache size should probably be configurable / dependent on desired memory usage
pub(crate) async fn get_activity_cached(
pool: &mut DbPool<'_>,
activity_id: ActivityId,
) -> Result<CachedActivityInfo> {
static ACTIVITIES: Lazy<Cache<ActivityId, CachedActivityInfo>> =
Lazy::new(|| Cache::builder().max_capacity(10000).build());
ACTIVITIES
.try_get_with(activity_id, async {
let row = SentActivity::read(pool, activity_id)
.await
.optional()
.context("could not read activity")?;
let Some(mut row) = row else {
return anyhow::Result::<_, anyhow::Error>::Ok(None);
};
// swap to avoid cloning
let mut data = Value::Null;
std::mem::swap(&mut row.data, &mut data);
let activity_actual: SharedInboxActivities = serde_json::from_value(data)?;
Ok(Some(Arc::new((row, activity_actual))))
})
.await
.map_err(|e| anyhow::anyhow!("err getting activity: {e:?}"))
}
/// return the most current activity id (with 1 second cache)
pub(crate) async fn get_latest_activity_id(pool: &mut DbPool<'_>) -> Result<ActivityId> {
static CACHE: Lazy<Cache<(), ActivityId>> = Lazy::new(|| {
Cache::builder()
.time_to_live(Duration::from_secs(1))
.build()
});
CACHE
.try_get_with((), async {
let conn = &mut get_conn(pool).await?;
let seq: Sequence =
diesel::sql_query("select last_value, is_called from sent_activity_id_seq")
.get_result(conn)
.await?;
let latest_id = if seq.is_called {
seq.last_value as ActivityId
} else {
// if a PG sequence has never been used, last_value will actually be next_value
(seq.last_value - 1) as ActivityId
};
anyhow::Result::<_, anyhow::Error>::Ok(latest_id as ActivityId)
})
.await
.map_err(|e| anyhow::anyhow!("err getting id: {e:?}"))
}
/// how long to sleep based on how many retries have already happened
pub(crate) fn retry_sleep_duration(retry_count: i32) -> Duration {
Duration::from_secs_f64(10.0 * 2.0_f64.powf(f64::from(retry_count)))
}
#[derive(QueryableByName)]
struct Sequence {
#[diesel(sql_type = Int8)]
last_value: i64, // this value is bigint for some reason even if sequence is int4
#[diesel(sql_type = Bool)]
is_called: bool,
}