Fix import blocked objects (#4712)

* Allow importing partial backup (fixes #4672)

* Fetch blocked objects if not known locally (fixes #4669)

* extract helper fn

* add comment

* cleanup

* remove test

* fmt

* remove .ok()
This commit is contained in:
Nutomic 2024-05-15 05:03:43 +02:00 committed by GitHub
parent 7fb03c502e
commit 9a9d518153
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 117 additions and 125 deletions

View file

@ -4,9 +4,10 @@ use crate::objects::{
person::ApubPerson, person::ApubPerson,
post::ApubPost, post::ApubPost,
}; };
use activitypub_federation::{config::Data, fetch::object_id::ObjectId}; use activitypub_federation::{config::Data, fetch::object_id::ObjectId, traits::Object};
use actix_web::web::Json; use actix_web::web::Json;
use futures::{future::try_join_all, StreamExt}; use futures::{future::try_join_all, StreamExt};
use itertools::Itertools;
use lemmy_api_common::{context::LemmyContext, SuccessResponse}; use lemmy_api_common::{context::LemmyContext, SuccessResponse};
use lemmy_db_schema::{ use lemmy_db_schema::{
newtypes::DbUrl, newtypes::DbUrl,
@ -30,8 +31,11 @@ use lemmy_utils::{
spawn_try_task, spawn_try_task,
}; };
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::future::Future;
use tracing::info; use tracing::info;
const PARALLELISM: usize = 10;
/// Backup of user data. This struct should never be changed so that the data can be used as a /// Backup of user data. This struct should never be changed so that the data can be used as a
/// long-term backup in case the instance goes down unexpectedly. All fields are optional to allow /// long-term backup in case the instance goes down unexpectedly. All fields are optional to allow
/// importing partial backups. /// importing partial backups.
@ -167,141 +171,91 @@ pub async fn import_settings(
} }
spawn_try_task(async move { spawn_try_task(async move {
const PARALLELISM: usize = 10;
let person_id = local_user_view.person.id; let person_id = local_user_view.person.id;
// These tasks fetch objects from remote instances which might be down.
// TODO: Would be nice if we could send a list of failed items with api response, but then
// the request would likely timeout.
let mut failed_items = vec![];
info!( info!(
"Starting settings backup for {}", "Starting settings import for {}",
local_user_view.person.name local_user_view.person.name
); );
futures::stream::iter( let failed_followed_communities = fetch_and_import(
data data.followed_communities.clone(),
.followed_communities &context,
.clone() |(followed, context)| async move {
.into_iter() let community = followed.dereference(&context).await?;
// reset_request_count works like clone, and is necessary to avoid running into request limit let form = CommunityFollowerForm {
.map(|f| (f, context.reset_request_count())) person_id,
.map(|(followed, context)| async move { community_id: community.id,
// need to reset outgoing request count to avoid running into limit pending: true,
let community = followed.dereference(&context).await?; };
let form = CommunityFollowerForm { CommunityFollower::follow(&mut context.pool(), &form).await?;
person_id, LemmyResult::Ok(())
community_id: community.id, },
pending: true,
};
CommunityFollower::follow(&mut context.pool(), &form).await?;
LemmyResult::Ok(())
}),
) )
.buffer_unordered(PARALLELISM)
.collect::<Vec<_>>()
.await
.into_iter()
.enumerate()
.for_each(|(i, r)| {
if let Err(e) = r {
failed_items.push(data.followed_communities.get(i).map(|u| u.inner().clone()));
info!("Failed to import followed community: {e}");
}
});
futures::stream::iter(
data
.saved_posts
.clone()
.into_iter()
.map(|s| (s, context.reset_request_count()))
.map(|(saved, context)| async move {
let post = saved.dereference(&context).await?;
let form = PostSavedForm {
person_id,
post_id: post.id,
};
PostSaved::save(&mut context.pool(), &form).await?;
LemmyResult::Ok(())
}),
)
.buffer_unordered(PARALLELISM)
.collect::<Vec<_>>()
.await
.into_iter()
.enumerate()
.for_each(|(i, r)| {
if let Err(e) = r {
failed_items.push(data.followed_communities.get(i).map(|u| u.inner().clone()));
info!("Failed to import saved post community: {e}");
}
});
futures::stream::iter(
data
.saved_comments
.clone()
.into_iter()
.map(|s| (s, context.reset_request_count()))
.map(|(saved, context)| async move {
let comment = saved.dereference(&context).await?;
let form = CommentSavedForm {
person_id,
comment_id: comment.id,
};
CommentSaved::save(&mut context.pool(), &form).await?;
LemmyResult::Ok(())
}),
)
.buffer_unordered(PARALLELISM)
.collect::<Vec<_>>()
.await
.into_iter()
.enumerate()
.for_each(|(i, r)| {
if let Err(e) = r {
failed_items.push(data.followed_communities.get(i).map(|u| u.inner().clone()));
info!("Failed to import saved comment community: {e}");
}
});
let failed_items: Vec<_> = failed_items.into_iter().flatten().collect();
info!(
"Finished settings backup for {}, failed items: {:#?}",
local_user_view.person.name, failed_items
);
// These tasks don't connect to any remote instances but only insert directly in the database.
// That means the only error condition are db connection failures, so no extra error handling is
// needed.
try_join_all(data.blocked_communities.iter().map(|blocked| async {
// dont fetch unknown blocked objects from home server
let community = blocked.dereference_local(&context).await?;
let form = CommunityBlockForm {
person_id,
community_id: community.id,
};
CommunityBlock::block(&mut context.pool(), &form).await?;
LemmyResult::Ok(())
}))
.await?; .await?;
try_join_all(data.blocked_users.iter().map(|blocked| async { let failed_saved_posts = fetch_and_import(
// dont fetch unknown blocked objects from home server data.saved_posts.clone(),
let target = blocked.dereference_local(&context).await?; &context,
let form = PersonBlockForm { |(saved, context)| async move {
person_id, let post = saved.dereference(&context).await?;
target_id: target.id, let form = PostSavedForm {
}; person_id,
PersonBlock::block(&mut context.pool(), &form).await?; post_id: post.id,
LemmyResult::Ok(()) };
})) PostSaved::save(&mut context.pool(), &form).await?;
LemmyResult::Ok(())
},
)
.await?;
let failed_saved_comments = fetch_and_import(
data.saved_comments.clone(),
&context,
|(saved, context)| async move {
let comment = saved.dereference(&context).await?;
let form = CommentSavedForm {
person_id,
comment_id: comment.id,
};
CommentSaved::save(&mut context.pool(), &form).await?;
LemmyResult::Ok(())
},
)
.await?;
let failed_community_blocks = fetch_and_import(
data.blocked_communities.clone(),
&context,
|(blocked, context)| async move {
let community = blocked.dereference(&context).await?;
let form = CommunityBlockForm {
person_id,
community_id: community.id,
};
CommunityBlock::block(&mut context.pool(), &form).await?;
LemmyResult::Ok(())
},
)
.await?;
let failed_user_blocks = fetch_and_import(
data.blocked_users.clone(),
&context,
|(blocked, context)| async move {
let context = context.reset_request_count();
let target = blocked.dereference(&context).await?;
let form = PersonBlockForm {
person_id,
target_id: target.id,
};
PersonBlock::block(&mut context.pool(), &form).await?;
LemmyResult::Ok(())
},
)
.await?; .await?;
try_join_all(data.blocked_instances.iter().map(|domain| async { try_join_all(data.blocked_instances.iter().map(|domain| async {
// dont fetch unknown blocked objects from home server
let instance = Instance::read_or_create(&mut context.pool(), domain.clone()).await?; let instance = Instance::read_or_create(&mut context.pool(), domain.clone()).await?;
let form = InstanceBlockForm { let form = InstanceBlockForm {
person_id, person_id,
@ -312,12 +266,48 @@ pub async fn import_settings(
})) }))
.await?; .await?;
info!("Settings import completed for {}, the following items failed: {failed_followed_communities}, {failed_saved_posts}, {failed_saved_comments}, {failed_community_blocks}, {failed_user_blocks}",
local_user_view.person.name);
Ok(()) Ok(())
}); });
Ok(Json(Default::default())) Ok(Json(Default::default()))
} }
async fn fetch_and_import<Kind, Fut>(
objects: Vec<ObjectId<Kind>>,
context: &Data<LemmyContext>,
import_fn: impl FnMut((ObjectId<Kind>, Data<LemmyContext>)) -> Fut,
) -> LemmyResult<String>
where
Kind: Object + Send + 'static,
for<'de2> <Kind as Object>::Kind: Deserialize<'de2>,
Fut: Future<Output = LemmyResult<()>>,
{
let mut failed_items = vec![];
futures::stream::iter(
objects
.clone()
.into_iter()
// need to reset outgoing request count to avoid running into limit
.map(|s| (s, context.reset_request_count()))
.map(import_fn),
)
.buffer_unordered(PARALLELISM)
.collect::<Vec<_>>()
.await
.into_iter()
.enumerate()
.for_each(|(i, r): (usize, LemmyResult<()>)| {
if r.is_err() {
if let Some(object) = objects.get(i) {
failed_items.push(object.inner().clone());
}
}
});
Ok(failed_items.into_iter().join(","))
}
#[cfg(test)] #[cfg(test)]
#[allow(clippy::indexing_slicing)] #[allow(clippy::indexing_slicing)]
mod tests { mod tests {

View file

@ -29,7 +29,9 @@ pub(crate) mod mentions;
pub mod objects; pub mod objects;
pub mod protocol; pub mod protocol;
pub const FEDERATION_HTTP_FETCH_LIMIT: u32 = 50; /// Maximum number of outgoing HTTP requests to fetch a single object. Needs to be high enough
/// to fetch a new community with posts, moderators and featured posts.
pub const FEDERATION_HTTP_FETCH_LIMIT: u32 = 100;
/// Only include a basic context to save space and bandwidth. The main context is hosted statically /// Only include a basic context to save space and bandwidth. The main context is hosted statically
/// on join-lemmy.org. Include activitystreams explicitly for better compat, but this could /// on join-lemmy.org. Include activitystreams explicitly for better compat, but this could