From 810762762fedcf4f20c571c263603aa7753234df Mon Sep 17 00:00:00 2001 From: "Lemmus.org" <137361180+lemmus-org@users.noreply.github.com> Date: Fri, 30 Jun 2023 03:42:42 -0700 Subject: [PATCH] Update federated posts to not cache sensitive images if not allowed by local site (#3253) * Update federated posts to not cache sensitive images if not allowed by local site * Refactor thumbnail match to simplify logic --- crates/api_common/src/request.rs | 4 ++++ crates/api_common/src/utils.rs | 7 +++++++ crates/api_crud/src/post/create.rs | 2 +- crates/api_crud/src/post/update.rs | 2 +- crates/apub/src/objects/post.rs | 25 ++++++++++++++++++++----- 5 files changed, 33 insertions(+), 7 deletions(-) diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index 9f7f9db591..64563bc0d6 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -193,6 +193,7 @@ pub async fn fetch_site_data( client: &ClientWithMiddleware, settings: &Settings, url: Option<&Url>, + include_image: bool, ) -> (Option, Option) { match &url { Some(url) => { @@ -200,6 +201,9 @@ pub async fn fetch_site_data( // Ignore errors, since it may be an image, or not have the data. 
// Warning, this may ignore SSL errors let metadata_option = fetch_site_metadata(client, url).await.ok(); + if !include_image { + return (metadata_option, None); + } let missing_pictrs_file = |r: PictrsResponse| r.files.first().expect("missing pictrs file").file.clone(); diff --git a/crates/api_common/src/utils.rs b/crates/api_common/src/utils.rs index e3e761c90f..f400cc9a84 100644 --- a/crates/api_common/src/utils.rs +++ b/crates/api_common/src/utils.rs @@ -428,6 +428,13 @@ pub fn local_site_opt_to_slur_regex(local_site: &Option) -> Option) -> bool { + local_site + .as_ref() + .map(|site| site.enable_nsfw) + .unwrap_or(false) +} + pub fn send_application_approved_email( user: &LocalUserView, settings: &Settings, diff --git a/crates/api_crud/src/post/create.rs b/crates/api_crud/src/post/create.rs index 8ff1b678ae..0bbbabcb04 100644 --- a/crates/api_crud/src/post/create.rs +++ b/crates/api_crud/src/post/create.rs @@ -79,7 +79,7 @@ impl PerformCrud for CreatePost { // Fetch post links and pictrs cached image let (metadata_res, thumbnail_url) = - fetch_site_data(context.client(), context.settings(), data_url).await; + fetch_site_data(context.client(), context.settings(), data_url, true).await; let (embed_title, embed_description, embed_video_url) = metadata_res .map(|u| (u.title, u.description, u.embed_video_url)) .unwrap_or_default(); diff --git a/crates/api_crud/src/post/update.rs b/crates/api_crud/src/post/update.rs index a540f454f9..253340834b 100644 --- a/crates/api_crud/src/post/update.rs +++ b/crates/api_crud/src/post/update.rs @@ -69,7 +69,7 @@ impl PerformCrud for EditPost { // Fetch post links and Pictrs cached image let data_url = data.url.as_ref(); let (metadata_res, thumbnail_url) = - fetch_site_data(context.client(), context.settings(), data_url).await; + fetch_site_data(context.client(), context.settings(), data_url, true).await; let (embed_title, embed_description, embed_video_url) = metadata_res .map(|u| (Some(u.title), Some(u.description), 
Some(u.embed_video_url))) .unwrap_or_default(); diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index 4ef9351ab4..7878fcf122 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -25,7 +25,7 @@ use html2md::parse_html; use lemmy_api_common::{ context::LemmyContext, request::fetch_site_data, - utils::{is_mod_or_admin, local_site_opt_to_slur_regex}, + utils::{is_mod_or_admin, local_site_opt_to_sensitive, local_site_opt_to_slur_regex}, }; use lemmy_db_schema::{ self, @@ -197,18 +197,33 @@ impl Object for ApubPost { } else { None }; + + let local_site = LocalSite::read(context.pool()).await.ok(); + let allow_sensitive = local_site_opt_to_sensitive(&local_site); + let page_is_sensitive = page.sensitive.unwrap_or(false); + let include_image = allow_sensitive || !page_is_sensitive; + // Only fetch metadata if the post has a url and was not seen previously. We dont want to // waste resources by fetching metadata for the same post multiple times. - let (metadata_res, thumbnail_url) = match &url { + // Additionally, only fetch image if content is not sensitive or is allowed on local site. + let (metadata_res, thumbnail) = match &url { Some(url) if old_post.is_err() => { - fetch_site_data(context.client(), context.settings(), Some(url)).await + fetch_site_data( + context.client(), + context.settings(), + Some(url), + include_image, + ) + .await } - _ => (None, page.image.map(|i| i.url.into())), + _ => (None, None), }; + // If no image was included with metadata, use post image instead when available. + let thumbnail_url = thumbnail.or_else(|| page.image.map(|i| i.url.into())); + let (embed_title, embed_description, embed_video_url) = metadata_res .map(|u| (u.title, u.description, u.embed_video_url)) .unwrap_or_default(); - let local_site = LocalSite::read(context.pool()).await.ok(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); let body_slurs_removed =