From 31829b6c051ca172fcfd7da843172e33e521a8fc Mon Sep 17 00:00:00 2001 From: Nutomic Date: Wed, 17 Apr 2024 16:36:45 +0200 Subject: [PATCH] Untangle thumbnail generation logic (ref #4604) (#4615) * Untangle thumbnail generation logic (ref #4604) * prettier * test cleanup * fix tests * also consider opengraph image for local thumbnail generation --- api_tests/src/image.spec.ts | 36 ++----- api_tests/src/shared.ts | 19 ++-- crates/api/src/post/get_link_metadata.rs | 2 +- crates/api_common/src/post.rs | 2 - crates/api_common/src/request.rs | 123 +++++++---------------- crates/api_crud/src/post/create.rs | 1 + crates/api_crud/src/post/update.rs | 1 + crates/apub/src/objects/post.rs | 1 + 8 files changed, 65 insertions(+), 120 deletions(-) diff --git a/api_tests/src/image.spec.ts b/api_tests/src/image.spec.ts index cf6692960..a1b3c3f3e 100644 --- a/api_tests/src/image.spec.ts +++ b/api_tests/src/image.spec.ts @@ -27,9 +27,10 @@ import { setupLogins, waitForPost, unfollows, - editPostThumbnail, getPost, waitUntil, + randomString, + createPostWithThumbnail, } from "./shared"; const downloadFileSync = require("download-file-sync"); @@ -269,10 +270,11 @@ test("Make regular post, and give it a custom thumbnail", async () => { // Use wikipedia since it has an opengraph image const wikipediaUrl = "https://wikipedia.org/"; - let post = await createPost( + let post = await createPostWithThumbnail( alphaImage, community.community_view.community.id, wikipediaUrl, + upload1.url!, ); // Wait for the metadata to get fetched, since this is backgrounded now @@ -281,17 +283,7 @@ test("Make regular post, and give it a custom thumbnail", async () => { p => p.post_view.post.thumbnail_url != undefined, ); expect(post.post_view.post.url).toBe(wikipediaUrl); - expect(post.post_view.post.thumbnail_url).toBeDefined(); - - // Edit the thumbnail - await editPostThumbnail(alphaImage, post.post_view.post, upload1.url!); - - post = await waitUntil( - () => getPost(alphaImage, post.post_view.post.id), - p => p.post_view.post.thumbnail_url == upload1.url, - ); - - // Make sure the thumbnail got edited. + // Make sure it uses custom thumbnail expect(post.post_view.post.thumbnail_url).toBe(upload1.url); }); @@ -308,23 +300,17 @@ test("Create an image post, and make sure a custom thumbnail doesn't overwrite i const community = await createCommunity(alphaImage); - let post = await createPost( + let post = await createPostWithThumbnail( alphaImage, community.community_view.community.id, - upload1.url, + upload1.url!, + upload2.url!, ); - expect(post.post_view.post.url).toBe(upload1.url); - - // Edit the post - await editPostThumbnail(alphaImage, post.post_view.post, upload2.url!); - - // Wait for the metadata to get fetched post = await waitUntil( () => getPost(alphaImage, post.post_view.post.id), - p => p.post_view.post.thumbnail_url == upload1.url, + p => p.post_view.post.thumbnail_url != undefined, ); - - // Make sure the new custom thumbnail is ignored, and doesn't overwrite the image post expect(post.post_view.post.url).toBe(upload1.url); - expect(post.post_view.post.thumbnail_url).toBe(upload1.url); + // Make sure the custom thumbnail is ignored + expect(post.post_view.post.thumbnail_url == upload2.url).toBe(false); }); diff --git a/api_tests/src/shared.ts b/api_tests/src/shared.ts index c6f39f787..1a8a9afaf 100644 --- a/api_tests/src/shared.ts +++ b/api_tests/src/shared.ts @@ -203,6 +203,7 @@ export async function createPost( // use example.com for consistent title and embed description name: string = randomString(5), alt_text = randomString(10), + custom_thumbnail: string | undefined = undefined, ): Promise { let form: CreatePost = { name, @@ -210,6 +211,7 @@ export async function createPost( body, alt_text, community_id, + custom_thumbnail, }; return api.createPost(form); } @@ -226,16 +228,19 @@ export async function editPost( return api.editPost(form); } -export async function editPostThumbnail( +export async function createPostWithThumbnail( api: LemmyHttp, - post: Post, - customThumbnail: string, + community_id: number, + url: string, + custom_thumbnail: string, ): Promise { - let form: EditPost = { - post_id: post.id, - custom_thumbnail: customThumbnail, + let form: CreatePost = { + name: randomString(10), + url, + community_id, + custom_thumbnail, }; - return api.editPost(form); + return api.createPost(form); } export async function deletePost( diff --git a/crates/api/src/post/get_link_metadata.rs b/crates/api/src/post/get_link_metadata.rs index 8bc425125..17346790a 100644 --- a/crates/api/src/post/get_link_metadata.rs +++ b/crates/api/src/post/get_link_metadata.rs @@ -11,7 +11,7 @@ pub async fn get_link_metadata( data: Query, context: Data, ) -> LemmyResult> { - let metadata = fetch_link_metadata(&data.url, false, &context).await?; + let metadata = fetch_link_metadata(&data.url, &context).await?; Ok(Json(GetSiteMetadataResponse { metadata })) } diff --git a/crates/api_common/src/post.rs b/crates/api_common/src/post.rs index 4993fc6e5..49327dac1 100644 --- a/crates/api_common/src/post.rs +++ b/crates/api_common/src/post.rs @@ -270,8 +270,6 @@ pub struct LinkMetadata { #[serde(flatten)] pub opengraph_data: OpenGraphData, pub content_type: Option, - #[serde(skip)] - pub thumbnail: Option, } #[skip_serializing_none] diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index 912fbc67e..ddb2a4551 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -42,11 +42,7 @@ pub fn client_builder(settings: &Settings) -> ClientBuilder { /// Fetches metadata for the given link and optionally generates thumbnail. #[tracing::instrument(skip_all)] -pub async fn fetch_link_metadata( - url: &Url, - generate_thumbnail: bool, - context: &LemmyContext, -) -> LemmyResult { +pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResult { info!("Fetching site metadata for url: {}", url); let response = context.client().get(url.as_str()).send().await?; @@ -63,71 +59,61 @@ pub async fn fetch_link_metadata( let opengraph_data = extract_opengraph_data(&html_bytes, url) .map_err(|e| info!("{e}")) .unwrap_or_default(); - let thumbnail = - extract_thumbnail_from_opengraph_data(url, &opengraph_data, generate_thumbnail, context).await; - Ok(LinkMetadata { opengraph_data, content_type: content_type.map(|c| c.to_string()), - thumbnail, }) } -#[tracing::instrument(skip_all)] -pub async fn fetch_link_metadata_opt( - url: Option<&Url>, - generate_thumbnail: bool, - context: &LemmyContext, -) -> LinkMetadata { - match &url { - Some(url) => fetch_link_metadata(url, generate_thumbnail, context) - .await - .unwrap_or_default(), - _ => Default::default(), - } -} /// Generate post thumbnail in background task, because some sites can be very slow to respond. /// /// Takes a callback to generate a send activity task, so that post can be federated with metadata. +/// +/// TODO: `federated_thumbnail` param can be removed once we federate full metadata and can +/// write it to db directly, without calling this function. +/// https://github.com/LemmyNet/lemmy/issues/4598 pub fn generate_post_link_metadata( post: Post, custom_thumbnail: Option, + federated_thumbnail: Option, send_activity: impl FnOnce(Post) -> Option + Send + 'static, local_site: Option, context: Data, ) { spawn_try_task(async move { - // Decide if the thumbnail should be generated - let allow_sensitive = local_site_opt_to_sensitive(&local_site); - let page_is_sensitive = post.nsfw; - let allow_generate_thumbnail = allow_sensitive || !page_is_sensitive; - let do_generate_thumbnail = - allow_generate_thumbnail && custom_thumbnail.is_none() && post.thumbnail_url.is_none(); - - // Generate local thumbnail only if no thumbnail was federated and 'sensitive' attributes allow it. - let metadata = fetch_link_metadata_opt( - post.url.as_ref().map(DbUrl::inner), - do_generate_thumbnail, - &context, - ) - .await; - - // If its an image post, it needs to overwrite the thumbnail, and take precedence - let image_url = if metadata - .content_type - .as_ref() - .is_some_and(|content_type| content_type.starts_with("image")) - { - post.url.map(Into::into) - } else { - None + let metadata = match &post.url { + Some(url) => fetch_link_metadata(url, &context).await.unwrap_or_default(), + _ => Default::default(), }; - // Build the thumbnail url based on either the post image url, custom thumbnail, metadata fetch, or existing thumbnail. - let thumbnail_url = image_url - .or(custom_thumbnail) - .or(metadata.thumbnail.map(Into::into)) - .or(post.thumbnail_url.map(Into::into)); + let is_image_post = metadata + .content_type + .as_ref() + .is_some_and(|content_type| content_type.starts_with("image")); + + // Decide if we are allowed to generate local thumbnail + let allow_sensitive = local_site_opt_to_sensitive(&local_site); + let allow_generate_thumbnail = allow_sensitive || !post.nsfw; + + // Use custom thumbnail if available and its not an image post + let thumbnail_url = if !is_image_post && custom_thumbnail.is_some() { + custom_thumbnail + } + // Use federated thumbnail if available + else if federated_thumbnail.is_some() { + federated_thumbnail + } + // Generate local thumbnail if allowed + else if allow_generate_thumbnail { + match post.url.or(metadata.opengraph_data.image) { + Some(url) => generate_pictrs_thumbnail(&url, &context).await.ok(), + None => None, + } + } + // Otherwise use opengraph preview image directly + else { + metadata.opengraph_data.image.map(Into::into) + }; // Proxy the image fetch if necessary let proxied_thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, &context).await?; @@ -213,28 +199,6 @@ fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> LemmyResult Option { - if generate_thumbnail { - let image_url = opengraph_data - .image - .as_ref() - .map(DbUrl::inner) - .unwrap_or(url); - generate_pictrs_thumbnail(image_url, context) - .await - .ok() - .map(Into::into) - } else { - opengraph_data.image.clone() - } -} - #[derive(Deserialize, Debug)] struct PictrsResponse { files: Vec, @@ -414,9 +378,7 @@ mod tests { async fn test_link_metadata() { let context = LemmyContext::init_test_context().await; let sample_url = Url::parse("https://gitlab.com/IzzyOnDroid/repo/-/wikis/FAQ").unwrap(); - let sample_res = fetch_link_metadata(&sample_url, false, &context) - .await - .unwrap(); + let sample_res = fetch_link_metadata(&sample_url, &context).await.unwrap(); assert_eq!( Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()), sample_res.opengraph_data.title @@ -438,17 +400,8 @@ mod tests { Some(mime::TEXT_HTML_UTF_8.to_string()), sample_res.content_type ); - assert!(sample_res.thumbnail.is_some()); } - // #[test] - // fn test_pictshare() { - // let res = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpg"); - // assert!(res.is_ok()); - // let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu"); - // assert!(res_other.is_err()); - // } - #[test] fn test_resolve_image_url() { // url that lists the opengraph fields diff --git a/crates/api_crud/src/post/create.rs b/crates/api_crud/src/post/create.rs index 266f964fa..fcd274c03 100644 --- a/crates/api_crud/src/post/create.rs +++ b/crates/api_crud/src/post/create.rs @@ -159,6 +159,7 @@ pub async fn create_post( generate_post_link_metadata( updated_post.clone(), custom_thumbnail, + None, |post| Some(SendActivityData::CreatePost(post)), Some(local_site), context.reset_request_count(), diff --git a/crates/api_crud/src/post/update.rs b/crates/api_crud/src/post/update.rs index 8be97f3c1..74e0c0d47 100644 --- a/crates/api_crud/src/post/update.rs +++ b/crates/api_crud/src/post/update.rs @@ -118,6 +118,7 @@ pub async fn update_post( generate_post_link_metadata( updated_post.clone(), custom_thumbnail, + None, |post| Some(SendActivityData::UpdatePost(post)), Some(local_site), context.reset_request_count(), diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index c7fa4acb1..ff11c985c 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -280,6 +280,7 @@ impl Object for ApubPost { generate_post_link_metadata( post.clone(), + None, page.image.map(|i| i.url), |_| None, local_site,