Untangle thumbnail generation logic (ref #4604) (#4615)

* Untangle thumbnail generation logic (ref #4604)

* prettier

* test cleanup

* fix tests

* also consider opengraph image for local thumbnail generation
This commit is contained in:
Nutomic 2024-04-17 16:36:45 +02:00 committed by GitHub
parent b0370ae2fd
commit 31829b6c05
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 65 additions and 120 deletions

View file

@ -27,9 +27,10 @@ import {
setupLogins, setupLogins,
waitForPost, waitForPost,
unfollows, unfollows,
editPostThumbnail,
getPost, getPost,
waitUntil, waitUntil,
randomString,
createPostWithThumbnail,
} from "./shared"; } from "./shared";
const downloadFileSync = require("download-file-sync"); const downloadFileSync = require("download-file-sync");
@ -269,10 +270,11 @@ test("Make regular post, and give it a custom thumbnail", async () => {
// Use wikipedia since it has an opengraph image // Use wikipedia since it has an opengraph image
const wikipediaUrl = "https://wikipedia.org/"; const wikipediaUrl = "https://wikipedia.org/";
let post = await createPost( let post = await createPostWithThumbnail(
alphaImage, alphaImage,
community.community_view.community.id, community.community_view.community.id,
wikipediaUrl, wikipediaUrl,
upload1.url!,
); );
// Wait for the metadata to get fetched, since this is backgrounded now // Wait for the metadata to get fetched, since this is backgrounded now
@ -281,17 +283,7 @@ test("Make regular post, and give it a custom thumbnail", async () => {
p => p.post_view.post.thumbnail_url != undefined, p => p.post_view.post.thumbnail_url != undefined,
); );
expect(post.post_view.post.url).toBe(wikipediaUrl); expect(post.post_view.post.url).toBe(wikipediaUrl);
expect(post.post_view.post.thumbnail_url).toBeDefined(); // Make sure it uses custom thumbnail
// Edit the thumbnail
await editPostThumbnail(alphaImage, post.post_view.post, upload1.url!);
post = await waitUntil(
() => getPost(alphaImage, post.post_view.post.id),
p => p.post_view.post.thumbnail_url == upload1.url,
);
// Make sure the thumbnail got edited.
expect(post.post_view.post.thumbnail_url).toBe(upload1.url); expect(post.post_view.post.thumbnail_url).toBe(upload1.url);
}); });
@ -308,23 +300,17 @@ test("Create an image post, and make sure a custom thumbnail doesn't overwrite i
const community = await createCommunity(alphaImage); const community = await createCommunity(alphaImage);
let post = await createPost( let post = await createPostWithThumbnail(
alphaImage, alphaImage,
community.community_view.community.id, community.community_view.community.id,
upload1.url, upload1.url!,
upload2.url!,
); );
expect(post.post_view.post.url).toBe(upload1.url);
// Edit the post
await editPostThumbnail(alphaImage, post.post_view.post, upload2.url!);
// Wait for the metadata to get fetched
post = await waitUntil( post = await waitUntil(
() => getPost(alphaImage, post.post_view.post.id), () => getPost(alphaImage, post.post_view.post.id),
p => p.post_view.post.thumbnail_url == upload1.url, p => p.post_view.post.thumbnail_url != undefined,
); );
// Make sure the new custom thumbnail is ignored, and doesn't overwrite the image post
expect(post.post_view.post.url).toBe(upload1.url); expect(post.post_view.post.url).toBe(upload1.url);
expect(post.post_view.post.thumbnail_url).toBe(upload1.url); // Make sure the custom thumbnail is ignored
expect(post.post_view.post.thumbnail_url == upload2.url).toBe(false);
}); });

View file

@ -203,6 +203,7 @@ export async function createPost(
// use example.com for consistent title and embed description // use example.com for consistent title and embed description
name: string = randomString(5), name: string = randomString(5),
alt_text = randomString(10), alt_text = randomString(10),
custom_thumbnail: string | undefined = undefined,
): Promise<PostResponse> { ): Promise<PostResponse> {
let form: CreatePost = { let form: CreatePost = {
name, name,
@ -210,6 +211,7 @@ export async function createPost(
body, body,
alt_text, alt_text,
community_id, community_id,
custom_thumbnail,
}; };
return api.createPost(form); return api.createPost(form);
} }
@ -226,16 +228,19 @@ export async function editPost(
return api.editPost(form); return api.editPost(form);
} }
export async function editPostThumbnail( export async function createPostWithThumbnail(
api: LemmyHttp, api: LemmyHttp,
post: Post, community_id: number,
customThumbnail: string, url: string,
custom_thumbnail: string,
): Promise<PostResponse> { ): Promise<PostResponse> {
let form: EditPost = { let form: CreatePost = {
post_id: post.id, name: randomString(10),
custom_thumbnail: customThumbnail, url,
community_id,
custom_thumbnail,
}; };
return api.editPost(form); return api.createPost(form);
} }
export async function deletePost( export async function deletePost(

View file

@ -11,7 +11,7 @@ pub async fn get_link_metadata(
data: Query<GetSiteMetadata>, data: Query<GetSiteMetadata>,
context: Data<LemmyContext>, context: Data<LemmyContext>,
) -> LemmyResult<Json<GetSiteMetadataResponse>> { ) -> LemmyResult<Json<GetSiteMetadataResponse>> {
let metadata = fetch_link_metadata(&data.url, false, &context).await?; let metadata = fetch_link_metadata(&data.url, &context).await?;
Ok(Json(GetSiteMetadataResponse { metadata })) Ok(Json(GetSiteMetadataResponse { metadata }))
} }

View file

@ -270,8 +270,6 @@ pub struct LinkMetadata {
#[serde(flatten)] #[serde(flatten)]
pub opengraph_data: OpenGraphData, pub opengraph_data: OpenGraphData,
pub content_type: Option<String>, pub content_type: Option<String>,
#[serde(skip)]
pub thumbnail: Option<DbUrl>,
} }
#[skip_serializing_none] #[skip_serializing_none]

View file

@ -42,11 +42,7 @@ pub fn client_builder(settings: &Settings) -> ClientBuilder {
/// Fetches metadata for the given link and optionally generates thumbnail. /// Fetches metadata for the given link and optionally generates thumbnail.
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn fetch_link_metadata( pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResult<LinkMetadata> {
url: &Url,
generate_thumbnail: bool,
context: &LemmyContext,
) -> LemmyResult<LinkMetadata> {
info!("Fetching site metadata for url: {}", url); info!("Fetching site metadata for url: {}", url);
let response = context.client().get(url.as_str()).send().await?; let response = context.client().get(url.as_str()).send().await?;
@ -63,71 +59,61 @@ pub async fn fetch_link_metadata(
let opengraph_data = extract_opengraph_data(&html_bytes, url) let opengraph_data = extract_opengraph_data(&html_bytes, url)
.map_err(|e| info!("{e}")) .map_err(|e| info!("{e}"))
.unwrap_or_default(); .unwrap_or_default();
let thumbnail =
extract_thumbnail_from_opengraph_data(url, &opengraph_data, generate_thumbnail, context).await;
Ok(LinkMetadata { Ok(LinkMetadata {
opengraph_data, opengraph_data,
content_type: content_type.map(|c| c.to_string()), content_type: content_type.map(|c| c.to_string()),
thumbnail,
}) })
} }
#[tracing::instrument(skip_all)]
pub async fn fetch_link_metadata_opt(
url: Option<&Url>,
generate_thumbnail: bool,
context: &LemmyContext,
) -> LinkMetadata {
match &url {
Some(url) => fetch_link_metadata(url, generate_thumbnail, context)
.await
.unwrap_or_default(),
_ => Default::default(),
}
}
/// Generate post thumbnail in background task, because some sites can be very slow to respond. /// Generate post thumbnail in background task, because some sites can be very slow to respond.
/// ///
/// Takes a callback to generate a send activity task, so that post can be federated with metadata. /// Takes a callback to generate a send activity task, so that post can be federated with metadata.
///
/// TODO: `federated_thumbnail` param can be removed once we federate full metadata and can
/// write it to db directly, without calling this function.
/// https://github.com/LemmyNet/lemmy/issues/4598
pub fn generate_post_link_metadata( pub fn generate_post_link_metadata(
post: Post, post: Post,
custom_thumbnail: Option<Url>, custom_thumbnail: Option<Url>,
federated_thumbnail: Option<Url>,
send_activity: impl FnOnce(Post) -> Option<SendActivityData> + Send + 'static, send_activity: impl FnOnce(Post) -> Option<SendActivityData> + Send + 'static,
local_site: Option<LocalSite>, local_site: Option<LocalSite>,
context: Data<LemmyContext>, context: Data<LemmyContext>,
) { ) {
spawn_try_task(async move { spawn_try_task(async move {
// Decide if the thumbnail should be generated let metadata = match &post.url {
let allow_sensitive = local_site_opt_to_sensitive(&local_site); Some(url) => fetch_link_metadata(url, &context).await.unwrap_or_default(),
let page_is_sensitive = post.nsfw; _ => Default::default(),
let allow_generate_thumbnail = allow_sensitive || !page_is_sensitive;
let do_generate_thumbnail =
allow_generate_thumbnail && custom_thumbnail.is_none() && post.thumbnail_url.is_none();
// Generate local thumbnail only if no thumbnail was federated and 'sensitive' attributes allow it.
let metadata = fetch_link_metadata_opt(
post.url.as_ref().map(DbUrl::inner),
do_generate_thumbnail,
&context,
)
.await;
// If its an image post, it needs to overwrite the thumbnail, and take precedence
let image_url = if metadata
.content_type
.as_ref()
.is_some_and(|content_type| content_type.starts_with("image"))
{
post.url.map(Into::into)
} else {
None
}; };
// Build the thumbnail url based on either the post image url, custom thumbnail, metadata fetch, or existing thumbnail. let is_image_post = metadata
let thumbnail_url = image_url .content_type
.or(custom_thumbnail) .as_ref()
.or(metadata.thumbnail.map(Into::into)) .is_some_and(|content_type| content_type.starts_with("image"));
.or(post.thumbnail_url.map(Into::into));
// Decide if we are allowed to generate local thumbnail
let allow_sensitive = local_site_opt_to_sensitive(&local_site);
let allow_generate_thumbnail = allow_sensitive || !post.nsfw;
// Use custom thumbnail if available and its not an image post
let thumbnail_url = if !is_image_post && custom_thumbnail.is_some() {
custom_thumbnail
}
// Use federated thumbnail if available
else if federated_thumbnail.is_some() {
federated_thumbnail
}
// Generate local thumbnail if allowed
else if allow_generate_thumbnail {
match post.url.or(metadata.opengraph_data.image) {
Some(url) => generate_pictrs_thumbnail(&url, &context).await.ok(),
None => None,
}
}
// Otherwise use opengraph preview image directly
else {
metadata.opengraph_data.image.map(Into::into)
};
// Proxy the image fetch if necessary // Proxy the image fetch if necessary
let proxied_thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, &context).await?; let proxied_thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, &context).await?;
@ -213,28 +199,6 @@ fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> LemmyResult<OpenGraph
}) })
} }
#[tracing::instrument(skip_all)]
pub async fn extract_thumbnail_from_opengraph_data(
url: &Url,
opengraph_data: &OpenGraphData,
generate_thumbnail: bool,
context: &LemmyContext,
) -> Option<DbUrl> {
if generate_thumbnail {
let image_url = opengraph_data
.image
.as_ref()
.map(DbUrl::inner)
.unwrap_or(url);
generate_pictrs_thumbnail(image_url, context)
.await
.ok()
.map(Into::into)
} else {
opengraph_data.image.clone()
}
}
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
struct PictrsResponse { struct PictrsResponse {
files: Vec<PictrsFile>, files: Vec<PictrsFile>,
@ -414,9 +378,7 @@ mod tests {
async fn test_link_metadata() { async fn test_link_metadata() {
let context = LemmyContext::init_test_context().await; let context = LemmyContext::init_test_context().await;
let sample_url = Url::parse("https://gitlab.com/IzzyOnDroid/repo/-/wikis/FAQ").unwrap(); let sample_url = Url::parse("https://gitlab.com/IzzyOnDroid/repo/-/wikis/FAQ").unwrap();
let sample_res = fetch_link_metadata(&sample_url, false, &context) let sample_res = fetch_link_metadata(&sample_url, &context).await.unwrap();
.await
.unwrap();
assert_eq!( assert_eq!(
Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()), Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()),
sample_res.opengraph_data.title sample_res.opengraph_data.title
@ -438,17 +400,8 @@ mod tests {
Some(mime::TEXT_HTML_UTF_8.to_string()), Some(mime::TEXT_HTML_UTF_8.to_string()),
sample_res.content_type sample_res.content_type
); );
assert!(sample_res.thumbnail.is_some());
} }
// #[test]
// fn test_pictshare() {
// let res = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpg");
// assert!(res.is_ok());
// let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu");
// assert!(res_other.is_err());
// }
#[test] #[test]
fn test_resolve_image_url() { fn test_resolve_image_url() {
// url that lists the opengraph fields // url that lists the opengraph fields

View file

@ -159,6 +159,7 @@ pub async fn create_post(
generate_post_link_metadata( generate_post_link_metadata(
updated_post.clone(), updated_post.clone(),
custom_thumbnail, custom_thumbnail,
None,
|post| Some(SendActivityData::CreatePost(post)), |post| Some(SendActivityData::CreatePost(post)),
Some(local_site), Some(local_site),
context.reset_request_count(), context.reset_request_count(),

View file

@ -118,6 +118,7 @@ pub async fn update_post(
generate_post_link_metadata( generate_post_link_metadata(
updated_post.clone(), updated_post.clone(),
custom_thumbnail, custom_thumbnail,
None,
|post| Some(SendActivityData::UpdatePost(post)), |post| Some(SendActivityData::UpdatePost(post)),
Some(local_site), Some(local_site),
context.reset_request_count(), context.reset_request_count(),

View file

@ -280,6 +280,7 @@ impl Object for ApubPost {
generate_post_link_metadata( generate_post_link_metadata(
post.clone(), post.clone(),
None,
page.image.map(|i| i.url), page.image.map(|i| i.url),
|_| None, |_| None,
local_site, local_site,