From 63ea99d38ade254bd428b5d031be0feec123315f Mon Sep 17 00:00:00 2001 From: Nutomic Date: Tue, 19 Nov 2024 15:48:44 +0100 Subject: [PATCH] Guess image mime type from file extension (fixes #5196) (#5212) * Guess image mime type from file extension (fixes #5196) * Mime check fixes. (#5213) * Mime check fixes. * Adding back comment. --------- Co-authored-by: Dessalines --- Cargo.lock | 19 ++++++++++++++++++- crates/api_common/Cargo.toml | 1 + crates/api_common/src/request.rs | 20 ++++++++++++++------ 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 54857b3a0..b62cf624c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "accept-language" @@ -2515,6 +2515,7 @@ dependencies = [ "lemmy_db_views_moderator", "lemmy_utils", "mime", + "mime_guess", "moka", "pretty_assertions", "regex", @@ -3147,6 +3148,16 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -5300,6 +5311,12 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "unicase" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" + [[package]] name = "unicode-bidi" version = "0.3.15" diff --git a/crates/api_common/Cargo.toml b/crates/api_common/Cargo.toml index 7b987cbb7..76f531ddb 100644 
--- a/crates/api_common/Cargo.toml +++ b/crates/api_common/Cargo.toml @@ -64,6 +64,7 @@ actix-web = { workspace = true, optional = true } enum-map = { workspace = true } urlencoding = { workspace = true } mime = { version = "0.3.17", optional = true } +mime_guess = "2.0.5" webpage = { version = "2.0", default-features = false, features = [ "serde", ], optional = true } diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index 36010f760..cc506b896 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -23,7 +23,6 @@ use lemmy_utils::{ REQWEST_TIMEOUT, VERSION, }; -use mime::Mime; use reqwest::{ header::{CONTENT_TYPE, RANGE}, Client, @@ -64,11 +63,20 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu .await? .error_for_status()?; - let content_type: Option<Mime> = response - .headers() - .get(CONTENT_TYPE) - .and_then(|h| h.to_str().ok()) - .and_then(|h| h.parse().ok()); + // In some cases servers send a wrong mime type for images, which prevents thumbnail + // generation. To avoid this we also try to guess the mime type from file extension. + let content_type = mime_guess::from_path(url.path()) + .first() + // If you can guess that its an image type, then return that first. + .filter(|guess| guess.type_() == mime::IMAGE) + // Otherwise, get the content type from the headers + .or( + response + .headers() + .get(CONTENT_TYPE) + .and_then(|h| h.to_str().ok()) + .and_then(|h| h.parse().ok()), + ); let opengraph_data = { // if the content type is not text/html, we don't need to parse it