From 6d39a97cb046c67093cd378608b4f126d8fe96fc Mon Sep 17 00:00:00 2001 From: flamingos-cant Date: Sun, 1 Dec 2024 06:15:03 +0000 Subject: [PATCH] Clean-up + guess mime type from extension if server is unresponsive --- Cargo.lock | 19 ++++++++++++++++++- crates/api_common/Cargo.toml | 1 + crates/api_common/src/request.rs | 15 ++++++++++----- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e0a19f810..431c0c17c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2535,6 +2535,7 @@ dependencies = [ "lemmy_db_views_moderator", "lemmy_utils", "mime", + "mime_guess", "moka", "pretty_assertions", "regex", @@ -2897,7 +2898,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -3166,6 +3167,16 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -5319,6 +5330,12 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "unicase" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" + [[package]] name = "unicode-bidi" version = "0.3.15" diff --git a/crates/api_common/Cargo.toml b/crates/api_common/Cargo.toml index 5c41e4634..74a0390ca 100644 --- a/crates/api_common/Cargo.toml +++ b/crates/api_common/Cargo.toml @@ -65,6 +65,7 @@ actix-web = { workspace = true, optional = true } enum-map = { workspace = true } urlencoding = { workspace = true } mime = { version = "0.3.17", optional = true } +mime_guess = "2.0.5" infer = "0.16.0" webpage = { version = "2.0", default-features = false, features = [ "serde", diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index bdcf5f464..1ac8e685f 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -23,7 +23,7 @@ use lemmy_utils::{ REQWEST_TIMEOUT, VERSION, }; -use mime::Mime; +use mime::{Mime, TEXT_HTML}; use reqwest::{ header::{CONTENT_TYPE, RANGE}, Client, @@ -73,10 +73,10 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu let mut opengraph_data = Default::default(); if let Some(c) = &content_type { - if (c.type_() == mime::TEXT && c.subtype() == mime::HTML) - || - // application/xhtml+xml is a subset of HTML - (c.type_() == mime::APPLICATION && c.subtype() == "xhtml") + // application/xhtml+xml is a subset of HTML + let application_xhtml: Mime = "application/xhtml+xml".parse()?; + if c.essence_str() == TEXT_HTML.essence_str() + || c.essence_str() == application_xhtml.essence_str() { // Can't use .text() here, because it only checks the content header, not the actual bytes // https://github.com/LemmyNet/lemmy/issues/1964 @@ -97,6 +97,11 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu content_type = infer::get(&octet_bytes).map_or(content_type, |t| t.mime_type().parse().ok()); } } + // If we don't get a content_type from the response (e.g. if the server is down), + // then try to infer the content_type from the file extension. + else { + content_type = mime_guess::from_path(url.path()).first(); + } Ok(LinkMetadata { opengraph_data,