Clean-up + guess mime type from extension if server is unresponsive

This commit is contained in:
flamingos-cant 2024-12-01 06:15:03 +00:00
parent 0bee91b64c
commit 6d39a97cb0
3 changed files with 29 additions and 6 deletions

19
Cargo.lock generated
View file

@ -2535,6 +2535,7 @@ dependencies = [
"lemmy_db_views_moderator", "lemmy_db_views_moderator",
"lemmy_utils", "lemmy_utils",
"mime", "mime",
"mime_guess",
"moka", "moka",
"pretty_assertions", "pretty_assertions",
"regex", "regex",
@ -2897,7 +2898,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"windows-targets 0.48.5", "windows-targets 0.52.6",
] ]
[[package]] [[package]]
@ -3166,6 +3167,16 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "mime_guess"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
dependencies = [
"mime",
"unicase",
]
[[package]] [[package]]
name = "minimal-lexical" name = "minimal-lexical"
version = "0.2.1" version = "0.2.1"
@ -5319,6 +5330,12 @@ version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "unicase"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df"
[[package]] [[package]]
name = "unicode-bidi" name = "unicode-bidi"
version = "0.3.15" version = "0.3.15"

View file

@ -65,6 +65,7 @@ actix-web = { workspace = true, optional = true }
enum-map = { workspace = true } enum-map = { workspace = true }
urlencoding = { workspace = true } urlencoding = { workspace = true }
mime = { version = "0.3.17", optional = true } mime = { version = "0.3.17", optional = true }
mime_guess = "2.0.5"
infer = "0.16.0" infer = "0.16.0"
webpage = { version = "2.0", default-features = false, features = [ webpage = { version = "2.0", default-features = false, features = [
"serde", "serde",

View file

@ -23,7 +23,7 @@ use lemmy_utils::{
REQWEST_TIMEOUT, REQWEST_TIMEOUT,
VERSION, VERSION,
}; };
use mime::Mime; use mime::{Mime, TEXT_HTML};
use reqwest::{ use reqwest::{
header::{CONTENT_TYPE, RANGE}, header::{CONTENT_TYPE, RANGE},
Client, Client,
@ -73,10 +73,10 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
let mut opengraph_data = Default::default(); let mut opengraph_data = Default::default();
if let Some(c) = &content_type { if let Some(c) = &content_type {
if (c.type_() == mime::TEXT && c.subtype() == mime::HTML)
||
// application/xhtml+xml is a subset of HTML // application/xhtml+xml is a subset of HTML
(c.type_() == mime::APPLICATION && c.subtype() == "xhtml") let application_xhtml: Mime = "application/xhtml+xml".parse()?;
if c.essence_str() == TEXT_HTML.essence_str()
|| c.essence_str() == application_xhtml.essence_str()
{ {
// Can't use .text() here, because it only checks the content header, not the actual bytes // Can't use .text() here, because it only checks the content header, not the actual bytes
// https://github.com/LemmyNet/lemmy/issues/1964 // https://github.com/LemmyNet/lemmy/issues/1964
@ -97,6 +97,11 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
content_type = infer::get(&octet_bytes).map_or(content_type, |t| t.mime_type().parse().ok()); content_type = infer::get(&octet_bytes).map_or(content_type, |t| t.mime_type().parse().ok());
} }
} }
// If we don't get a content_type from the response (e.g. if the server is down),
// then try to infer the content_type from the file extension.
else {
content_type = mime_guess::from_path(url.path()).first();
}
Ok(LinkMetadata { Ok(LinkMetadata {
opengraph_data, opengraph_data,