Some cleanup.

This commit is contained in:
Dessalines 2024-12-03 23:08:31 -05:00
parent e33e72729c
commit c5e4430c38

View file

@ -73,13 +73,18 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
// then try to infer the content_type from the file extension. // then try to infer the content_type from the file extension.
.or(mime_guess::from_path(url.path()).first()); .or(mime_guess::from_path(url.path()).first());
let opengraph_data = 'ograph: { let opengraph_data = {
if let Some(c) = &content_type { let is_html = content_type
.as_ref()
.map(|c| {
// application/xhtml+xml is a subset of HTML // application/xhtml+xml is a subset of HTML
let application_xhtml: Mime = "application/xhtml+xml".parse()?; let application_xhtml: Mime = "application/xhtml+xml".parse::<Mime>().unwrap_or(TEXT_HTML);
if c.essence_str() == TEXT_HTML.essence_str() let allowed_mime_types = [TEXT_HTML.essence_str(), application_xhtml.essence_str()];
|| c.essence_str() == application_xhtml.essence_str() allowed_mime_types.contains(&c.essence_str())
{ })
.unwrap_or_default();
if is_html {
// Can't use .text() here, because it only checks the content header, not the actual bytes // Can't use .text() here, because it only checks the content header, not the actual bytes
// https://github.com/LemmyNet/lemmy/issues/1964 // https://github.com/LemmyNet/lemmy/issues/1964
// So we want to do deep inspection of the actually returned bytes but need to be careful // So we want to do deep inspection of the actually returned bytes but need to be careful
@ -87,20 +92,27 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
// only take first bytes regardless of how many bytes the server returns // only take first bytes regardless of how many bytes the server returns
let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?; let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?;
break 'ograph extract_opengraph_data(&html_bytes, url) extract_opengraph_data(&html_bytes, url)
.map_err(|e| info!("{e}")) .map_err(|e| info!("{e}"))
.unwrap_or_default()
} else {
let is_octet_type = content_type
.as_ref()
.map(|c| c.subtype() == "octet-stream")
.unwrap_or_default(); .unwrap_or_default();
}
// If a server is serving `application/octet-stream`, it's likely a mistake, // Overwrite the content type if it's an octet type
// so we try to guess the file type from its magic number. if is_octet_type {
else if c.subtype() == "octet-stream" {
// Don't need to fetch as much data for this as we do with opengraph // Don't need to fetch as much data for this as we do with opengraph
let octet_bytes = collect_bytes_until_limit(response, 512).await?; let octet_bytes = collect_bytes_until_limit(response, 512).await?;
// content_type = infer::get(&octet_bytes).or(&content_type, |t|
// t.mime_type().parse().ok());
content_type = content_type =
infer::get(&octet_bytes).map_or(content_type, |t| t.mime_type().parse().ok()); infer::get(&octet_bytes).map_or(content_type, |t| t.mime_type().parse().ok());
} }
}
Default::default() Default::default()
}
}; };
Ok(LinkMetadata { Ok(LinkMetadata {