mirror of
https://github.com/LemmyNet/lemmy.git
synced 2025-01-03 16:51:35 +00:00
Some cleanup.
This commit is contained in:
parent
e33e72729c
commit
c5e4430c38
1 changed files with 33 additions and 21 deletions
|
@ -73,34 +73,46 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
|
|||
// then try to infer the content_type from the file extension.
|
||||
.or(mime_guess::from_path(url.path()).first());
|
||||
|
||||
let opengraph_data = 'ograph: {
|
||||
if let Some(c) = &content_type {
|
||||
// application/xhtml+xml is a subset of HTML
|
||||
let application_xhtml: Mime = "application/xhtml+xml".parse()?;
|
||||
if c.essence_str() == TEXT_HTML.essence_str()
|
||||
|| c.essence_str() == application_xhtml.essence_str()
|
||||
{
|
||||
// Can't use .text() here, because it only checks the content header, not the actual bytes
|
||||
// https://github.com/LemmyNet/lemmy/issues/1964
|
||||
// So we want to do deep inspection of the actually returned bytes but need to be careful
|
||||
// not to spend too much time parsing binary data as HTML
|
||||
let opengraph_data = {
|
||||
let is_html = content_type
|
||||
.as_ref()
|
||||
.map(|c| {
|
||||
// application/xhtml+xml is a subset of HTML
|
||||
let application_xhtml: Mime = "application/xhtml+xml".parse::<Mime>().unwrap_or(TEXT_HTML);
|
||||
let allowed_mime_types = [TEXT_HTML.essence_str(), application_xhtml.essence_str()];
|
||||
allowed_mime_types.contains(&c.essence_str())
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
// only take first bytes regardless of how many bytes the server returns
|
||||
let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?;
|
||||
break 'ograph extract_opengraph_data(&html_bytes, url)
|
||||
.map_err(|e| info!("{e}"))
|
||||
.unwrap_or_default();
|
||||
}
|
||||
// If a server is serving `application/octet-stream`, it's likely a mistake,
|
||||
// so we try to guess the file type from its magic number.
|
||||
else if c.subtype() == "octet-stream" {
|
||||
if is_html {
|
||||
// Can't use .text() here, because it only checks the content header, not the actual bytes
|
||||
// https://github.com/LemmyNet/lemmy/issues/1964
|
||||
// So we want to do deep inspection of the actually returned bytes but need to be careful
|
||||
// not to spend too much time parsing binary data as HTML
|
||||
|
||||
// only take first bytes regardless of how many bytes the server returns
|
||||
let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?;
|
||||
extract_opengraph_data(&html_bytes, url)
|
||||
.map_err(|e| info!("{e}"))
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
let is_octet_type = content_type
|
||||
.as_ref()
|
||||
.map(|c| c.subtype() == "octet-stream")
|
||||
.unwrap_or_default();
|
||||
|
||||
// Overwrite the content type if it's an octet type
|
||||
if is_octet_type {
|
||||
// Don't need to fetch as much data for this as we do with opengraph
|
||||
let octet_bytes = collect_bytes_until_limit(response, 512).await?;
|
||||
// content_type = infer::get(&octet_bytes).or(&content_type, |t|
|
||||
// t.mime_type().parse().ok());
|
||||
content_type =
|
||||
infer::get(&octet_bytes).map_or(content_type, |t| t.mime_type().parse().ok());
|
||||
}
|
||||
|
||||
Default::default()
|
||||
}
|
||||
Default::default()
|
||||
};
|
||||
|
||||
Ok(LinkMetadata {
|
||||
|
|
Loading…
Reference in a new issue