Don't wrap response in an option

This commit is contained in:
flamingos-cant 2024-11-27 16:25:54 +00:00
parent 648f97492a
commit 1945011034

View file

@ -18,7 +18,7 @@ use lemmy_db_schema::{
}, },
}; };
use lemmy_utils::{ use lemmy_utils::{
error::{FederationError, LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult}, error::{LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult},
settings::structs::{PictrsImageMode, Settings}, settings::structs::{PictrsImageMode, Settings},
REQWEST_TIMEOUT, REQWEST_TIMEOUT,
VERSION, VERSION,
@ -54,8 +54,7 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
// We only fetch the first 64kB of data in order to not waste bandwidth especially for large // We only fetch the first 64kB of data in order to not waste bandwidth especially for large
// binary files // binary files
let bytes_to_fetch = 64 * 1024; let bytes_to_fetch = 64 * 1024;
let mut response = Some( let response = context
context
.client() .client()
.get(url.as_str()) .get(url.as_str())
// we only need the first chunk of data. Note that we do not check for Accept-Range so the // we only need the first chunk of data. Note that we do not check for Accept-Range so the
@ -63,62 +62,42 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
.header(RANGE, format!("bytes=0-{}", bytes_to_fetch - 1)) /* -1 because inclusive */ .header(RANGE, format!("bytes=0-{}", bytes_to_fetch - 1)) /* -1 because inclusive */
.send() .send()
.await? .await?
.error_for_status()?, .error_for_status()?;
);
let content_type: Option<Mime> = { let mut content_type: Option<Mime> = response
let mut mime = response.as_ref().and_then(|m| { .headers()
m.headers()
.get(CONTENT_TYPE) .get(CONTENT_TYPE)
.and_then(|h| h.to_str().ok()) .and_then(|h| h.to_str().ok())
.and_then(|h| h.parse().ok()) .and_then(|h| h.parse().ok());
});
// If a server is serving `application/octet-stream`, it's likely a mistake, let mut opengraph_data = Default::default();
// so we try to guess the file type from its magic number.
if mime
.as_ref()
.is_some_and(|m: &Mime| m.subtype() == "octet-stream")
{
// Don't need to fetch as much data for this as we do with opengraph
let octet_bytes =
collect_bytes_until_limit(response.take().ok_or(FederationError::Unreachable)?, 512)
.await?;
mime = infer::get(&octet_bytes).map_or(mime, |t| t.mime_type().parse().ok());
}
mime
};
let opengraph_data = { if let Some(c) = &content_type {
// if the content type is not text/html, we don't need to parse it if (c.type_() == mime::TEXT && c.subtype() == mime::HTML)
let is_html = content_type
.as_ref()
.map(|c| {
(c.type_() == mime::TEXT && c.subtype() == mime::HTML)
|| ||
// application/xhtml+xml is a subset of HTML // application/xhtml+xml is a subset of HTML
(c.type_() == mime::APPLICATION && c.subtype() == "xhtml") (c.type_() == mime::APPLICATION && c.subtype() == "xhtml")
}) {
.unwrap_or(false);
if !is_html || response.is_none() {
Default::default()
} else {
// Can't use .text() here, because it only checks the content header, not the actual bytes // Can't use .text() here, because it only checks the content header, not the actual bytes
// https://github.com/LemmyNet/lemmy/issues/1964 // https://github.com/LemmyNet/lemmy/issues/1964
// So we want to do deep inspection of the actually returned bytes but need to be careful not // So we want to do deep inspection of the actually returned bytes but need to be careful not
// spend too much time parsing binary data as HTML // spend too much time parsing binary data as HTML
// only take first bytes regardless of how many bytes the server returns // only take first bytes regardless of how many bytes the server returns
let html_bytes = collect_bytes_until_limit( let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?;
response.take().ok_or(FederationError::Unreachable)?, opengraph_data = extract_opengraph_data(&html_bytes, url)
bytes_to_fetch,
)
.await?;
extract_opengraph_data(&html_bytes, url)
.map_err(|e| info!("{e}")) .map_err(|e| info!("{e}"))
.unwrap_or_default() .unwrap_or_default();
} }
}; // If a server is serving `application/octet-stream`, it's likely a mistake,
// so we try to guess the file type from its magic number.
else if c.subtype() == "octet-stream" {
// Don't need to fetch as much data for this as we do with opengraph
let octet_bytes = collect_bytes_until_limit(response, 512).await?;
content_type = infer::get(&octet_bytes).map_or(content_type, |t| t.mime_type().parse().ok());
}
}
Ok(LinkMetadata { Ok(LinkMetadata {
opengraph_data, opengraph_data,
content_type: content_type.map(|c| c.to_string()), content_type: content_type.map(|c| c.to_string()),