mirror of
https://github.com/LemmyNet/lemmy.git
synced 2025-01-08 19:21:41 +00:00
Don't wrap response in an option
This commit is contained in:
parent
648f97492a
commit
1945011034
1 changed files with 34 additions and 55 deletions
|
@ -18,7 +18,7 @@ use lemmy_db_schema::{
|
|||
},
|
||||
};
|
||||
use lemmy_utils::{
|
||||
error::{FederationError, LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult},
|
||||
error::{LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult},
|
||||
settings::structs::{PictrsImageMode, Settings},
|
||||
REQWEST_TIMEOUT,
|
||||
VERSION,
|
||||
|
@ -54,71 +54,50 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
|
|||
// We only fetch the first 64kB of data in order to not waste bandwidth especially for large
|
||||
// binary files
|
||||
let bytes_to_fetch = 64 * 1024;
|
||||
let mut response = Some(
|
||||
context
|
||||
.client()
|
||||
.get(url.as_str())
|
||||
// we only need the first chunk of data. Note that we do not check for Accept-Range so the
|
||||
// server may ignore this and still respond with the full response
|
||||
.header(RANGE, format!("bytes=0-{}", bytes_to_fetch - 1)) /* -1 because inclusive */
|
||||
.send()
|
||||
.await?
|
||||
.error_for_status()?,
|
||||
);
|
||||
let response = context
|
||||
.client()
|
||||
.get(url.as_str())
|
||||
// we only need the first chunk of data. Note that we do not check for Accept-Range so the
|
||||
// server may ignore this and still respond with the full response
|
||||
.header(RANGE, format!("bytes=0-{}", bytes_to_fetch - 1)) /* -1 because inclusive */
|
||||
.send()
|
||||
.await?
|
||||
.error_for_status()?;
|
||||
|
||||
let content_type: Option<Mime> = {
|
||||
let mut mime = response.as_ref().and_then(|m| {
|
||||
m.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.and_then(|h| h.parse().ok())
|
||||
});
|
||||
let mut content_type: Option<Mime> = response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.and_then(|h| h.parse().ok());
|
||||
|
||||
// If a server is serving `application/octet-stream`, it's likely a mistake,
|
||||
// so we try to guess the file type from its magic number.
|
||||
if mime
|
||||
.as_ref()
|
||||
.is_some_and(|m: &Mime| m.subtype() == "octet-stream")
|
||||
let mut opengraph_data = Default::default();
|
||||
|
||||
if let Some(c) = &content_type {
|
||||
if (c.type_() == mime::TEXT && c.subtype() == mime::HTML)
|
||||
||
|
||||
// application/xhtml+xml is a subset of HTML
|
||||
(c.type_() == mime::APPLICATION && c.subtype() == "xhtml")
|
||||
{
|
||||
// Don't need to fetch as much data for this as we do with opengraph
|
||||
let octet_bytes =
|
||||
collect_bytes_until_limit(response.take().ok_or(FederationError::Unreachable)?, 512)
|
||||
.await?;
|
||||
mime = infer::get(&octet_bytes).map_or(mime, |t| t.mime_type().parse().ok());
|
||||
}
|
||||
mime
|
||||
};
|
||||
|
||||
let opengraph_data = {
|
||||
// if the content type is not text/html, we don't need to parse it
|
||||
let is_html = content_type
|
||||
.as_ref()
|
||||
.map(|c| {
|
||||
(c.type_() == mime::TEXT && c.subtype() == mime::HTML)
|
||||
||
|
||||
// application/xhtml+xml is a subset of HTML
|
||||
(c.type_() == mime::APPLICATION && c.subtype() == "xhtml")
|
||||
})
|
||||
.unwrap_or(false);
|
||||
if !is_html || response.is_none() {
|
||||
Default::default()
|
||||
} else {
|
||||
// Can't use .text() here, because it only checks the content header, not the actual bytes
|
||||
// https://github.com/LemmyNet/lemmy/issues/1964
|
||||
// So we want to do deep inspection of the actually returned bytes but need to be careful not
|
||||
// spend too much time parsing binary data as HTML
|
||||
|
||||
// only take first bytes regardless of how many bytes the server returns
|
||||
let html_bytes = collect_bytes_until_limit(
|
||||
response.take().ok_or(FederationError::Unreachable)?,
|
||||
bytes_to_fetch,
|
||||
)
|
||||
.await?;
|
||||
extract_opengraph_data(&html_bytes, url)
|
||||
let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?;
|
||||
opengraph_data = extract_opengraph_data(&html_bytes, url)
|
||||
.map_err(|e| info!("{e}"))
|
||||
.unwrap_or_default()
|
||||
.unwrap_or_default();
|
||||
}
|
||||
};
|
||||
// If a server is serving `application/octet-stream`, it's likely a mistake,
|
||||
// so we try to guess the file type from its magic number.
|
||||
else if c.subtype() == "octet-stream" {
|
||||
// Don't need to fetch as much data for this as we do with opengraph
|
||||
let octet_bytes = collect_bytes_until_limit(response, 512).await?;
|
||||
content_type = infer::get(&octet_bytes).map_or(content_type, |t| t.mime_type().parse().ok());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(LinkMetadata {
|
||||
opengraph_data,
|
||||
content_type: content_type.map(|c| c.to_string()),
|
||||
|
|
Loading…
Reference in a new issue