Don't wrap response in an option

2024-12-23 03:11:32 +00:00 · 2024-11-27 16:25:54 +00:00 · 2024-11-27 16:25:54 +00:00 · 1945011034
commit 1945011034
parent 648f97492a
1 changed files with 34 additions and 55 deletions
--- a/crates/api_common/src/request.rs
+++ b/crates/api_common/src/request.rs
@ -18,7 +18,7 @@ use lemmy_db_schema::{
  },
 };
 use lemmy_utils::{
-  error::{FederationError, LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult},
+  error::{LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult},
  settings::structs::{PictrsImageMode, Settings},
  REQWEST_TIMEOUT,
  VERSION,
@ -54,71 +54,50 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
  // We only fetch the first 64kB of data in order to not waste bandwidth especially for large
  // binary files
  let bytes_to_fetch = 64 * 1024;
-  let mut response = Some(
+  let response = context
-    context
+    .client()
-      .client()
+    .get(url.as_str())
-      .get(url.as_str())
+    // we only need the first chunk of data. Note that we do not check for Accept-Range so the
-      // we only need the first chunk of data. Note that we do not check for Accept-Range so the
+    // server may ignore this and still respond with the full response
-      // server may ignore this and still respond with the full response
+    .header(RANGE, format!("bytes=0-{}", bytes_to_fetch - 1)) /* -1 because inclusive */
-      .header(RANGE, format!("bytes=0-{}", bytes_to_fetch - 1)) /* -1 because inclusive */
+    .send()
-      .send()
+    .await?
-      .await?
+    .error_for_status()?;
      .error_for_status()?,
  );
-  let content_type: Option<Mime> = {
+  let mut content_type: Option<Mime> = response
-    let mut mime = response.as_ref().and_then(|m| {
+    .headers()
-      m.headers()
+    .get(CONTENT_TYPE)
-        .get(CONTENT_TYPE)
+    .and_then(|h| h.to_str().ok())
-        .and_then(|h| h.to_str().ok())
+    .and_then(|h| h.parse().ok());
        .and_then(|h| h.parse().ok())
    });
-    // If a server is serving `application/octet-stream`, it's likely a mistake,
+  let mut opengraph_data = Default::default();
-    // so we try to guess the file type from its magic number.
+
-    if mime
+  if let Some(c) = &content_type {
-      .as_ref()
+    if (c.type_() == mime::TEXT && c.subtype() == mime::HTML)
-      .is_some_and(|m: &Mime| m.subtype() == "octet-stream")
+	    ||
 	    // application/xhtml+xml is a subset of HTML
 	    (c.type_() == mime::APPLICATION && c.subtype() == "xhtml")
    {
      // Don't need to fetch as much data for this as we do with opengraph
      let octet_bytes =
        collect_bytes_until_limit(response.take().ok_or(FederationError::Unreachable)?, 512)
          .await?;
      mime = infer::get(&octet_bytes).map_or(mime, |t| t.mime_type().parse().ok());
    }
    mime
  };
  let opengraph_data = {
    // if the content type is not text/html, we don't need to parse it
    let is_html = content_type
      .as_ref()
      .map(|c| {
        (c.type_() == mime::TEXT && c.subtype() == mime::HTML)
 	  ||
 	// application/xhtml+xml is a subset of HTML
 	  (c.type_() == mime::APPLICATION && c.subtype() == "xhtml")
      })
      .unwrap_or(false);
    if !is_html || response.is_none() {
      Default::default()
    } else {
      // Can't use .text() here, because it only checks the content header, not the actual bytes
      // https://github.com/LemmyNet/lemmy/issues/1964
      // So we want to do deep inspection of the actually returned bytes but need to be careful not
      // to spend too much time parsing binary data as HTML
      // only take first bytes regardless of how many bytes the server returns
-      let html_bytes = collect_bytes_until_limit(
+      let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?;
-        response.take().ok_or(FederationError::Unreachable)?,
+      opengraph_data = extract_opengraph_data(&html_bytes, url)
        bytes_to_fetch,
      )
      .await?;
      extract_opengraph_data(&html_bytes, url)
        .map_err(|e| info!("{e}"))
-        .unwrap_or_default()
+        .unwrap_or_default();
    }
-  };
+    // If a server is serving `application/octet-stream`, it's likely a mistake,
    // so we try to guess the file type from its magic number.
    else if c.subtype() == "octet-stream" {
      // Don't need to fetch as much data for this as we do with opengraph
      let octet_bytes = collect_bytes_until_limit(response, 512).await?;
      content_type = infer::get(&octet_bytes).map_or(content_type, |t| t.mime_type().parse().ok());
    }
  }
  Ok(LinkMetadata {
    opengraph_data,
    content_type: content_type.map(|c| c.to_string()),