Some cleanup.

2025-01-03 16:51:35 +00:00 · 2024-12-03 23:08:31 -05:00 · 2024-12-03 23:08:31 -05:00 · c5e4430c38
commit c5e4430c38
parent e33e72729c
1 changed files with 33 additions and 21 deletions
--- a/crates/api_common/src/request.rs
+++ b/crates/api_common/src/request.rs
@@ -73,34 +73,46 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
    // then try to infer the content_type from the file extension.
    .or(mime_guess::from_path(url.path()).first());

-  let opengraph_data = 'ograph: {
-    if let Some(c) = &content_type {
-      // application/xhtml+xml is a subset of HTML
-      let application_xhtml: Mime = "application/xhtml+xml".parse()?;
-      if c.essence_str() == TEXT_HTML.essence_str()
-        || c.essence_str() == application_xhtml.essence_str()
-      {
-        // Can't use .text() here, because it only checks the content header, not the actual bytes
-        // https://github.com/LemmyNet/lemmy/issues/1964
-        // So we want to do deep inspection of the actually returned bytes but need to be careful
-        // not spend too much time parsing binary data as HTML
+  let opengraph_data = {
+    let is_html = content_type
+      .as_ref()
+      .map(|c| {
+        // application/xhtml+xml is a subset of HTML
+        let application_xhtml: Mime = "application/xhtml+xml".parse::<Mime>().unwrap_or(TEXT_HTML);
+        let allowed_mime_types = [TEXT_HTML.essence_str(), application_xhtml.essence_str()];
+        allowed_mime_types.contains(&c.essence_str())
+      })
+      .unwrap_or_default();

-        // only take first bytes regardless of how many bytes the server returns
-        let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?;
-        break 'ograph extract_opengraph_data(&html_bytes, url)
-          .map_err(|e| info!("{e}"))
-          .unwrap_or_default();
-      }
-      // If a server is serving `application/octet-stream`, it's likely a mistake,
-      // so we try to guess the file type from its magic number.
-      else if c.subtype() == "octet-stream" {
+    if is_html {
+      // Can't use .text() here, because it only checks the content header, not the actual bytes
+      // https://github.com/LemmyNet/lemmy/issues/1964
+      // So we want to do deep inspection of the actually returned bytes but need to be careful
+      // not to spend too much time parsing binary data as HTML
+
+      // only take first bytes regardless of how many bytes the server returns
+      let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?;
+      extract_opengraph_data(&html_bytes, url)
+        .map_err(|e| info!("{e}"))
+        .unwrap_or_default()
+    } else {
+      let is_octet_type = content_type
+        .as_ref()
+        .map(|c| c.subtype() == "octet-stream")
+        .unwrap_or_default();
+
+      // Overwrite the content type if it's an octet type
+      if is_octet_type {
        // Don't need to fetch as much data for this as we do with opengraph
        let octet_bytes = collect_bytes_until_limit(response, 512).await?;
+        // content_type = infer::get(&octet_bytes).or(&content_type, |t|
+        // t.mime_type().parse().ok());
        content_type =
          infer::get(&octet_bytes).map_or(content_type, |t| t.mime_type().parse().ok());
      }
+
+      Default::default()
    }
-    Default::default()
  };

  Ok(LinkMetadata {