From c5e4430c389541158efcd009fca59c8a2b2830d8 Mon Sep 17 00:00:00 2001
From: Dessalines <tyhou13@gmx.com>
Date: Tue, 3 Dec 2024 23:08:31 -0500
Subject: [PATCH] Some cleanup.

---
 crates/api_common/src/request.rs | 54 +++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 21 deletions(-)
diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs
index df8bc5bc4..9b651a871 100644
--- a/crates/api_common/src/request.rs
+++ b/crates/api_common/src/request.rs
@@ -73,34 +73,46 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
     // then try to infer the content_type from the file extension.
     .or(mime_guess::from_path(url.path()).first());
 
-  let opengraph_data = 'ograph: {
-    if let Some(c) = &content_type {
-      // application/xhtml+xml is a subset of HTML
-      let application_xhtml: Mime = "application/xhtml+xml".parse()?;
-      if c.essence_str() == TEXT_HTML.essence_str()
-        || c.essence_str() == application_xhtml.essence_str()
-      {
-        // Can't use .text() here, because it only checks the content header, not the actual bytes
-        // https://github.com/LemmyNet/lemmy/issues/1964
-        // So we want to do deep inspection of the actually returned bytes but need to be careful
-        // not spend too much time parsing binary data as HTML
+  let opengraph_data = {
+    let is_html = content_type
+      .as_ref()
+      .map(|c| {
+        // application/xhtml+xml is a subset of HTML
+        let application_xhtml: Mime = "application/xhtml+xml".parse::<Mime>().unwrap_or(TEXT_HTML);
+        let allowed_mime_types = [TEXT_HTML.essence_str(), application_xhtml.essence_str()];
+        allowed_mime_types.contains(&c.essence_str())
+      })
+      .unwrap_or_default();
 
-        // only take first bytes regardless of how many bytes the server returns
-        let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?;
-        break 'ograph extract_opengraph_data(&html_bytes, url)
-          .map_err(|e| info!("{e}"))
-          .unwrap_or_default();
-      }
-      // If a server is serving `application/octet-stream`, it's likely a mistake,
-      // so we try to guess the file type from its magic number.
-      else if c.subtype() == "octet-stream" {
+    if is_html {
+      // Can't use .text() here, because it only checks the content header, not the actual bytes
+      // https://github.com/LemmyNet/lemmy/issues/1964
+      // So we want to do deep inspection of the actually returned bytes but need to be careful
+      // not to spend too much time parsing binary data as HTML
+
+      // only take first bytes regardless of how many bytes the server returns
+      let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?;
+      extract_opengraph_data(&html_bytes, url)
+        .map_err(|e| info!("{e}"))
+        .unwrap_or_default()
+    } else {
+      let is_octet_type = content_type
+        .as_ref()
+        .map(|c| c.subtype() == "octet-stream")
+        .unwrap_or_default();
+
+      // Overwrite the content type if it's an octet type
+      if is_octet_type {
         // Don't need to fetch as much data for this as we do with opengraph
         let octet_bytes = collect_bytes_until_limit(response, 512).await?;
+        // content_type = infer::get(&octet_bytes).or(&content_type, |t|
+        // t.mime_type().parse().ok());
         content_type =
           infer::get(&octet_bytes).map_or(content_type, |t| t.mime_type().parse().ok());
       }
+
+      Default::default()
     }
-    Default::default()
   };
 
   Ok(LinkMetadata {