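Some cleanup: replace the labeled `'ograph` block in `fetch_link_metadata` with an `is_html` check and a plain `if`/`else`.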

2025-01-03 08:41:34 +00:00 · 2024-12-03 23:08:31 -05:00 · 2024-12-03 23:08:31 -05:00 · c5e4430c38
commit c5e4430c38
parent e33e72729c
1 changed file with 33 additions and 21 deletions
--- a/crates/api_common/src/request.rs
+++ b/crates/api_common/src/request.rs
@@ -73,34 +73,46 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
    // then try to infer the content_type from the file extension.
    .or(mime_guess::from_path(url.path()).first());
-  let opengraph_data = 'ograph: {
+  let opengraph_data = {
-    if let Some(c) = &content_type {
+    let is_html = content_type
-      // application/xhtml+xml is a subset of HTML
+      .as_ref()
-      let application_xhtml: Mime = "application/xhtml+xml".parse()?;
+      .map(|c| {
-      if c.essence_str() == TEXT_HTML.essence_str()
+        // application/xhtml+xml is a subset of HTML
-        || c.essence_str() == application_xhtml.essence_str()
+        let application_xhtml: Mime = "application/xhtml+xml".parse::<Mime>().unwrap_or(TEXT_HTML);
-      {
+        let allowed_mime_types = [TEXT_HTML.essence_str(), application_xhtml.essence_str()];
-        // Can't use .text() here, because it only checks the content header, not the actual bytes
+        allowed_mime_types.contains(&c.essence_str())
-        // https://github.com/LemmyNet/lemmy/issues/1964
+      })
-        // So we want to do deep inspection of the actually returned bytes but need to be careful
+      .unwrap_or_default();
        // not spend too much time parsing binary data as HTML
-        // only take first bytes regardless of how many bytes the server returns
+    if is_html {
-        let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?;
+      // Can't use .text() here, because it only checks the content header, not the actual bytes
-        break 'ograph extract_opengraph_data(&html_bytes, url)
+      // https://github.com/LemmyNet/lemmy/issues/1964
-          .map_err(|e| info!("{e}"))
+      // So we want to do deep inspection of the actually returned bytes but need to be careful
-          .unwrap_or_default();
+      // not spend too much time parsing binary data as HTML
-      }
+
-      // If a server is serving `application/octet-stream`, it's likely a mistake,
+      // only take first bytes regardless of how many bytes the server returns
-      // so we try to guess the file type from its magic number.
+      let html_bytes = collect_bytes_until_limit(response, bytes_to_fetch).await?;
-      else if c.subtype() == "octet-stream" {
+      extract_opengraph_data(&html_bytes, url)
        .map_err(|e| info!("{e}"))
        .unwrap_or_default()
    } else {
      let is_octet_type = content_type
        .as_ref()
        .map(|c| c.subtype() == "octet-stream")
        .unwrap_or_default();
      // Overwrite the content type if it's an octet type
      if is_octet_type {
        // Don't need to fetch as much data for this as we do with opengraph
        let octet_bytes = collect_bytes_until_limit(response, 512).await?;
        // content_type = infer::get(&octet_bytes).or(&content_type, |t|
        // t.mime_type().parse().ok());
        content_type =
          infer::get(&octet_bytes).map_or(content_type, |t| t.mime_type().parse().ok());
      }
      Default::default()
    }
    Default::default()
  };
  Ok(LinkMetadata {