diff --git a/crates/utils/src/utils/markdown/image_links.rs b/crates/utils/src/utils/markdown/image_links.rs index d22860f67..c6e90685e 100644 --- a/crates/utils/src/utils/markdown/image_links.rs +++ b/crates/utils/src/utils/markdown/image_links.rs @@ -1,6 +1,15 @@ -use super::{link_rule::Link, MARKDOWN_PARSER}; -use crate::settings::SETTINGS; -use markdown_it::{plugins::cmark::inline::image::Image, NodeValue}; +use super::link_rule::Link; +use crate::{settings::SETTINGS, utils::markdown::link_rule}; +use markdown_it::{ + parser::linkfmt::LinkFormatter, + plugins::cmark::{ + block::fence, + inline::{image, image::Image}, + }, + MarkdownIt, + NodeValue, +}; +use std::sync::LazyLock; use url::Url; use urlencoding::encode; @@ -55,7 +64,18 @@ pub fn markdown_find_links(src: &str) -> Vec<(usize, usize)> { // Walk the syntax tree to find positions of image or link urls fn find_urls(src: &str) -> Vec<(usize, usize)> { - let ast = MARKDOWN_PARSER.parse(src); + // Use separate markdown parser here, with most features disabled for faster parsing, + // and a dummy link formatter which doesnt normalize links. + static PARSER: LazyLock = LazyLock::new(|| { + let mut p = MarkdownIt::new(); + p.link_formatter = Box::new(NoopLinkFormatter {}); + image::add(&mut p); + fence::add(&mut p); + link_rule::add(&mut p); + p + }); + + let ast = PARSER.parse(src); let mut links_offsets = vec![]; ast.walk(|node, _depth| { if let Some(image) = node.cast::() { @@ -94,6 +114,25 @@ impl UrlAndTitle for Link { } } +/// markdown-it normalizes links by default, which breaks the link rewriting. So we use a dummy +/// formatter here which does nothing. Note this isnt actually used to render the markdown. +#[derive(Debug)] +struct NoopLinkFormatter; + +impl LinkFormatter for NoopLinkFormatter { + fn validate_link(&self, _url: &str) -> Option<()> { + Some(()) + } + + fn normalize_link(&self, url: &str) -> String { + url.to_owned() + } + + fn normalize_link_text(&self, url: &str) -> String { + url.to_owned() + } +} + #[cfg(test)] mod tests { @@ -107,6 +146,12 @@ mod tests { let links = find_urls::("![test](https://example.com)"); assert_eq!(vec![(8, 27)], links); + + let links = find_urls::("![ითხოვს](https://example.com/ითხოვს)"); + assert_eq!(vec![(22, 60)], links); + + let links = find_urls::("![test](https://example.com/%C3%A4%C3%B6%C3%BC.jpg)"); + assert_eq!(vec![(8, 50)], links); } #[test] @@ -152,7 +197,12 @@ mod tests { "custom emoji support", r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#, r#"![party-blob](https://lemmy-alpha/api/v4/image/proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"# - ) + ), + ( + "image with special chars", + "ითხოვს ![ითხოვს](http://example.com/ითხოვს%C3%A4.jpg)", + "ითხოვს ![ითხოვს](https://lemmy-alpha/api/v4/image/proxy?url=http%3A%2F%2Fexample.com%2F%E1%83%98%E1%83%97%E1%83%AE%E1%83%9D%E1%83%95%E1%83%A1%25C3%25A4.jpg)", + ), ]; tests.iter().for_each(|&(msg, input, expected)| { diff --git a/crates/utils/src/utils/markdown/mod.rs b/crates/utils/src/utils/markdown/mod.rs index 3c761143c..2c955c9d6 100644 --- a/crates/utils/src/utils/markdown/mod.rs +++ b/crates/utils/src/utils/markdown/mod.rs @@ -49,7 +49,6 @@ mod tests { use super::*; use crate::utils::validation::check_urls_are_valid; - use image_links::markdown_rewrite_image_links; use pretty_assertions::assert_eq; use regex::escape; @@ -148,63 +147,6 @@ mod tests { }); } - #[test] - fn test_markdown_proxy_images() { - let tests: Vec<_> = - vec![ - ( - "remote image proxied", - "![link](http://example.com/image.jpg)", - "![link](https://lemmy-alpha/api/v4/image/proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)", - ), - ( - "local image unproxied", - "![link](http://lemmy-alpha/image.jpg)", - "![link](http://lemmy-alpha/image.jpg)", - ), - ( - "multiple image links", - "![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)", - "![link](https://lemmy-alpha/api/v4/image/proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v4/image/proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)", - ), - ( - "empty link handled", - "![image]()", - "![image]()" - ), - ( - "empty label handled", - "![](http://example.com/image.jpg)", - "![](https://lemmy-alpha/api/v4/image/proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" - ), - ( - "invalid image link removed", - "![image](http-not-a-link)", - "![image]()" - ), - ( - "label with nested markdown handled", - "![a *b* c](http://example.com/image.jpg)", - "![a *b* c](https://lemmy-alpha/api/v4/image/proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" - ), - ( - "custom emoji support", - r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#, - r#"![party-blob](https://lemmy-alpha/api/v4/image/proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"# - ) - ]; - - tests.iter().for_each(|&(msg, input, expected)| { - let result = markdown_rewrite_image_links(input.to_string()); - - assert_eq!( - result.0, expected, - "Testing {}, with original input '{}'", - msg, input - ); - }); - } - // This replicates the logic when saving url blocklist patterns and querying them. // Refer to lemmy_api_crud::site::update::update_site and // lemmy_api_common::utils::get_url_blocklist().