Fix crash while rewriting markdown (fixes #5380) (#5395)

* Special characters in markdown links (ref #5380)

* fix (fixes #5380)

* fix test
This commit is contained in:
Nutomic 2025-02-06 13:00:36 +00:00 committed by GitHub
parent 20fa90a23c
commit 865f0734ba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 55 additions and 63 deletions

View file

@ -1,6 +1,15 @@
use super::{link_rule::Link, MARKDOWN_PARSER};
use crate::settings::SETTINGS;
use markdown_it::{plugins::cmark::inline::image::Image, NodeValue};
use super::link_rule::Link;
use crate::{settings::SETTINGS, utils::markdown::link_rule};
use markdown_it::{
parser::linkfmt::LinkFormatter,
plugins::cmark::{
block::fence,
inline::{image, image::Image},
},
MarkdownIt,
NodeValue,
};
use std::sync::LazyLock;
use url::Url;
use urlencoding::encode;
@ -55,7 +64,18 @@ pub fn markdown_find_links(src: &str) -> Vec<(usize, usize)> {
// Walk the syntax tree to find positions of image or link urls
fn find_urls<T: NodeValue + UrlAndTitle>(src: &str) -> Vec<(usize, usize)> {
let ast = MARKDOWN_PARSER.parse(src);
// Use separate markdown parser here, with most features disabled for faster parsing,
// and a dummy link formatter which doesnt normalize links.
static PARSER: LazyLock<MarkdownIt> = LazyLock::new(|| {
let mut p = MarkdownIt::new();
p.link_formatter = Box::new(NoopLinkFormatter {});
image::add(&mut p);
fence::add(&mut p);
link_rule::add(&mut p);
p
});
let ast = PARSER.parse(src);
let mut links_offsets = vec![];
ast.walk(|node, _depth| {
if let Some(image) = node.cast::<T>() {
@ -94,6 +114,25 @@ impl UrlAndTitle for Link {
}
}
/// markdown-it normalizes links by default, which breaks the link rewriting. So we use a dummy
/// formatter here which does nothing. Note this isnt actually used to render the markdown.
#[derive(Debug)]
struct NoopLinkFormatter;
impl LinkFormatter for NoopLinkFormatter {
fn validate_link(&self, _url: &str) -> Option<()> {
Some(())
}
fn normalize_link(&self, url: &str) -> String {
url.to_owned()
}
fn normalize_link_text(&self, url: &str) -> String {
url.to_owned()
}
}
#[cfg(test)]
mod tests {
@ -107,6 +146,12 @@ mod tests {
let links = find_urls::<Image>("![test](https://example.com)");
assert_eq!(vec![(8, 27)], links);
let links = find_urls::<Image>("![ითხოვს](https://example.com/ითხოვს)");
assert_eq!(vec![(22, 60)], links);
let links = find_urls::<Image>("![test](https://example.com/%C3%A4%C3%B6%C3%BC.jpg)");
assert_eq!(vec![(8, 50)], links);
}
#[test]
@ -152,7 +197,12 @@ mod tests {
"custom emoji support",
r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#,
r#"![party-blob](https://lemmy-alpha/api/v4/image/proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#
)
),
(
"image with special chars",
"ითხოვს ![ითხოვს](http://example.com/ითხოვს%C3%A4.jpg)",
"ითხოვს ![ითხოვს](https://lemmy-alpha/api/v4/image/proxy?url=http%3A%2F%2Fexample.com%2F%E1%83%98%E1%83%97%E1%83%AE%E1%83%9D%E1%83%95%E1%83%A1%25C3%25A4.jpg)",
),
];
tests.iter().for_each(|&(msg, input, expected)| {

View file

@ -49,7 +49,6 @@ mod tests {
use super::*;
use crate::utils::validation::check_urls_are_valid;
use image_links::markdown_rewrite_image_links;
use pretty_assertions::assert_eq;
use regex::escape;
@ -148,63 +147,6 @@ mod tests {
});
}
#[test]
fn test_markdown_proxy_images() {
let tests: Vec<_> =
vec![
(
"remote image proxied",
"![link](http://example.com/image.jpg)",
"![link](https://lemmy-alpha/api/v4/image/proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)",
),
(
"local image unproxied",
"![link](http://lemmy-alpha/image.jpg)",
"![link](http://lemmy-alpha/image.jpg)",
),
(
"multiple image links",
"![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)",
"![link](https://lemmy-alpha/api/v4/image/proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v4/image/proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)",
),
(
"empty link handled",
"![image]()",
"![image]()"
),
(
"empty label handled",
"![](http://example.com/image.jpg)",
"![](https://lemmy-alpha/api/v4/image/proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)"
),
(
"invalid image link removed",
"![image](http-not-a-link)",
"![image]()"
),
(
"label with nested markdown handled",
"![a *b* c](http://example.com/image.jpg)",
"![a *b* c](https://lemmy-alpha/api/v4/image/proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)"
),
(
"custom emoji support",
r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#,
r#"![party-blob](https://lemmy-alpha/api/v4/image/proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#
)
];
tests.iter().for_each(|&(msg, input, expected)| {
let result = markdown_rewrite_image_links(input.to_string());
assert_eq!(
result.0, expected,
"Testing {}, with original input '{}'",
msg, input
);
});
}
// This replicates the logic when saving url blocklist patterns and querying them.
// Refer to lemmy_api_crud::site::update::update_site and
// lemmy_api_common::utils::get_url_blocklist().