From 556358f53e386dcea738271ce2d9ad154603197b Mon Sep 17 00:00:00 2001 From: Nutomic Date: Mon, 18 Nov 2024 15:58:31 +0100 Subject: [PATCH] Dont sanitize RSS content manually (fixes #5171) (#5174) --- Cargo.lock | 16 ++++++++-- crates/routes/Cargo.toml | 2 +- crates/routes/src/feeds.rs | 41 +++++++------------------- crates/utils/src/utils/markdown/mod.rs | 7 ++++- 4 files changed, 30 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index caef131eaa..833fe2d9e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3796,6 +3796,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "quick-xml" +version = "0.37.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f22f29bdff3987b4d8632ef95fd6424ec7e4e0a57e2f4fc63e489e75357f6a03" +dependencies = [ + "encoding_rs", + "memchr", +] + [[package]] name = "quinn" version = "0.11.5" @@ -4158,14 +4168,14 @@ dependencies = [ [[package]] name = "rss" -version = "2.0.9" +version = "2.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27e92048f840d98c6d6dd870af9101610ea9ff413f11f1bcebf4f4c31d96d957" +checksum = "554a62b3dd5450fcbb0435b3db809f9dd3c6e9f5726172408f7ad3b57ed59057" dependencies = [ "atom_syndication", "derive_builder", "never", - "quick-xml 0.36.1", + "quick-xml 0.37.1", ] [[package]] diff --git a/crates/routes/Cargo.toml b/crates/routes/Cargo.toml index 4a8c53dea2..91c3ed6830 100644 --- a/crates/routes/Cargo.toml +++ b/crates/routes/Cargo.toml @@ -33,4 +33,4 @@ url = { workspace = true } tracing = { workspace = true } tokio = { workspace = true } http.workspace = true -rss = "2.0.9" +rss = "2.0.10" diff --git a/crates/routes/src/feeds.rs b/crates/routes/src/feeds.rs index 00518032db..55e9cc7f3c 100644 --- a/crates/routes/src/feeds.rs +++ b/crates/routes/src/feeds.rs @@ -23,7 +23,7 @@ use lemmy_db_views_actor::{ use lemmy_utils::{ cache_header::cache_1hour, error::{LemmyError, LemmyErrorType, LemmyResult}, - utils::markdown::{markdown_to_html, sanitize_html}, + utils::markdown::markdown_to_html, }; use rss::{ extension::{dublincore::DublinCoreExtension, ExtensionBuilder, ExtensionMap}, @@ -93,23 +93,6 @@ static RSS_NAMESPACE: LazyLock> = LazyLock::new(|| { h }); -/// Removes any characters disallowed by the XML grammar. -/// See https://www.w3.org/TR/xml/#NT-Char for details. -fn sanitize_xml(input: String) -> String { - input - .chars() - .filter(|&c| { - matches!(c, - '\u{09}' - | '\u{0A}' - | '\u{0D}' - | '\u{20}'..='\u{D7FF}' - | '\u{E000}'..='\u{FFFD}' - | '\u{10000}'..='\u{10FFFF}') - }) - .collect() -} - #[tracing::instrument(skip_all)] async fn get_all_feed( info: web::Query, @@ -278,7 +261,7 @@ async fn get_feed_user( let items = create_post_items(posts, &context.settings().get_protocol_and_hostname())?; let channel = Channel { namespaces: RSS_NAMESPACE.clone(), - title: format!("{} - {}", sanitize_xml(site_view.site.name), person.name), + title: format!("{} - {}", site_view.site.name, person.name), link: person.actor_id.to_string(), items, ..Default::default() @@ -319,7 +302,7 @@ async fn get_feed_community( let mut channel = Channel { namespaces: RSS_NAMESPACE.clone(), - title: format!("{} - {}", sanitize_xml(site_view.site.name), community.name), + title: format!("{} - {}", site_view.site.name, community.name), link: community.actor_id.to_string(), items, ..Default::default() @@ -360,7 +343,7 @@ async fn get_feed_front( let items = create_post_items(posts, &protocol_and_hostname)?; let mut channel = Channel { namespaces: RSS_NAMESPACE.clone(), - title: format!("{} - Subscribed", sanitize_xml(site_view.site.name)), + title: format!("{} - Subscribed", site_view.site.name), link: protocol_and_hostname, items, ..Default::default() @@ -411,7 +394,7 @@ async fn get_feed_inbox(context: &LemmyContext, jwt: &str) -> LemmyResult, protocol_and_hostname: &str) -> Lemmy for p in posts { let post_url = format!("{}/post/{}", protocol_and_hostname, p.post.id); - let community_url = format!( - "{}/c/{}", - protocol_and_hostname, - sanitize_html(&p.community.name) - ); + let community_url = format!("{}/c/{}", protocol_and_hostname, &p.community.name); let dublin_core_ext = Some(DublinCoreExtension { creators: vec![p.creator.actor_id.to_string()], ..DublinCoreExtension::default() @@ -513,9 +492,9 @@ fn create_post_items(posts: Vec, protocol_and_hostname: &str) -> Lemmy }); let mut description = format!("submitted by {} to {}
{} points | {} comments", p.creator.actor_id, - sanitize_html(&p.creator.name), + &p.creator.name, community_url, - sanitize_html(&p.community.name), + &p.community.name, p.counts.score, post_url, p.counts.comments); @@ -566,11 +545,11 @@ fn create_post_items(posts: Vec, protocol_and_hostname: &str) -> Lemmy }; let i = Item { - title: Some(sanitize_html(sanitize_xml(p.post.name).as_str())), + title: Some(p.post.name), pub_date: Some(p.post.published.to_rfc2822()), comments: Some(post_url.clone()), guid, - description: Some(sanitize_xml(description)), + description: Some(description), dublin_core_ext, link: Some(post_url.clone()), extensions, diff --git a/crates/utils/src/utils/markdown/mod.rs b/crates/utils/src/utils/markdown/mod.rs index 9d34e8a698..3dfa8e9f19 100644 --- a/crates/utils/src/utils/markdown/mod.rs +++ b/crates/utils/src/utils/markdown/mod.rs @@ -259,6 +259,11 @@ mod tests { fn test_sanitize_html() { let sanitized = sanitize_html(" hello &\"'"); let expected = "<script>alert('xss');</script> hello &"'"; - assert_eq!(expected, sanitized) + assert_eq!(expected, sanitized); + + let sanitized = + sanitize_html("Polling the group: what do y'all know about the Orion browser from Kagi?"); + let expected = "Polling the group: what do y'all know about the Orion browser from Kagi?"; + assert_eq!(expected, sanitized); } }