Remove invalid XML characters from RSS feeds (#4416)
* Remove all characters that are disallowed by XML * Combine contiguous Unicode ranges into one range
This commit is contained in:
parent
a09027c4c0
commit
328d48ef7e
1 changed file with 23 additions and 8 deletions
|
@ -92,6 +92,23 @@ static RSS_NAMESPACE: Lazy<BTreeMap<String, String>> = Lazy::new(|| {
|
||||||
h
|
h
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/// Removes any characters disallowed by the XML 1.0 grammar.
///
/// A character is kept only if it matches the `Char` production: tab
/// (U+0009), line feed (U+000A), carriage return (U+000D), or anything in
/// the ranges U+0020..=U+D7FF, U+E000..=U+FFFD and U+10000..=U+10FFFF.
/// Everything else (the remaining C0 control characters, U+FFFE and U+FFFF)
/// is dropped; the relative order of the surviving characters is preserved.
///
/// See <https://www.w3.org/TR/xml/#NT-Char> for details.
fn sanitize_xml(mut input: String) -> String {
  // The string is already owned, so filter it in place rather than
  // collecting the surviving characters into a second allocation.
  input.retain(|c| {
    matches!(c,
      '\u{09}'
      | '\u{0A}'
      | '\u{0D}'
      | '\u{20}'..='\u{D7FF}'
      | '\u{E000}'..='\u{FFFD}'
      | '\u{10000}'..='\u{10FFFF}')
  });
  input
}
|
||||||
|
|
||||||
#[tracing::instrument(skip_all)]
|
#[tracing::instrument(skip_all)]
|
||||||
async fn get_all_feed(
|
async fn get_all_feed(
|
||||||
info: web::Query<Params>,
|
info: web::Query<Params>,
|
||||||
|
@ -256,10 +273,9 @@ async fn get_feed_user(
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
let items = create_post_items(posts, &context.settings().get_protocol_and_hostname())?;
|
let items = create_post_items(posts, &context.settings().get_protocol_and_hostname())?;
|
||||||
|
|
||||||
let channel = Channel {
|
let channel = Channel {
|
||||||
namespaces: RSS_NAMESPACE.clone(),
|
namespaces: RSS_NAMESPACE.clone(),
|
||||||
title: format!("{} - {}", site_view.site.name, person.name),
|
title: format!("{} - {}", sanitize_xml(site_view.site.name), person.name),
|
||||||
link: person.actor_id.to_string(),
|
link: person.actor_id.to_string(),
|
||||||
items,
|
items,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
|
@ -298,7 +314,7 @@ async fn get_feed_community(
|
||||||
|
|
||||||
let mut channel = Channel {
|
let mut channel = Channel {
|
||||||
namespaces: RSS_NAMESPACE.clone(),
|
namespaces: RSS_NAMESPACE.clone(),
|
||||||
title: format!("{} - {}", site_view.site.name, community.name),
|
title: format!("{} - {}", sanitize_xml(site_view.site.name), community.name),
|
||||||
link: community.actor_id.to_string(),
|
link: community.actor_id.to_string(),
|
||||||
items,
|
items,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
|
@ -337,10 +353,9 @@ async fn get_feed_front(
|
||||||
|
|
||||||
let protocol_and_hostname = context.settings().get_protocol_and_hostname();
|
let protocol_and_hostname = context.settings().get_protocol_and_hostname();
|
||||||
let items = create_post_items(posts, &protocol_and_hostname)?;
|
let items = create_post_items(posts, &protocol_and_hostname)?;
|
||||||
|
|
||||||
let mut channel = Channel {
|
let mut channel = Channel {
|
||||||
namespaces: RSS_NAMESPACE.clone(),
|
namespaces: RSS_NAMESPACE.clone(),
|
||||||
title: format!("{} - Subscribed", site_view.site.name),
|
title: format!("{} - Subscribed", sanitize_xml(site_view.site.name)),
|
||||||
link: protocol_and_hostname,
|
link: protocol_and_hostname,
|
||||||
items,
|
items,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
|
@ -391,7 +406,7 @@ async fn get_feed_inbox(context: &LemmyContext, jwt: &str) -> Result<Channel, Le
|
||||||
|
|
||||||
let mut channel = Channel {
|
let mut channel = Channel {
|
||||||
namespaces: RSS_NAMESPACE.clone(),
|
namespaces: RSS_NAMESPACE.clone(),
|
||||||
title: format!("{} - Inbox", site_view.site.name),
|
title: format!("{} - Inbox", sanitize_xml(site_view.site.name)),
|
||||||
link: format!("{protocol_and_hostname}/inbox"),
|
link: format!("{protocol_and_hostname}/inbox"),
|
||||||
items,
|
items,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
|
@ -537,11 +552,11 @@ fn create_post_items(
|
||||||
}
|
}
|
||||||
|
|
||||||
let i = Item {
|
let i = Item {
|
||||||
title: Some(sanitize_html(&p.post.name)),
|
title: Some(sanitize_html(sanitize_xml(p.post.name).as_str())),
|
||||||
pub_date: Some(p.post.published.to_rfc2822()),
|
pub_date: Some(p.post.published.to_rfc2822()),
|
||||||
comments: Some(post_url.clone()),
|
comments: Some(post_url.clone()),
|
||||||
guid,
|
guid,
|
||||||
description: Some(description),
|
description: Some(sanitize_xml(description)),
|
||||||
dublin_core_ext,
|
dublin_core_ext,
|
||||||
link,
|
link,
|
||||||
extensions,
|
extensions,
|
||||||
|
|
Loading…
Reference in a new issue