Sanitize HTML from federated data (#ref 647)

This commit is contained in:
Felix Ableitner 2020-09-21 17:02:46 +02:00
parent 9f4493a0b2
commit 0da2fd3d93
7 changed files with 221 additions and 19 deletions

197
Cargo.lock generated
View file

@ -389,6 +389,21 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "ammonia"
version = "3.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89eac85170f4b3fb3dc5e442c1cfb036cb8eecf9dbbd431a161ffad15d90ea3b"
dependencies = [
"html5ever",
"lazy_static",
"maplit",
"markup5ever_rcdom",
"matches",
"tendril",
"url",
]
[[package]] [[package]]
name = "ansi_term" name = "ansi_term"
version = "0.11.0" version = "0.11.0"
@ -1325,6 +1340,16 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
[[package]]
name = "futf"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]] [[package]]
name = "futures" name = "futures"
version = "0.3.5" version = "0.3.5"
@ -1539,6 +1564,20 @@ dependencies = [
"winapi 0.3.9", "winapi 0.3.9",
] ]
[[package]]
name = "html5ever"
version = "0.25.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b"
dependencies = [
"log",
"mac",
"markup5ever",
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "http" name = "http"
version = "0.2.1" version = "0.2.1"
@ -1835,6 +1874,7 @@ dependencies = [
"actix-rt", "actix-rt",
"actix-web", "actix-web",
"actix-web-actors", "actix-web-actors",
"ammonia",
"anyhow", "anyhow",
"async-trait", "async-trait",
"awc", "awc",
@ -2026,12 +2066,47 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d947cbb889ed21c2a84be6ffbaebf5b4e0f4340638cba0444907e38b56be084" checksum = "7d947cbb889ed21c2a84be6ffbaebf5b4e0f4340638cba0444907e38b56be084"
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]] [[package]]
name = "maplit" name = "maplit"
version = "1.0.2" version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "markup5ever"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab"
dependencies = [
"log",
"phf",
"phf_codegen",
"serde 1.0.116",
"serde_derive",
"serde_json",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]]
name = "markup5ever_rcdom"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f015da43bcd8d4f144559a3423f4591d69b8ce0652c905374da7205df336ae2b"
dependencies = [
"html5ever",
"markup5ever",
"tendril",
"xml5ever",
]
[[package]] [[package]]
name = "match_cfg" name = "match_cfg"
version = "0.1.0" version = "0.1.0"
@ -2183,6 +2258,12 @@ dependencies = [
"winapi 0.3.9", "winapi 0.3.9",
] ]
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
[[package]] [[package]]
name = "nom" name = "nom"
version = "4.2.3" version = "4.2.3"
@ -2417,6 +2498,44 @@ dependencies = [
"sha-1 0.8.2", "sha-1 0.8.2",
] ]
[[package]]
name = "phf"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
dependencies = [
"phf_shared",
"rand 0.7.3",
]
[[package]]
name = "phf_shared"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
dependencies = [
"siphasher",
]
[[package]] [[package]]
name = "pin-project" name = "pin-project"
version = "0.4.23" version = "0.4.23"
@ -2482,6 +2601,12 @@ dependencies = [
"vcpkg", "vcpkg",
] ]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]] [[package]]
name = "proc-macro-hack" name = "proc-macro-hack"
version = "0.5.18" version = "0.5.18"
@ -2576,7 +2701,7 @@ dependencies = [
"rand_isaac", "rand_isaac",
"rand_jitter", "rand_jitter",
"rand_os", "rand_os",
"rand_pcg", "rand_pcg 0.1.2",
"rand_xorshift", "rand_xorshift",
"winapi 0.3.9", "winapi 0.3.9",
] ]
@ -2592,6 +2717,7 @@ dependencies = [
"rand_chacha 0.2.2", "rand_chacha 0.2.2",
"rand_core 0.5.1", "rand_core 0.5.1",
"rand_hc 0.2.0", "rand_hc 0.2.0",
"rand_pcg 0.2.1",
] ]
[[package]] [[package]]
@ -2700,6 +2826,15 @@ dependencies = [
"rand_core 0.4.2", "rand_core 0.4.2",
] ]
[[package]]
name = "rand_pcg"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
dependencies = [
"rand_core 0.5.1",
]
[[package]] [[package]]
name = "rand_xorshift" name = "rand_xorshift"
version = "0.1.1" version = "0.1.1"
@ -3095,6 +3230,12 @@ dependencies = [
"num-traits 0.2.12", "num-traits 0.2.12",
] ]
[[package]]
name = "siphasher"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
[[package]] [[package]]
name = "slab" name = "slab"
version = "0.4.2" version = "0.4.2"
@ -3189,6 +3330,31 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0" checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0"
[[package]]
name = "string_cache"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2940c75beb4e3bf3a494cef919a747a2cb81e52571e212bfbd185074add7208a"
dependencies = [
"lazy_static",
"new_debug_unreachable",
"phf_shared",
"precomputed-hash",
"serde 1.0.116",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
]
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.8.0" version = "0.8.0"
@ -3244,6 +3410,17 @@ dependencies = [
"winapi 0.3.9", "winapi 0.3.9",
] ]
[[package]]
name = "tendril"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]] [[package]]
name = "termcolor" name = "termcolor"
version = "1.1.0" version = "1.1.0"
@ -3576,6 +3753,12 @@ dependencies = [
"serde 1.0.116", "serde 1.0.116",
] ]
[[package]]
name = "utf-8"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7"
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "0.7.4" version = "0.7.4"
@ -3842,3 +4025,15 @@ name = "xdg"
version = "2.2.0" version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d089681aa106a86fade1b0128fb5daf07d5867a509ab036d99988dec80429a57" checksum = "d089681aa106a86fade1b0128fb5daf07d5867a509ab036d99988dec80429a57"
[[package]]
name = "xml5ever"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b1b52e6e8614d4a58b8e70cf51ec0cc21b256ad8206708bcff8139b5bbd6a59"
dependencies = [
"log",
"mac",
"markup5ever",
"time 0.1.44",
]

View file

@ -59,6 +59,7 @@ anyhow = "1.0"
thiserror = "1.0" thiserror = "1.0"
background-jobs = " 0.8" background-jobs = " 0.8"
reqwest = { version = "0.10", features = ["json"] } reqwest = { version = "0.10", features = ["json"] }
ammonia = "3.1.0"
[dev-dependencies.cargo-husky] [dev-dependencies.cargo-husky]
version = "1" version = "1"

View file

@ -38,6 +38,7 @@ use activitystreams::{
public, public,
}; };
use actix_web::{body::Body, web, web::Path, HttpResponse}; use actix_web::{body::Body, web, web::Path, HttpResponse};
use ammonia::clean_text;
use anyhow::Context; use anyhow::Context;
use itertools::Itertools; use itertools::Itertools;
use lemmy_db::{ use lemmy_db::{
@ -183,7 +184,7 @@ impl FromApub for CommentForm {
creator_id: creator.id, creator_id: creator.id,
post_id: post.id, post_id: post.id,
parent_id, parent_id,
content: content_slurs_removed, content: clean_text(&content_slurs_removed),
removed: None, removed: None,
read: None, read: None,
published: note.published().map(|u| u.to_owned().naive_local()), published: note.published().map(|u| u.to_owned().naive_local()),

View file

@ -36,6 +36,7 @@ use activitystreams::{
}; };
use activitystreams_ext::Ext2; use activitystreams_ext::Ext2;
use actix_web::{body::Body, web, HttpResponse}; use actix_web::{body::Body, web, HttpResponse};
use ammonia::clean_text;
use anyhow::Context; use anyhow::Context;
use itertools::Itertools; use itertools::Itertools;
use lemmy_db::{ use lemmy_db::{
@ -376,7 +377,7 @@ impl FromApub for CommunityForm {
.content() .content()
.map(|s| s.as_single_xsd_string()) .map(|s| s.as_single_xsd_string())
.flatten() .flatten()
.map(|s| s.to_string()); .map(|s| clean_text(s));
check_slurs(&name)?; check_slurs(&name)?;
check_slurs(&title)?; check_slurs(&title)?;
check_slurs_opt(&description)?; check_slurs_opt(&description)?;
@ -408,8 +409,8 @@ impl FromApub for CommunityForm {
}; };
Ok(CommunityForm { Ok(CommunityForm {
name, name: clean_text(&name),
title, title: clean_text(&title),
description, description,
category_id: group.ext_one.category.identifier.parse::<i32>()?, category_id: group.ext_one.category.identifier.parse::<i32>()?,
creator_id: creator.id, creator_id: creator.id,

View file

@ -34,6 +34,7 @@ use activitystreams::{
}; };
use activitystreams_ext::Ext1; use activitystreams_ext::Ext1;
use actix_web::{body::Body, web, HttpResponse}; use actix_web::{body::Body, web, HttpResponse};
use ammonia::{clean, clean_text};
use anyhow::Context; use anyhow::Context;
use lemmy_db::{ use lemmy_db::{
community::Community, community::Community,
@ -168,17 +169,17 @@ fn extract_embed_from_apub(
.flatten() .flatten()
.map(|s| s.as_xsd_string()) .map(|s| s.as_xsd_string())
.flatten() .flatten()
.map(|s| s.to_string()); .map(|s| clean_text(s));
let description = preview_page let description = preview_page
.summary() .summary()
.map(|s| s.as_single_xsd_string()) .map(|s| s.as_single_xsd_string())
.flatten() .flatten()
.map(|s| s.to_string()); .map(|s| clean_text(s));
let html = preview_page let html = preview_page
.content() .content()
.map(|c| c.as_single_xsd_string()) .map(|c| c.as_single_xsd_string())
.flatten() .flatten()
.map(|s| s.to_string()); .map(|s| clean(s));
Ok(EmbedType { Ok(EmbedType {
title, title,
description, description,
@ -262,11 +263,11 @@ impl FromApub for PostForm {
.as_ref() .as_ref()
.map(|c| c.as_single_xsd_string()) .map(|c| c.as_single_xsd_string())
.flatten() .flatten()
.map(|s| s.to_string()); .map(|s| clean_text(s));
check_slurs(&name)?; check_slurs(&name)?;
let body_slurs_removed = body.map(|b| remove_slurs(&b)); let body_slurs_removed = body.map(|b| remove_slurs(&b));
Ok(PostForm { Ok(PostForm {
name, name: clean_text(&name),
url, url,
body: body_slurs_removed, body: body_slurs_removed,
creator_id: creator.id, creator_id: creator.id,

View file

@ -26,6 +26,7 @@ use activitystreams::{
object::{kind::NoteType, Note, Tombstone}, object::{kind::NoteType, Note, Tombstone},
prelude::*, prelude::*,
}; };
use ammonia::clean_text;
use anyhow::Context; use anyhow::Context;
use lemmy_db::{ use lemmy_db::{
private_message::{PrivateMessage, PrivateMessageForm}, private_message::{PrivateMessage, PrivateMessageForm},
@ -96,16 +97,17 @@ impl FromApub for PrivateMessageForm {
let recipient = get_or_fetch_and_upsert_user(&recipient_actor_id, context).await?; let recipient = get_or_fetch_and_upsert_user(&recipient_actor_id, context).await?;
let ap_id = note.id_unchecked().context(location_info!())?.to_string(); let ap_id = note.id_unchecked().context(location_info!())?.to_string();
check_is_apub_id_valid(&Url::parse(&ap_id)?)?; check_is_apub_id_valid(&Url::parse(&ap_id)?)?;
let content = note
.content()
.context(location_info!())?
.as_single_xsd_string()
.context(location_info!())?
.to_string();
Ok(PrivateMessageForm { Ok(PrivateMessageForm {
creator_id: creator.id, creator_id: creator.id,
recipient_id: recipient.id, recipient_id: recipient.id,
content: note content: clean_text(&content),
.content()
.context(location_info!())?
.as_single_xsd_string()
.context(location_info!())?
.to_string(),
published: note.published().map(|u| u.to_owned().naive_local()), published: note.published().map(|u| u.to_owned().naive_local()),
updated: note.updated().map(|u| u.to_owned().naive_local()), updated: note.updated().map(|u| u.to_owned().naive_local()),
deleted: None, deleted: None,

View file

@ -26,6 +26,7 @@ use activitystreams::{
}; };
use activitystreams_ext::Ext1; use activitystreams_ext::Ext1;
use actix_web::{body::Body, web, HttpResponse}; use actix_web::{body::Body, web, HttpResponse};
use ammonia::clean_text;
use anyhow::Context; use anyhow::Context;
use lemmy_db::{ use lemmy_db::{
naive_now, naive_now,
@ -242,19 +243,19 @@ impl FromApub for UserForm {
.as_xsd_string() .as_xsd_string()
.context(location_info!())? .context(location_info!())?
.to_string(); .to_string();
let preferred_username = person.inner.preferred_username().map(|u| u.to_string()); let preferred_username = person.inner.preferred_username().map(|u| clean_text(u));
let bio = person let bio = person
.inner .inner
.summary() .summary()
.map(|s| s.as_single_xsd_string()) .map(|s| s.as_single_xsd_string())
.flatten() .flatten()
.map(|s| s.to_string()); .map(|s| clean_text(s));
check_slurs(&name)?; check_slurs(&name)?;
check_slurs_opt(&preferred_username)?; check_slurs_opt(&preferred_username)?;
check_slurs_opt(&bio)?; check_slurs_opt(&bio)?;
Ok(UserForm { Ok(UserForm {
name, name: clean_text(&name),
preferred_username, preferred_username,
password_encrypted: "".to_string(), password_encrypted: "".to_string(),
admin: false, admin: false,