diff --git a/Cargo.lock b/Cargo.lock
index 3af6e1bd9..ac7e21db7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -614,6 +614,12 @@ version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd"
+[[package]]
+name = "cesu8"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
+
[[package]]
name = "cfg-if"
version = "1.0.0"
@@ -667,6 +673,16 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
+[[package]]
+name = "combine"
+version = "4.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a909e4d93292cd8e9c42e189f61681eff9d67b6541f96b8a1a737f23737bd001"
+dependencies = [
+ "bytes",
+ "memchr",
+]
+
[[package]]
name = "comrak"
version = "0.12.1"
@@ -1371,6 +1387,20 @@ version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a164bb2ceaeff4f42542bdb847c41517c78a60f5649671b2a07312b6e117549"
+[[package]]
+name = "html2md"
+version = "0.2.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61f6bf799d9770725ec13d66f4af9344e96285dc14d8e71e0fe02d272690667f"
+dependencies = [
+ "html5ever 0.25.1",
+ "jni",
+ "lazy_static",
+ "markup5ever_rcdom",
+ "percent-encoding",
+ "regex",
+]
+
[[package]]
name = "html5ever"
version = "0.22.5"
@@ -1601,6 +1631,26 @@ version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
+[[package]]
+name = "jni"
+version = "0.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c6df18c2e3db7e453d3c6ac5b3e9d5182664d28788126d39b91f2d1e22b017ec"
+dependencies = [
+ "cesu8",
+ "combine",
+ "jni-sys",
+ "log",
+ "thiserror",
+ "walkdir",
+]
+
+[[package]]
+name = "jni-sys"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
+
[[package]]
name = "jpeg-decoder"
version = "0.1.22"
@@ -1768,6 +1818,7 @@ dependencies = [
"chrono",
"diesel",
"futures",
+ "html2md",
"http",
"http-signature-normalization-actix",
"itertools",
@@ -3104,6 +3155,15 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
[[package]]
name = "schannel"
version = "0.1.19"
@@ -4033,6 +4093,17 @@ version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
+[[package]]
+name = "walkdir"
+version = "2.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
+dependencies = [
+ "same-file",
+ "winapi",
+ "winapi-util",
+]
+
[[package]]
name = "want"
version = "0.3.0"
diff --git a/crates/apub/Cargo.toml b/crates/apub/Cargo.toml
index 45ee1c312..10033c4c0 100644
--- a/crates/apub/Cargo.toml
+++ b/crates/apub/Cargo.toml
@@ -49,6 +49,7 @@ anyhow = "1.0.44"
thiserror = "1.0.29"
background-jobs = "0.9.0"
reqwest = { version = "0.11.4", features = ["json"] }
+html2md = "0.2.13"
[dev-dependencies]
serial_test = "0.5.1"
diff --git a/crates/apub/src/objects/comment.rs b/crates/apub/src/objects/comment.rs
index cf125ce9f..72d477d10 100644
--- a/crates/apub/src/objects/comment.rs
+++ b/crates/apub/src/objects/comment.rs
@@ -16,6 +16,7 @@ use activitystreams::{
};
use anyhow::{anyhow, Context};
use chrono::{DateTime, FixedOffset};
+use html2md::parse_html;
use lemmy_api_common::blocking;
use lemmy_apub_lib::{
traits::{ApubObject, FromApub, ToApub},
@@ -284,12 +285,11 @@ impl FromApub for ApubComment {
}
let content = if let SourceCompat::Lemmy(source) = &note.source {
- &source.content
+ source.content.clone()
} else {
- // TODO: convert from html to markdown
- &note.content
+ parse_html(&note.content)
};
- let content_slurs_removed = remove_slurs(content, &context.settings().slur_regex());
+ let content_slurs_removed = remove_slurs(&content, &context.settings().slur_regex());
let form = CommentForm {
creator_id: creator.id,
@@ -373,8 +373,15 @@ mod tests {
.unwrap();
assert_eq!(comment.ap_id.clone().into_inner(), pleroma_url);
- assert_eq!(comment.content.len(), 179);
+ assert_eq!(comment.content.len(), 64);
assert!(!comment.local);
assert_eq!(request_counter, 0);
}
+
+ #[actix_rt::test]
+ #[serial]
+ async fn test_html_to_markdown_sanitize() {
+ let parsed = parse_html("<b>hello</b>"); // NOTE(review): markup was lost from this literal; `<b>` reconstructed from the expected `**hello**` — confirm against upstream
+ assert_eq!(parsed, "**hello**");
+ }
}