mirror of
https://github.com/LemmyNet/lemmy.git
synced 2024-11-23 21:01:23 +00:00
Added html2md crate to parse comment html from pleroma (fixes #1461)
This commit is contained in:
parent
153ec0d7aa
commit
c514f56158
3 changed files with 84 additions and 5 deletions
71
Cargo.lock
generated
71
Cargo.lock
generated
|
@ -614,6 +614,12 @@ version = "1.0.71"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd"
|
checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cesu8"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfg-if"
|
name = "cfg-if"
|
||||||
version = "1.0.0"
|
version = "1.0.0"
|
||||||
|
@ -667,6 +673,16 @@ version = "1.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
|
checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "combine"
|
||||||
|
version = "4.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a909e4d93292cd8e9c42e189f61681eff9d67b6541f96b8a1a737f23737bd001"
|
||||||
|
dependencies = [
|
||||||
|
"bytes",
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "comrak"
|
name = "comrak"
|
||||||
version = "0.12.1"
|
version = "0.12.1"
|
||||||
|
@ -1371,6 +1387,20 @@ version = "3.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8a164bb2ceaeff4f42542bdb847c41517c78a60f5649671b2a07312b6e117549"
|
checksum = "8a164bb2ceaeff4f42542bdb847c41517c78a60f5649671b2a07312b6e117549"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "html2md"
|
||||||
|
version = "0.2.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "61f6bf799d9770725ec13d66f4af9344e96285dc14d8e71e0fe02d272690667f"
|
||||||
|
dependencies = [
|
||||||
|
"html5ever 0.25.1",
|
||||||
|
"jni",
|
||||||
|
"lazy_static",
|
||||||
|
"markup5ever_rcdom",
|
||||||
|
"percent-encoding",
|
||||||
|
"regex",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "html5ever"
|
name = "html5ever"
|
||||||
version = "0.22.5"
|
version = "0.22.5"
|
||||||
|
@ -1601,6 +1631,26 @@ version = "0.4.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
|
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "jni"
|
||||||
|
version = "0.19.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c6df18c2e3db7e453d3c6ac5b3e9d5182664d28788126d39b91f2d1e22b017ec"
|
||||||
|
dependencies = [
|
||||||
|
"cesu8",
|
||||||
|
"combine",
|
||||||
|
"jni-sys",
|
||||||
|
"log",
|
||||||
|
"thiserror",
|
||||||
|
"walkdir",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "jni-sys"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jpeg-decoder"
|
name = "jpeg-decoder"
|
||||||
version = "0.1.22"
|
version = "0.1.22"
|
||||||
|
@ -1768,6 +1818,7 @@ dependencies = [
|
||||||
"chrono",
|
"chrono",
|
||||||
"diesel",
|
"diesel",
|
||||||
"futures",
|
"futures",
|
||||||
|
"html2md",
|
||||||
"http",
|
"http",
|
||||||
"http-signature-normalization-actix",
|
"http-signature-normalization-actix",
|
||||||
"itertools",
|
"itertools",
|
||||||
|
@ -3104,6 +3155,15 @@ version = "1.0.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
|
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "same-file"
|
||||||
|
version = "1.0.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-util",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "schannel"
|
name = "schannel"
|
||||||
version = "0.1.19"
|
version = "0.1.19"
|
||||||
|
@ -4033,6 +4093,17 @@ version = "0.9.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
|
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "walkdir"
|
||||||
|
version = "2.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
|
||||||
|
dependencies = [
|
||||||
|
"same-file",
|
||||||
|
"winapi",
|
||||||
|
"winapi-util",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "want"
|
name = "want"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
|
|
|
@ -49,6 +49,7 @@ anyhow = "1.0.44"
|
||||||
thiserror = "1.0.29"
|
thiserror = "1.0.29"
|
||||||
background-jobs = "0.9.0"
|
background-jobs = "0.9.0"
|
||||||
reqwest = { version = "0.11.4", features = ["json"] }
|
reqwest = { version = "0.11.4", features = ["json"] }
|
||||||
|
html2md = "0.2.13"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
serial_test = "0.5.1"
|
serial_test = "0.5.1"
|
||||||
|
|
|
@ -16,6 +16,7 @@ use activitystreams::{
|
||||||
};
|
};
|
||||||
use anyhow::{anyhow, Context};
|
use anyhow::{anyhow, Context};
|
||||||
use chrono::{DateTime, FixedOffset};
|
use chrono::{DateTime, FixedOffset};
|
||||||
|
use html2md::parse_html;
|
||||||
use lemmy_api_common::blocking;
|
use lemmy_api_common::blocking;
|
||||||
use lemmy_apub_lib::{
|
use lemmy_apub_lib::{
|
||||||
traits::{ApubObject, FromApub, ToApub},
|
traits::{ApubObject, FromApub, ToApub},
|
||||||
|
@ -284,12 +285,11 @@ impl FromApub for ApubComment {
|
||||||
}
|
}
|
||||||
|
|
||||||
let content = if let SourceCompat::Lemmy(source) = &note.source {
|
let content = if let SourceCompat::Lemmy(source) = &note.source {
|
||||||
&source.content
|
source.content.clone()
|
||||||
} else {
|
} else {
|
||||||
// TODO: convert from html to markdown
|
parse_html(&note.content)
|
||||||
&note.content
|
|
||||||
};
|
};
|
||||||
let content_slurs_removed = remove_slurs(content, &context.settings().slur_regex());
|
let content_slurs_removed = remove_slurs(&content, &context.settings().slur_regex());
|
||||||
|
|
||||||
let form = CommentForm {
|
let form = CommentForm {
|
||||||
creator_id: creator.id,
|
creator_id: creator.id,
|
||||||
|
@ -373,8 +373,15 @@ mod tests {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(comment.ap_id.clone().into_inner(), pleroma_url);
|
assert_eq!(comment.ap_id.clone().into_inner(), pleroma_url);
|
||||||
assert_eq!(comment.content.len(), 179);
|
assert_eq!(comment.content.len(), 64);
|
||||||
assert!(!comment.local);
|
assert!(!comment.local);
|
||||||
assert_eq!(request_counter, 0);
|
assert_eq!(request_counter, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
#[serial]
|
||||||
|
async fn test_html_to_markdown_sanitize() {
|
||||||
|
let parsed = parse_html(&"<script></script><b>hello</b>");
|
||||||
|
assert_eq!(parsed, "**hello**");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue