mirror of
https://github.com/LemmyNet/lemmy.git
synced 2024-11-23 12:51:18 +00:00
Added html2md crate to parse comment html from pleroma (fixes #1461)
This commit is contained in:
parent
153ec0d7aa
commit
c514f56158
3 changed files with 84 additions and 5 deletions
71
Cargo.lock
generated
71
Cargo.lock
generated
|
@ -614,6 +614,12 @@ version = "1.0.71"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd"
|
||||
|
||||
[[package]]
|
||||
name = "cesu8"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
|
@ -667,6 +673,16 @@ version = "1.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
|
||||
|
||||
[[package]]
|
||||
name = "combine"
|
||||
version = "4.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a909e4d93292cd8e9c42e189f61681eff9d67b6541f96b8a1a737f23737bd001"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "comrak"
|
||||
version = "0.12.1"
|
||||
|
@ -1371,6 +1387,20 @@ version = "3.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a164bb2ceaeff4f42542bdb847c41517c78a60f5649671b2a07312b6e117549"
|
||||
|
||||
[[package]]
|
||||
name = "html2md"
|
||||
version = "0.2.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61f6bf799d9770725ec13d66f4af9344e96285dc14d8e71e0fe02d272690667f"
|
||||
dependencies = [
|
||||
"html5ever 0.25.1",
|
||||
"jni",
|
||||
"lazy_static",
|
||||
"markup5ever_rcdom",
|
||||
"percent-encoding",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.22.5"
|
||||
|
@ -1601,6 +1631,26 @@ version = "0.4.8"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
|
||||
|
||||
[[package]]
|
||||
name = "jni"
|
||||
version = "0.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c6df18c2e3db7e453d3c6ac5b3e9d5182664d28788126d39b91f2d1e22b017ec"
|
||||
dependencies = [
|
||||
"cesu8",
|
||||
"combine",
|
||||
"jni-sys",
|
||||
"log",
|
||||
"thiserror",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jni-sys"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
|
||||
|
||||
[[package]]
|
||||
name = "jpeg-decoder"
|
||||
version = "0.1.22"
|
||||
|
@ -1768,6 +1818,7 @@ dependencies = [
|
|||
"chrono",
|
||||
"diesel",
|
||||
"futures",
|
||||
"html2md",
|
||||
"http",
|
||||
"http-signature-normalization-actix",
|
||||
"itertools",
|
||||
|
@ -3104,6 +3155,15 @@ version = "1.0.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "schannel"
|
||||
version = "0.1.19"
|
||||
|
@ -4033,6 +4093,17 @@ version = "0.9.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
|
||||
dependencies = [
|
||||
"same-file",
|
||||
"winapi",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "want"
|
||||
version = "0.3.0"
|
||||
|
|
|
@ -49,6 +49,7 @@ anyhow = "1.0.44"
|
|||
thiserror = "1.0.29"
|
||||
background-jobs = "0.9.0"
|
||||
reqwest = { version = "0.11.4", features = ["json"] }
|
||||
html2md = "0.2.13"
|
||||
|
||||
[dev-dependencies]
|
||||
serial_test = "0.5.1"
|
||||
|
|
|
@ -16,6 +16,7 @@ use activitystreams::{
|
|||
};
|
||||
use anyhow::{anyhow, Context};
|
||||
use chrono::{DateTime, FixedOffset};
|
||||
use html2md::parse_html;
|
||||
use lemmy_api_common::blocking;
|
||||
use lemmy_apub_lib::{
|
||||
traits::{ApubObject, FromApub, ToApub},
|
||||
|
@ -284,12 +285,11 @@ impl FromApub for ApubComment {
|
|||
}
|
||||
|
||||
let content = if let SourceCompat::Lemmy(source) = &note.source {
|
||||
&source.content
|
||||
source.content.clone()
|
||||
} else {
|
||||
// TODO: convert from html to markdown
|
||||
&note.content
|
||||
parse_html(&note.content)
|
||||
};
|
||||
let content_slurs_removed = remove_slurs(content, &context.settings().slur_regex());
|
||||
let content_slurs_removed = remove_slurs(&content, &context.settings().slur_regex());
|
||||
|
||||
let form = CommentForm {
|
||||
creator_id: creator.id,
|
||||
|
@ -373,8 +373,15 @@ mod tests {
|
|||
.unwrap();
|
||||
|
||||
assert_eq!(comment.ap_id.clone().into_inner(), pleroma_url);
|
||||
assert_eq!(comment.content.len(), 179);
|
||||
assert_eq!(comment.content.len(), 64);
|
||||
assert!(!comment.local);
|
||||
assert_eq!(request_counter, 0);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[serial]
|
||||
async fn test_html_to_markdown_sanitize() {
|
||||
let parsed = parse_html(&"<script></script><b>hello</b>");
|
||||
assert_eq!(parsed, "**hello**");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue