mirror of
https://github.com/LemmyNet/lemmy.git
synced 2024-11-26 06:11:26 +00:00
* Convert titles for posts from Mastodon to plaintext (fixes #3828) * Fix prettier. * Trigger build * Convert titles for posts from Mastodon to plaintext (fixes #3828) * Fix prettier. * Fix sanizize. --------- Co-authored-by: Dessalines <dessalines@users.noreply.github.com> Co-authored-by: Dessalines <tyhou13@gmx.com>
This commit is contained in:
parent
645bf21d54
commit
608bb6b1b4
6 changed files with 77 additions and 27 deletions
8
Cargo.lock
generated
8
Cargo.lock
generated
|
@ -2719,6 +2719,7 @@ dependencies = [
|
||||||
"enum_delegate",
|
"enum_delegate",
|
||||||
"futures",
|
"futures",
|
||||||
"html2md",
|
"html2md",
|
||||||
|
"html2text",
|
||||||
"http",
|
"http",
|
||||||
"itertools 0.11.0",
|
"itertools 0.11.0",
|
||||||
"lemmy_api_common",
|
"lemmy_api_common",
|
||||||
|
@ -2734,6 +2735,7 @@ dependencies = [
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"serde_with",
|
"serde_with",
|
||||||
"serial_test",
|
"serial_test",
|
||||||
|
"stringreader",
|
||||||
"strum_macros",
|
"strum_macros",
|
||||||
"task-local-extensions",
|
"task-local-extensions",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
@ -4950,6 +4952,12 @@ dependencies = [
|
||||||
"unicode-normalization",
|
"unicode-normalization",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "stringreader"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "913e7b03d63752f6cdd2df77da36749d82669904798fe8944b9ec3d23f159905"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.10.0"
|
version = "0.10.0"
|
||||||
|
|
|
@ -38,6 +38,8 @@ anyhow = { workspace = true }
|
||||||
reqwest = { workspace = true }
|
reqwest = { workspace = true }
|
||||||
once_cell = { workspace = true }
|
once_cell = { workspace = true }
|
||||||
html2md = "0.2.14"
|
html2md = "0.2.14"
|
||||||
|
html2text = "0.6.0"
|
||||||
|
stringreader = "0.1.1"
|
||||||
serde_with = { workspace = true }
|
serde_with = { workspace = true }
|
||||||
enum_delegate = "0.2.0"
|
enum_delegate = "0.2.0"
|
||||||
moka = { version = "0.11", features = ["future"] }
|
moka = { version = "0.11", features = ["future"] }
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
{
|
{
|
||||||
"id": "https://enterprise.lemmy.ml/c/tenforward",
|
"id": "https://enterprise.lemmy.ml/c/tenforward",
|
||||||
"type": "Group",
|
"type": "Group",
|
||||||
"preferredUsername": "main",
|
"preferredUsername": "tenforward",
|
||||||
"name": "Ten Forward",
|
"name": "Ten Forward",
|
||||||
"summary": "<p>Lounge and recreation facility</p>\n<hr />\n<p>Welcome to the <a href=\"https://memory-alpha.fandom.com/wiki/USS_Enterprise_(NCC-1701-D)\">Enterprise</a>!.</p>\n",
|
"summary": "<p>Lounge and recreation facility</p>\n<hr />\n<p>Welcome to the <a href=\"https://memory-alpha.fandom.com/wiki/USS_Enterprise_(NCC-1701-D)\">Enterprise</a>!.</p>\n",
|
||||||
"source": {
|
"source": {
|
||||||
|
|
|
@ -11,40 +11,42 @@
|
||||||
"votersCount": "toot:votersCount"
|
"votersCount": "toot:votersCount"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"id": "https://mastodon.madrid/users/felix/statuses/107224289116410645",
|
"id": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519",
|
||||||
"type": "Note",
|
"type": "Note",
|
||||||
"summary": null,
|
"summary": null,
|
||||||
"published": "2021-11-05T11:46:50Z",
|
"inReplyTo": null,
|
||||||
"url": "https://mastodon.madrid/@felix/107224289116410645",
|
"published": "2023-08-04T09:55:39Z",
|
||||||
"attributedTo": "https://mastodon.madrid/users/felix",
|
"url": "https://dice.camp/@thekernelinyellow/110830743680706519",
|
||||||
"to": ["https://mastodon.madrid/users/felix/followers"],
|
"attributedTo": "https://dice.camp/users/thekernelinyellow",
|
||||||
|
"to": ["https://www.w3.org/ns/activitystreams#Public"],
|
||||||
"cc": [
|
"cc": [
|
||||||
"https://www.w3.org/ns/activitystreams#Public",
|
"https://dice.camp/users/thekernelinyellow/followers",
|
||||||
"https://mamot.fr/users/retiolus"
|
"https://enterprise.lemmy.ml/c/tenforward",
|
||||||
|
"https://enterprise.lemmy.ml/c/tenforward/followers"
|
||||||
],
|
],
|
||||||
"sensitive": false,
|
"sensitive": false,
|
||||||
"atomUri": "https://mastodon.madrid/users/felix/statuses/107224289116410645",
|
"atomUri": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519",
|
||||||
"inReplyToAtomUri": "https://mamot.fr/users/retiolus/statuses/107224244380204526",
|
"inReplyToAtomUri": null,
|
||||||
"conversation": "tag:mamot.fr,2021-11-05:objectId=64635960:objectType=Conversation",
|
"conversation": "tag:dice.camp,2023-08-04:objectId=29969291:objectType=Conversation",
|
||||||
"content": "<p><span class=\"h-card\"><a href=\"https://mamot.fr/@retiolus\" class=\"u-url mention\">@<span>retiolus</span></a></span> i have never been disappointed by a thinkpad. if you want to save money, get a model from a few years ago, there isnt a huge difference anyway.</p>",
|
"content": "<p><span class=\"h-card\" translate=\"no\"><a href=\"https://enterprise.lemmy.ml/c/tenforward\" class=\"u-url mention\">@<span>tenforward</span></a></span> Variable never resetting at refresh</p><p>Hi! I'm using a variable to count elements in my generator but every time I generate a new character, the counter's value carries on from the previous one. Is there a function to reset it (I set it to 0 at the beginning of the file)</p>",
|
||||||
"contentMap": {
|
"contentMap": {
|
||||||
"en": "<p><span class=\"h-card\"><a href=\"https://mamot.fr/@retiolus\" class=\"u-url mention\">@<span>retiolus</span></a></span> i have never been disappointed by a thinkpad. if you want to save money, get a model from a few years ago, there isnt a huge difference anyway.</p>"
|
"it": "<p><span class=\"h-card\" translate=\"no\"><a href=\"https://enterprise.lemmy.ml/c/tenforward\" class=\"u-url mention\">@<span>tenforward</span></a></span>Variable never resetting at refresh</p><p>Hi! I'm using a variable to count elements in my generator but every time I generate a new character, the counter's value carries on from the previous one. Is there a function to reset it (I set it to 0 at the beginning of the file)</p>"
|
||||||
},
|
},
|
||||||
"attachment": [],
|
"attachment": [],
|
||||||
"tag": [
|
"tag": [
|
||||||
{
|
{
|
||||||
"type": "Mention",
|
"type": "Mention",
|
||||||
"href": "https://mamot.fr/users/retiolus",
|
"href": "https://enterprise.lemmy.ml/c/tenforward",
|
||||||
"name": "@retiolus@mamot.fr"
|
"name": "@tenforward@enterprise.lemmy.ml"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"replies": {
|
"replies": {
|
||||||
"id": "https://mastodon.madrid/users/felix/statuses/107224289116410645/replies",
|
"id": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519/replies",
|
||||||
"type": "Collection",
|
"type": "Collection",
|
||||||
"first": {
|
"first": {
|
||||||
"type": "CollectionPage",
|
"type": "CollectionPage",
|
||||||
"next": "https://mastodon.madrid/users/felix/statuses/107224289116410645/replies?only_other_accounts=true&page=true",
|
"next": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519/replies?only_other_accounts=true&page=true",
|
||||||
"partOf": "https://mastodon.madrid/users/felix/statuses/107224289116410645/replies",
|
"partOf": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519/replies",
|
||||||
"items": []
|
"items": []
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,7 +7,7 @@ use crate::objects::{
|
||||||
use activitypub_federation::{config::Data, fetch::object_id::ObjectId};
|
use activitypub_federation::{config::Data, fetch::object_id::ObjectId};
|
||||||
use actix_web::web::Json;
|
use actix_web::web::Json;
|
||||||
use futures::{future::try_join_all, StreamExt};
|
use futures::{future::try_join_all, StreamExt};
|
||||||
use lemmy_api_common::{context::LemmyContext, utils::sanitize_html_api_opt, SuccessResponse};
|
use lemmy_api_common::{context::LemmyContext, SuccessResponse};
|
||||||
use lemmy_db_schema::{
|
use lemmy_db_schema::{
|
||||||
newtypes::DbUrl,
|
newtypes::DbUrl,
|
||||||
source::{
|
source::{
|
||||||
|
@ -20,6 +20,7 @@ use lemmy_db_schema::{
|
||||||
post::{PostSaved, PostSavedForm},
|
post::{PostSaved, PostSavedForm},
|
||||||
},
|
},
|
||||||
traits::{Blockable, Crud, Followable, Saveable},
|
traits::{Blockable, Crud, Followable, Saveable},
|
||||||
|
utils::diesel_option_overwrite,
|
||||||
};
|
};
|
||||||
use lemmy_db_views::structs::LocalUserView;
|
use lemmy_db_views::structs::LocalUserView;
|
||||||
use lemmy_utils::{
|
use lemmy_utils::{
|
||||||
|
@ -96,8 +97,8 @@ pub async fn import_settings(
|
||||||
local_user_view: LocalUserView,
|
local_user_view: LocalUserView,
|
||||||
context: Data<LemmyContext>,
|
context: Data<LemmyContext>,
|
||||||
) -> Result<Json<SuccessResponse>, LemmyError> {
|
) -> Result<Json<SuccessResponse>, LemmyError> {
|
||||||
let display_name = Some(sanitize_html_api_opt(&data.display_name));
|
let display_name = diesel_option_overwrite(data.display_name.clone());
|
||||||
let bio = Some(sanitize_html_api_opt(&data.bio));
|
let bio = diesel_option_overwrite(data.bio.clone());
|
||||||
|
|
||||||
let person_form = PersonUpdateForm {
|
let person_form = PersonUpdateForm {
|
||||||
display_name,
|
display_name,
|
||||||
|
|
|
@ -21,7 +21,7 @@ use activitypub_federation::{
|
||||||
};
|
};
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use html2md::parse_html;
|
use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator};
|
||||||
use lemmy_api_common::{
|
use lemmy_api_common::{
|
||||||
context::LemmyContext,
|
context::LemmyContext,
|
||||||
request::fetch_site_data,
|
request::fetch_site_data,
|
||||||
|
@ -48,6 +48,7 @@ use lemmy_utils::{
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
use stringreader::StringReader;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
const MAX_TITLE_LENGTH: usize = 200;
|
const MAX_TITLE_LENGTH: usize = 200;
|
||||||
|
@ -171,11 +172,21 @@ impl Object for ApubPost {
|
||||||
.name
|
.name
|
||||||
.clone()
|
.clone()
|
||||||
.or_else(|| {
|
.or_else(|| {
|
||||||
|
// Posts coming from Mastodon or similar platforms don't have a title. Instead we take the
|
||||||
|
// first line of the content and convert it from HTML to plaintext. We also remove mentions
|
||||||
|
// of the community name.
|
||||||
page
|
page
|
||||||
.content
|
.content
|
||||||
.clone()
|
.as_deref()
|
||||||
.as_ref()
|
.map(StringReader::new)
|
||||||
.and_then(|c| parse_html(c).lines().next().map(ToString::to_string))
|
.map(|c| from_read_with_decorator(c, MAX_TITLE_LENGTH, TrivialDecorator::new()))
|
||||||
|
.and_then(|c| {
|
||||||
|
c.lines().next().map(|s| {
|
||||||
|
s.replace(&format!("@{}", community.name), "")
|
||||||
|
.trim()
|
||||||
|
.to_string()
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
.ok_or_else(|| anyhow!("Object must have name or content"))?;
|
.ok_or_else(|| anyhow!("Object must have name or content"))?;
|
||||||
if name.chars().count() > MAX_TITLE_LENGTH {
|
if name.chars().count() > MAX_TITLE_LENGTH {
|
||||||
|
@ -288,8 +299,9 @@ mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::{
|
use crate::{
|
||||||
objects::{
|
objects::{
|
||||||
community::tests::parse_lemmy_community,
|
community::{tests::parse_lemmy_community, ApubCommunity},
|
||||||
person::tests::parse_lemmy_person,
|
instance::ApubSite,
|
||||||
|
person::{tests::parse_lemmy_person, ApubPerson},
|
||||||
post::ApubPost,
|
post::ApubPost,
|
||||||
tests::init_context,
|
tests::init_context,
|
||||||
},
|
},
|
||||||
|
@ -318,6 +330,31 @@ mod tests {
|
||||||
assert!(!post.featured_community);
|
assert!(!post.featured_community);
|
||||||
assert_eq!(context.request_count(), 0);
|
assert_eq!(context.request_count(), 0);
|
||||||
|
|
||||||
|
cleanup(&context, person, site, community, post).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
#[serial]
|
||||||
|
async fn test_convert_mastodon_post_title() {
|
||||||
|
let context = init_context().await;
|
||||||
|
let (person, site) = parse_lemmy_person(&context).await;
|
||||||
|
let community = parse_lemmy_community(&context).await;
|
||||||
|
|
||||||
|
let json = file_to_json_object("assets/mastodon/objects/page.json").unwrap();
|
||||||
|
let post = ApubPost::from_json(json, &context).await.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(post.name, "Variable never resetting at refresh");
|
||||||
|
|
||||||
|
cleanup(&context, person, site, community, post).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn cleanup(
|
||||||
|
context: &Data<LemmyContext>,
|
||||||
|
person: ApubPerson,
|
||||||
|
site: ApubSite,
|
||||||
|
community: ApubCommunity,
|
||||||
|
post: ApubPost,
|
||||||
|
) {
|
||||||
Post::delete(&mut context.pool(), post.id).await.unwrap();
|
Post::delete(&mut context.pool(), post.id).await.unwrap();
|
||||||
Person::delete(&mut context.pool(), person.id)
|
Person::delete(&mut context.pool(), person.id)
|
||||||
.await
|
.await
|
||||||
|
|
Loading…
Reference in a new issue