mirror of
https://github.com/LemmyNet/lemmy.git
synced 2024-12-22 19:01:32 +00:00
* Convert titles for posts from Mastodon to plaintext (fixes #3828) * Fix prettier. * Trigger build * Convert titles for posts from Mastodon to plaintext (fixes #3828) * Fix prettier. * Fix sanitize. --------- Co-authored-by: Dessalines <dessalines@users.noreply.github.com> Co-authored-by: Dessalines <tyhou13@gmx.com>
This commit is contained in:
parent
645bf21d54
commit
608bb6b1b4
6 changed files with 77 additions and 27 deletions
8
Cargo.lock
generated
8
Cargo.lock
generated
|
@ -2719,6 +2719,7 @@ dependencies = [
|
|||
"enum_delegate",
|
||||
"futures",
|
||||
"html2md",
|
||||
"html2text",
|
||||
"http",
|
||||
"itertools 0.11.0",
|
||||
"lemmy_api_common",
|
||||
|
@ -2734,6 +2735,7 @@ dependencies = [
|
|||
"serde_json",
|
||||
"serde_with",
|
||||
"serial_test",
|
||||
"stringreader",
|
||||
"strum_macros",
|
||||
"task-local-extensions",
|
||||
"tokio",
|
||||
|
@ -4950,6 +4952,12 @@ dependencies = [
|
|||
"unicode-normalization",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stringreader"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "913e7b03d63752f6cdd2df77da36749d82669904798fe8944b9ec3d23f159905"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.10.0"
|
||||
|
|
|
@ -38,6 +38,8 @@ anyhow = { workspace = true }
|
|||
reqwest = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
html2md = "0.2.14"
|
||||
html2text = "0.6.0"
|
||||
stringreader = "0.1.1"
|
||||
serde_with = { workspace = true }
|
||||
enum_delegate = "0.2.0"
|
||||
moka = { version = "0.11", features = ["future"] }
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"id": "https://enterprise.lemmy.ml/c/tenforward",
|
||||
"type": "Group",
|
||||
"preferredUsername": "main",
|
||||
"preferredUsername": "tenforward",
|
||||
"name": "Ten Forward",
|
||||
"summary": "<p>Lounge and recreation facility</p>\n<hr />\n<p>Welcome to the <a href=\"https://memory-alpha.fandom.com/wiki/USS_Enterprise_(NCC-1701-D)\">Enterprise</a>!.</p>\n",
|
||||
"source": {
|
||||
|
|
|
@ -11,40 +11,42 @@
|
|||
"votersCount": "toot:votersCount"
|
||||
}
|
||||
],
|
||||
"id": "https://mastodon.madrid/users/felix/statuses/107224289116410645",
|
||||
"id": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519",
|
||||
"type": "Note",
|
||||
"summary": null,
|
||||
"published": "2021-11-05T11:46:50Z",
|
||||
"url": "https://mastodon.madrid/@felix/107224289116410645",
|
||||
"attributedTo": "https://mastodon.madrid/users/felix",
|
||||
"to": ["https://mastodon.madrid/users/felix/followers"],
|
||||
"inReplyTo": null,
|
||||
"published": "2023-08-04T09:55:39Z",
|
||||
"url": "https://dice.camp/@thekernelinyellow/110830743680706519",
|
||||
"attributedTo": "https://dice.camp/users/thekernelinyellow",
|
||||
"to": ["https://www.w3.org/ns/activitystreams#Public"],
|
||||
"cc": [
|
||||
"https://www.w3.org/ns/activitystreams#Public",
|
||||
"https://mamot.fr/users/retiolus"
|
||||
"https://dice.camp/users/thekernelinyellow/followers",
|
||||
"https://enterprise.lemmy.ml/c/tenforward",
|
||||
"https://enterprise.lemmy.ml/c/tenforward/followers"
|
||||
],
|
||||
"sensitive": false,
|
||||
"atomUri": "https://mastodon.madrid/users/felix/statuses/107224289116410645",
|
||||
"inReplyToAtomUri": "https://mamot.fr/users/retiolus/statuses/107224244380204526",
|
||||
"conversation": "tag:mamot.fr,2021-11-05:objectId=64635960:objectType=Conversation",
|
||||
"content": "<p><span class=\"h-card\"><a href=\"https://mamot.fr/@retiolus\" class=\"u-url mention\">@<span>retiolus</span></a></span> i have never been disappointed by a thinkpad. if you want to save money, get a model from a few years ago, there isnt a huge difference anyway.</p>",
|
||||
"atomUri": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519",
|
||||
"inReplyToAtomUri": null,
|
||||
"conversation": "tag:dice.camp,2023-08-04:objectId=29969291:objectType=Conversation",
|
||||
"content": "<p><span class=\"h-card\" translate=\"no\"><a href=\"https://enterprise.lemmy.ml/c/tenforward\" class=\"u-url mention\">@<span>tenforward</span></a></span> Variable never resetting at refresh</p><p>Hi! I'm using a variable to count elements in my generator but every time I generate a new character, the counter's value carries on from the previous one. Is there a function to reset it (I set it to 0 at the beginning of the file)</p>",
|
||||
"contentMap": {
|
||||
"en": "<p><span class=\"h-card\"><a href=\"https://mamot.fr/@retiolus\" class=\"u-url mention\">@<span>retiolus</span></a></span> i have never been disappointed by a thinkpad. if you want to save money, get a model from a few years ago, there isnt a huge difference anyway.</p>"
|
||||
"it": "<p><span class=\"h-card\" translate=\"no\"><a href=\"https://enterprise.lemmy.ml/c/tenforward\" class=\"u-url mention\">@<span>tenforward</span></a></span>Variable never resetting at refresh</p><p>Hi! I'm using a variable to count elements in my generator but every time I generate a new character, the counter's value carries on from the previous one. Is there a function to reset it (I set it to 0 at the beginning of the file)</p>"
|
||||
},
|
||||
"attachment": [],
|
||||
"tag": [
|
||||
{
|
||||
"type": "Mention",
|
||||
"href": "https://mamot.fr/users/retiolus",
|
||||
"name": "@retiolus@mamot.fr"
|
||||
"href": "https://enterprise.lemmy.ml/c/tenforward",
|
||||
"name": "@tenforward@enterprise.lemmy.ml"
|
||||
}
|
||||
],
|
||||
"replies": {
|
||||
"id": "https://mastodon.madrid/users/felix/statuses/107224289116410645/replies",
|
||||
"id": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519/replies",
|
||||
"type": "Collection",
|
||||
"first": {
|
||||
"type": "CollectionPage",
|
||||
"next": "https://mastodon.madrid/users/felix/statuses/107224289116410645/replies?only_other_accounts=true&page=true",
|
||||
"partOf": "https://mastodon.madrid/users/felix/statuses/107224289116410645/replies",
|
||||
"next": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519/replies?only_other_accounts=true&page=true",
|
||||
"partOf": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519/replies",
|
||||
"items": []
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@ use crate::objects::{
|
|||
use activitypub_federation::{config::Data, fetch::object_id::ObjectId};
|
||||
use actix_web::web::Json;
|
||||
use futures::{future::try_join_all, StreamExt};
|
||||
use lemmy_api_common::{context::LemmyContext, utils::sanitize_html_api_opt, SuccessResponse};
|
||||
use lemmy_api_common::{context::LemmyContext, SuccessResponse};
|
||||
use lemmy_db_schema::{
|
||||
newtypes::DbUrl,
|
||||
source::{
|
||||
|
@ -20,6 +20,7 @@ use lemmy_db_schema::{
|
|||
post::{PostSaved, PostSavedForm},
|
||||
},
|
||||
traits::{Blockable, Crud, Followable, Saveable},
|
||||
utils::diesel_option_overwrite,
|
||||
};
|
||||
use lemmy_db_views::structs::LocalUserView;
|
||||
use lemmy_utils::{
|
||||
|
@ -96,8 +97,8 @@ pub async fn import_settings(
|
|||
local_user_view: LocalUserView,
|
||||
context: Data<LemmyContext>,
|
||||
) -> Result<Json<SuccessResponse>, LemmyError> {
|
||||
let display_name = Some(sanitize_html_api_opt(&data.display_name));
|
||||
let bio = Some(sanitize_html_api_opt(&data.bio));
|
||||
let display_name = diesel_option_overwrite(data.display_name.clone());
|
||||
let bio = diesel_option_overwrite(data.bio.clone());
|
||||
|
||||
let person_form = PersonUpdateForm {
|
||||
display_name,
|
||||
|
|
|
@ -21,7 +21,7 @@ use activitypub_federation::{
|
|||
};
|
||||
use anyhow::anyhow;
|
||||
use chrono::{DateTime, Utc};
|
||||
use html2md::parse_html;
|
||||
use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator};
|
||||
use lemmy_api_common::{
|
||||
context::LemmyContext,
|
||||
request::fetch_site_data,
|
||||
|
@ -48,6 +48,7 @@ use lemmy_utils::{
|
|||
},
|
||||
};
|
||||
use std::ops::Deref;
|
||||
use stringreader::StringReader;
|
||||
use url::Url;
|
||||
|
||||
const MAX_TITLE_LENGTH: usize = 200;
|
||||
|
@ -171,11 +172,21 @@ impl Object for ApubPost {
|
|||
.name
|
||||
.clone()
|
||||
.or_else(|| {
|
||||
// Posts coming from Mastodon or similar platforms don't have a title. Instead we take the
|
||||
// first line of the content and convert it from HTML to plaintext. We also remove mentions
|
||||
// of the community name.
|
||||
page
|
||||
.content
|
||||
.clone()
|
||||
.as_ref()
|
||||
.and_then(|c| parse_html(c).lines().next().map(ToString::to_string))
|
||||
.as_deref()
|
||||
.map(StringReader::new)
|
||||
.map(|c| from_read_with_decorator(c, MAX_TITLE_LENGTH, TrivialDecorator::new()))
|
||||
.and_then(|c| {
|
||||
c.lines().next().map(|s| {
|
||||
s.replace(&format!("@{}", community.name), "")
|
||||
.trim()
|
||||
.to_string()
|
||||
})
|
||||
})
|
||||
})
|
||||
.ok_or_else(|| anyhow!("Object must have name or content"))?;
|
||||
if name.chars().count() > MAX_TITLE_LENGTH {
|
||||
|
@ -288,8 +299,9 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::{
|
||||
objects::{
|
||||
community::tests::parse_lemmy_community,
|
||||
person::tests::parse_lemmy_person,
|
||||
community::{tests::parse_lemmy_community, ApubCommunity},
|
||||
instance::ApubSite,
|
||||
person::{tests::parse_lemmy_person, ApubPerson},
|
||||
post::ApubPost,
|
||||
tests::init_context,
|
||||
},
|
||||
|
@ -318,6 +330,31 @@ mod tests {
|
|||
assert!(!post.featured_community);
|
||||
assert_eq!(context.request_count(), 0);
|
||||
|
||||
cleanup(&context, person, site, community, post).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn test_convert_mastodon_post_title() {
|
||||
let context = init_context().await;
|
||||
let (person, site) = parse_lemmy_person(&context).await;
|
||||
let community = parse_lemmy_community(&context).await;
|
||||
|
||||
let json = file_to_json_object("assets/mastodon/objects/page.json").unwrap();
|
||||
let post = ApubPost::from_json(json, &context).await.unwrap();
|
||||
|
||||
assert_eq!(post.name, "Variable never resetting at refresh");
|
||||
|
||||
cleanup(&context, person, site, community, post).await;
|
||||
}
|
||||
|
||||
async fn cleanup(
|
||||
context: &Data<LemmyContext>,
|
||||
person: ApubPerson,
|
||||
site: ApubSite,
|
||||
community: ApubCommunity,
|
||||
post: ApubPost,
|
||||
) {
|
||||
Post::delete(&mut context.pool(), post.id).await.unwrap();
|
||||
Person::delete(&mut context.pool(), person.id)
|
||||
.await
|
||||
|
|
Loading…
Reference in a new issue