From edb063f288a496e79496b02c854af90d394280be Mon Sep 17 00:00:00 2001 From: Dessalines Date: Tue, 21 Jan 2025 04:01:00 -0500 Subject: [PATCH 1/3] Upgrading html2text. (#5336) --- Cargo.lock | 73 ++++++++++++++++++++++++--------- Cargo.toml | 6 +-- crates/apub/Cargo.toml | 4 +- crates/apub/src/objects/post.rs | 4 +- crates/utils/Cargo.toml | 2 +- crates/utils/src/email.rs | 2 +- 6 files changed, 63 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b3106adc0..c9b05b77b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -832,9 +832,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.26" +version = "4.5.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8eb5e908ef3a6efbe1ed62520fb7287959888c88485abe072543190ecc66783" +checksum = "769b0145982b4b48713e01ec42d61614425f27b7058bda7180a3a41f30104796" dependencies = [ "clap_builder", "clap_derive", @@ -842,9 +842,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.26" +version = "4.5.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b01801b5fc6a0a232407abc821660c9c6d25a1cafc0d4f85f29fb8d9afc121" +checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7" dependencies = [ "anstream", "anstyle", @@ -1371,7 +1371,7 @@ dependencies = [ "diff", "regex", "same-file", - "unicode-width", + "unicode-width 0.1.13", ] [[package]] @@ -1930,15 +1930,16 @@ dependencies = [ [[package]] name = "html2text" -version = "0.12.6" +version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "042a9677c258ac2952dd026bb0cd21972f00f644a5a38f5a215cb22cdaf6834e" +checksum = "1bf7722c2ffdd62628b6e13065b6ab6cf154a236bd476c6e89af1352d745b83e" dependencies = [ - "html5ever 0.27.0", - "markup5ever 0.12.1", + "html5ever 0.29.0", + "markup5ever 0.14.0", + "nom", "tendril", - "thiserror 1.0.69", - "unicode-width", + "thiserror 2.0.11", + "unicode-width 0.2.0", ] [[package]] @@ -1969,6 +1970,20 @@ dependencies = [ "syn 2.0.96", ] +[[package]] +name = "html5ever" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e15626aaf9c351bc696217cbe29cb9b5e86c43f8a46b5e2f5c6c5cf7cb904ce" +dependencies = [ + "log", + "mac", + "markup5ever 0.14.0", + "proc-macro2", + "quote", + "syn 2.0.96", +] + [[package]] name = "http" version = "0.2.12" @@ -2957,7 +2972,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -3155,6 +3170,20 @@ dependencies = [ "tendril", ] +[[package]] +name = "markup5ever" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82c88c6129bd24319e62a0359cb6b958fa7e8be6e19bb1663bc396b90883aca5" +dependencies = [ + "log", + "phf 0.11.3", + "phf_codegen 0.11.3", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "markup5ever_rcdom" version = "0.2.0" @@ -4443,9 +4472,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.24" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" +checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" [[package]] name = "serde" @@ -4469,9 +4498,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.135" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" +checksum = "930cfb6e6abf99298aaad7d29abbef7a9999a9a8806a40088f55f0dcec03146b" dependencies = [ "indexmap 2.7.0", "itoa", @@ -5466,6 +5495,12 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -5528,9 +5563,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4" +checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b" dependencies = [ "getrandom", "serde", @@ -5766,7 +5801,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 3c83037a5..ad88bd610 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -128,9 +128,9 @@ chrono = { version = "0.4.39", features = [ "now", "serde", ], default-features = false } -serde_json = { version = "1.0.135", features = ["preserve_order"] } +serde_json = { version = "1.0.137", features = ["preserve_order"] } base64 = "0.22.1" -uuid = { version = "1.12.0", features = ["serde"] } +uuid = { version = "1.12.1", features = ["serde"] } async-trait = "0.1.85" captcha = "0.0.9" anyhow = { version = "1.0.95", features = ["backtrace"] } @@ -158,7 +158,7 @@ urlencoding = "2.1.3" enum-map = "2.7" moka = { version = "0.12.10", features = ["future"] } i-love-jesus = { version = "0.1.0" } -clap = { version = "4.5.26", features = ["derive", "env"] } +clap = { version = "4.5.27", features = ["derive", "env"] } pretty_assertions = "1.4.1" derive-new = "0.7.0" diesel-bind-if-some = "0.1.0" diff --git a/crates/apub/Cargo.toml b/crates/apub/Cargo.toml index bd6fe3d28..75d16ff0f 100644 --- a/crates/apub/Cargo.toml +++ b/crates/apub/Cargo.toml @@ -42,10 +42,10 @@ reqwest = { workspace = true } moka.workspace = true serde_with.workspace = true html2md = "0.2.15" -html2text = "0.12.6" +html2text = "0.13.6" stringreader = "0.1.1" enum_delegate = "0.2.0" -semver = "1.0.24" +semver = "1.0.25" [dev-dependencies] serial_test = { workspace = true } diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index 73e940a3c..64effbe6a 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -21,7 +21,7 @@ use activitypub_federation::{ }; use anyhow::anyhow; use chrono::{DateTime, Utc}; -use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator}; +use html2text::{from_read_with_decorator, render::TrivialDecorator}; use lemmy_api_common::{ context::LemmyContext, request::generate_post_link_metadata, @@ -198,7 +198,7 @@ impl Object for ApubPost { .map(StringReader::new) .map(|c| from_read_with_decorator(c, MAX_TITLE_LENGTH, TrivialDecorator::new())) .and_then(|c| { - c.lines().next().map(|s| { + c.unwrap_or_default().lines().next().map(|s| { s.replace(&format!("@{}", community.name), "") .trim() .to_string() diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index e6af866d5..72e2e7f52 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -72,7 +72,7 @@ uuid = { workspace = true, optional = true, features = ["v4"] } rosetta-i18n = { workspace = true, optional = true } tokio = { workspace = true, optional = true } urlencoding = { workspace = true, optional = true } -html2text = { version = "0.12.6", optional = true } +html2text = { version = "0.13.6", optional = true } deser-hjson = { version = "2.2.4", optional = true } smart-default = { version = "0.7.1", optional = true } lettre = { version = "0.11.11", default-features = false, features = [ diff --git a/crates/utils/src/email.rs b/crates/utils/src/email.rs index 46abb47ea..aa06ba1b9 100644 --- a/crates/utils/src/email.rs +++ b/crates/utils/src/email.rs @@ -43,7 +43,7 @@ pub async fn send_email( }; // use usize::MAX as the line wrap length, since lettre handles the wrapping for us - let plain_text = html2text::from_read(html.as_bytes(), usize::MAX); + let plain_text = html2text::from_read(html.as_bytes(), usize::MAX)?; let smtp_from_address = &email_config.smtp_from_address; From 6f05254aaec5d8d048480874989546c27a564730 Mon Sep 17 00:00:00 2001 From: dullbananas Date: Tue, 21 Jan 2025 07:52:18 -0700 Subject: [PATCH 2/3] Prevent incorrectly using delete instead of uplete (#5331) * implement check and test it by modifying PostLike::remove * revert change in PostLike::remove --- .../db_schema/replaceable_schema/triggers.sql | 38 +++++++++++++++++++ crates/db_schema/src/utils/uplete.rs | 5 ++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/crates/db_schema/replaceable_schema/triggers.sql b/crates/db_schema/replaceable_schema/triggers.sql index c0cbfab34..ad61f0485 100644 --- a/crates/db_schema/replaceable_schema/triggers.sql +++ b/crates/db_schema/replaceable_schema/triggers.sql @@ -889,3 +889,41 @@ CALL r.create_inbox_combined_trigger ('person_post_mention'); CALL r.create_inbox_combined_trigger ('private_message'); +-- Prevent using delete instead of uplete on action tables +CREATE FUNCTION r.require_uplete () + RETURNS TRIGGER + LANGUAGE plpgsql + AS $$ +BEGIN + IF pg_trigger_depth() = 1 AND NOT starts_with (current_query(), '/**/') THEN + RAISE 'using delete instead of uplete is not allowed for this table'; + END IF; + RETURN NULL; +END +$$; + +CREATE TRIGGER require_uplete + BEFORE DELETE ON comment_actions + FOR EACH STATEMENT + EXECUTE FUNCTION r.require_uplete (); + +CREATE TRIGGER require_uplete + BEFORE DELETE ON community_actions + FOR EACH STATEMENT + EXECUTE FUNCTION r.require_uplete (); + +CREATE TRIGGER require_uplete + BEFORE DELETE ON instance_actions + FOR EACH STATEMENT + EXECUTE FUNCTION r.require_uplete (); + +CREATE TRIGGER require_uplete + BEFORE DELETE ON person_actions + FOR EACH STATEMENT + EXECUTE FUNCTION r.require_uplete (); + +CREATE TRIGGER require_uplete + BEFORE DELETE ON post_actions + FOR EACH STATEMENT + EXECUTE FUNCTION r.require_uplete (); + diff --git a/crates/db_schema/src/utils/uplete.rs b/crates/db_schema/src/utils/uplete.rs index 8c5262b90..dddbfe8ea 100644 --- a/crates/db_schema/src/utils/uplete.rs +++ b/crates/db_schema/src/utils/uplete.rs @@ -121,6 +121,9 @@ impl QueryFragment for UpleteQuery { fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> Result<(), Error> { assert_ne!(self.set_null_columns.len(), 0, "`set_null` was not called"); + // This is checked by require_uplete triggers + out.push_sql("/**/"); + // Declare `update_keys` and `delete_keys` CTEs, which select primary keys for (prefix, subquery) in [ ("WITH update_keys", &self.update_subquery), @@ -357,7 +360,7 @@ mod tests { let update_count = "SELECT count(*) FROM update_result"; let delete_count = "SELECT count(*) FROM delete_result"; - format!(r#"WITH {with_queries} SELECT ({update_count}), ({delete_count}) -- binds: []"#) + format!(r#"/**/WITH {with_queries} SELECT ({update_count}), ({delete_count}) -- binds: []"#) } #[test] From 31b8a4bbe030eb42390b9b4761cb23a1767bbb34 Mon Sep 17 00:00:00 2001 From: Nutomic Date: Tue, 21 Jan 2025 16:10:05 +0000 Subject: [PATCH 3/3] Implement request idempotency (fixes #4735) (#5329) * Implement request idempotency (fixes #4735) * delete old items * clippy * remove todo --- crates/utils/src/rate_limit/rate_limiter.rs | 4 +- src/idempotency_middleware.rs | 176 ++++++++++++++++++++ src/lib.rs | 6 + 3 files changed, 184 insertions(+), 2 deletions(-) create mode 100644 src/idempotency_middleware.rs diff --git a/crates/utils/src/rate_limit/rate_limiter.rs b/crates/utils/src/rate_limit/rate_limiter.rs index 68f248d6c..7fe239f79 100644 --- a/crates/utils/src/rate_limit/rate_limiter.rs +++ b/crates/utils/src/rate_limit/rate_limiter.rs @@ -13,9 +13,9 @@ static START_TIME: LazyLock = LazyLock::new(Instant::now); /// Smaller than `std::time::Instant` because it uses a smaller integer for seconds and doesn't /// store nanoseconds -#[derive(PartialEq, Debug, Clone, Copy)] +#[derive(PartialEq, Debug, Clone, Copy, Hash)] pub struct InstantSecs { - secs: u32, + pub secs: u32, } #[allow(clippy::expect_used)] diff --git a/src/idempotency_middleware.rs b/src/idempotency_middleware.rs new file mode 100644 index 000000000..2598cd12e --- /dev/null +++ b/src/idempotency_middleware.rs @@ -0,0 +1,176 @@ +use actix_web::{ + body::EitherBody, + dev::{forward_ready, Service, ServiceRequest, ServiceResponse, Transform}, + http::Method, + Error, + HttpMessage, + HttpResponse, +}; +use futures_util::future::LocalBoxFuture; +use lemmy_api_common::lemmy_db_views::structs::LocalUserView; +use lemmy_db_schema::newtypes::LocalUserId; +use lemmy_utils::rate_limit::rate_limiter::InstantSecs; +use std::{ + collections::HashSet, + future::{ready, Ready}, + hash::{Hash, Hasher}, + sync::{Arc, RwLock}, + time::Duration, +}; + +/// https://www.ietf.org/archive/id/draft-ietf-httpapi-idempotency-key-header-01.html +const IDEMPOTENCY_HEADER: &str = "Idempotency-Key"; + +/// Delete idempotency keys older than this +const CLEANUP_INTERVAL_SECS: u32 = 120; + +#[derive(Debug)] +struct Entry { + user_id: LocalUserId, + key: String, + // Creation time is ignored for Eq, Hash and only used to cleanup old entries + created: InstantSecs, +} + +impl PartialEq for Entry { + fn eq(&self, other: &Self) -> bool { + self.user_id == other.user_id && self.key == other.key + } +} +impl Eq for Entry {} + +impl Hash for Entry { + fn hash(&self, state: &mut H) { + self.user_id.hash(state); + self.key.hash(state); + } +} + +#[derive(Clone)] +pub struct IdempotencySet { + set: Arc>>, +} + +impl Default for IdempotencySet { + fn default() -> Self { + let set: Arc>> = Default::default(); + + let set_ = set.clone(); + tokio::spawn(async move { + let interval = Duration::from_secs(CLEANUP_INTERVAL_SECS.into()); + let state_weak_ref = Arc::downgrade(&set_); + + // Run at every interval to delete entries older than the interval. + // This loop stops when all other references to `state` are dropped. + while let Some(state) = state_weak_ref.upgrade() { + tokio::time::sleep(interval).await; + let now = InstantSecs::now(); + #[allow(clippy::expect_used)] + let mut lock = state.write().expect("lock failed"); + lock.retain(|e| e.created.secs > now.secs.saturating_sub(CLEANUP_INTERVAL_SECS)); + lock.shrink_to_fit(); + } + }); + Self { set } + } +} + +pub struct IdempotencyMiddleware { + idempotency_set: IdempotencySet, +} + +impl IdempotencyMiddleware { + pub fn new(idempotency_set: IdempotencySet) -> Self { + Self { idempotency_set } + } +} + +impl Transform for IdempotencyMiddleware +where + S: Service, Error = Error>, + S::Future: 'static, + B: 'static, +{ + type Response = ServiceResponse>; + type Error = Error; + type InitError = (); + type Transform = IdempotencyService; + type Future = Ready>; + + fn new_transform(&self, service: S) -> Self::Future { + ready(Ok(IdempotencyService { + service, + idempotency_set: self.idempotency_set.clone(), + })) + } +} + +pub struct IdempotencyService { + service: S, + idempotency_set: IdempotencySet, +} + +impl Service for IdempotencyService +where + S: Service, Error = Error>, + S::Future: 'static, + B: 'static, +{ + type Response = ServiceResponse>; + type Error = Error; + type Future = LocalBoxFuture<'static, Result>; + + forward_ready!(service); + + #[allow(clippy::expect_used)] + fn call(&self, req: ServiceRequest) -> Self::Future { + let is_post_or_put = req.method() == Method::POST || req.method() == Method::PUT; + let idempotency = req + .headers() + .get(IDEMPOTENCY_HEADER) + .map(|i| i.to_str().unwrap_or_default().to_string()) + // Ignore values longer than 32 chars + .and_then(|i| (i.len() <= 32).then_some(i)) + // Only use idempotency for POST and PUT requests + .and_then(|i| is_post_or_put.then_some(i)); + + let user_id = { + let ext = req.extensions(); + ext.get().map(|u: &LocalUserView| u.local_user.id) + }; + + if let (Some(key), Some(user_id)) = (idempotency, user_id) { + let value = Entry { + user_id, + key, + created: InstantSecs::now(), + }; + if self + .idempotency_set + .set + .read() + .expect("lock failed") + .contains(&value) + { + // Duplicate request, return error + let (req, _pl) = req.into_parts(); + let response = HttpResponse::UnprocessableEntity() + .finish() + .map_into_right_body(); + return Box::pin(async { Ok(ServiceResponse::new(req, response)) }); + } else { + // New request, store key and continue + self + .idempotency_set + .set + .write() + .expect("lock failed") + .insert(value); + } + } + + let fut = self.service.call(req); + + Box::pin(async move { fut.await.map(ServiceResponse::map_into_left_body) }) + } +} diff --git a/src/lib.rs b/src/lib.rs index bd84d0264..2de3b106b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ pub mod api_routes_v3; pub mod api_routes_v4; pub mod code_migrations; +pub mod idempotency_middleware; pub mod prometheus_metrics; pub mod scheduled_tasks; pub mod session_middleware; @@ -18,6 +19,7 @@ use actix_web::{ }; use actix_web_prom::PrometheusMetricsBuilder; use clap::{Parser, Subcommand}; +use idempotency_middleware::{IdempotencyMiddleware, IdempotencySet}; use lemmy_api::sitemap::get_sitemap; use lemmy_api_common::{ context::LemmyContext, @@ -334,6 +336,9 @@ fn create_http_server( .build() .map_err(|e| LemmyErrorType::Unknown(format!("Should always be buildable: {e}")))?; + // Must create this outside of HTTP server so that duplicate requests get detected across threads. + let idempotency_set = IdempotencySet::default(); + // Create Http server let bind = (settings.bind, settings.port); let server = HttpServer::new(move || { @@ -355,6 +360,7 @@ fn create_http_server( .app_data(Data::new(context.clone())) .app_data(Data::new(rate_limit_cell.clone())) .wrap(FederationMiddleware::new(federation_config.clone())) + .wrap(IdempotencyMiddleware::new(idempotency_set.clone())) .wrap(SessionMiddleware::new(context.clone())) .wrap(Condition::new( SETTINGS.prometheus.is_some(),