From e8a52d3a5c0af9b4c5c210a5c63446262c876711 Mon Sep 17 00:00:00 2001 From: Nutomic Date: Thu, 25 Jan 2024 15:22:11 +0100 Subject: [PATCH] Rewrite images to use local proxy (#4035) * Add markdown rule to add rel=nofollow for all links * Add markdown image rule to add local image proxy (fixes #1036) * comments * rewrite markdown image links working * add comment * perform markdown image processing in api/apub receivers * clippy * add db table to validate proxied links * rewrite link fields for avatar, banner etc * sql fmt * proxy links received over federation * add config option * undo post.url rewriting, move http route definition * add tests * proxy images through pictrs * testing * cleanup request.rs file * more cleanup (fixes #2611) * include url content type when sending post over apub (fixes #2611) * store post url content type in db * should be media_type * get rid of cache_remote_thumbnails setting, instead automatically take thumbnail from federation data if available. * fix tests * add setting disable_external_link_previews * federate post url as image depending on mime type * change setting again * machete * invert * support custom emoji * clippy * update defaults * add image proxy test, fix test * fix test * clippy * revert accidental changes * address review * clippy * Markdown link rule-dess (#4356) * Extracting opengraph_data to its own type. * A few additions for markdown-link-rule. 
--------- Co-authored-by: Nutomic * fix setting * use enum for image proxy setting * fix test configs * add config backwards compat * clippy * machete --------- Co-authored-by: Dessalines --- Cargo.lock | 11 +- Cargo.toml | 2 +- api_tests/src/image.spec.ts | 92 ++++- api_tests/src/post.spec.ts | 18 +- api_tests/src/shared.ts | 11 +- config/defaults.hjson | 35 +- crates/api/src/local_user/save_settings.rs | 17 +- crates/api/src/post/get_link_metadata.rs | 4 +- crates/api/src/site/purge/person.rs | 4 +- crates/api_common/Cargo.toml | 18 +- crates/api_common/src/context.rs | 67 +++- crates/api_common/src/post.rs | 19 +- crates/api_common/src/request.rs | 324 +++++++++--------- crates/api_common/src/utils.rs | 168 ++++++++- crates/api_crud/src/comment/create.rs | 13 +- crates/api_crud/src/comment/update.rs | 15 +- crates/api_crud/src/community/create.rs | 15 +- crates/api_crud/src/community/update.rs | 17 +- crates/api_crud/src/post/create.rs | 29 +- crates/api_crud/src/post/update.rs | 45 ++- crates/api_crud/src/private_message/create.rs | 6 +- crates/api_crud/src/private_message/update.rs | 7 +- crates/api_crud/src/site/create.rs | 22 +- crates/api_crud/src/site/update.rs | 21 +- crates/apub/Cargo.toml | 2 - .../apub/src/activities/community/update.rs | 37 +- crates/apub/src/api/user_settings_backup.rs | 9 +- .../src/collections/community_moderators.rs | 8 +- crates/apub/src/objects/comment.rs | 14 +- crates/apub/src/objects/community.rs | 52 ++- crates/apub/src/objects/instance.rs | 30 +- crates/apub/src/objects/mod.rs | 51 --- crates/apub/src/objects/person.rs | 26 +- crates/apub/src/objects/post.rs | 72 ++-- crates/apub/src/objects/private_message.rs | 14 +- crates/apub/src/protocol/objects/group.rs | 65 ---- crates/apub/src/protocol/objects/page.rs | 34 +- crates/db_schema/src/impls/image_upload.rs | 35 -- crates/db_schema/src/impls/images.rs | 78 +++++ crates/db_schema/src/impls/mod.rs | 2 +- crates/db_schema/src/impls/post.rs | 1 + 
crates/db_schema/src/schema.rs | 32 +- crates/db_schema/src/source/images.rs | 50 +++ crates/db_schema/src/source/mod.rs | 2 +- crates/db_schema/src/source/post.rs | 3 + crates/db_views/src/comment_view.rs | 1 + crates/db_views/src/post_view.rs | 1 + crates/routes/Cargo.toml | 1 + crates/routes/src/images.rs | 44 ++- crates/utils/Cargo.toml | 2 +- crates/utils/src/settings/mod.rs | 23 +- crates/utils/src/settings/structs.rs | 42 ++- crates/utils/src/utils/markdown.rs | 113 ------ crates/utils/src/utils/markdown/link_rule.rs | 38 ++ crates/utils/src/utils/markdown/mod.rs | 246 +++++++++++++ docker/federation/lemmy_epsilon.hjson | 4 + docker/federation/lemmy_gamma.hjson | 4 + docker/lemmy.hjson | 1 + .../2023-10-24-131607_proxy_links/down.sql | 4 + .../2023-10-24-131607_proxy_links/up.sql | 8 + .../down.sql | 3 + .../up.sql | 3 + src/api_routes_http.rs | 2 + src/lib.rs | 13 +- 64 files changed, 1455 insertions(+), 695 deletions(-) delete mode 100644 crates/db_schema/src/impls/image_upload.rs create mode 100644 crates/db_schema/src/impls/images.rs create mode 100644 crates/db_schema/src/source/images.rs delete mode 100644 crates/utils/src/utils/markdown.rs create mode 100644 crates/utils/src/utils/markdown/link_rule.rs create mode 100644 crates/utils/src/utils/markdown/mod.rs create mode 100644 migrations/2023-10-24-131607_proxy_links/down.sql create mode 100644 migrations/2023-10-24-131607_proxy_links/up.sql create mode 100644 migrations/2023-10-27-142514_post_url_content_type/down.sql create mode 100644 migrations/2023-10-27-142514_post_url_content_type/up.sql diff --git a/Cargo.lock b/Cargo.lock index e56b26b272..0f7695df57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2571,6 +2571,8 @@ version = "0.19.3" dependencies = [ "activitypub_federation", "actix-web", + "anyhow", + "async-trait", "chrono", "encoding", "enum-map", @@ -2582,8 +2584,8 @@ dependencies = [ "lemmy_db_views_actor", "lemmy_db_views_moderator", "lemmy_utils", + "mime", "once_cell", - 
"percent-encoding", "pretty_assertions", "regex", "reqwest", @@ -2592,10 +2594,12 @@ dependencies = [ "serde", "serde_with", "serial_test", + "task-local-extensions", "tokio", "tracing", "ts-rs", "url", + "urlencoding", "uuid", "webpage", ] @@ -2648,14 +2652,12 @@ dependencies = [ "once_cell", "pretty_assertions", "reqwest", - "reqwest-middleware", "serde", "serde_json", "serde_with", "serial_test", "stringreader", "strum_macros", - "task-local-extensions", "tokio", "tracing", "url", @@ -2811,6 +2813,7 @@ dependencies = [ "tokio", "tracing", "url", + "urlencoding", ] [[package]] @@ -2874,7 +2877,6 @@ dependencies = [ "markdown-it", "once_cell", "openssl", - "percent-encoding", "pretty_assertions", "regex", "reqwest", @@ -2891,6 +2893,7 @@ dependencies = [ "tracing-error", "ts-rs", "url", + "urlencoding", "uuid", ] diff --git a/Cargo.toml b/Cargo.toml index 4ff7eb2be7..60e85a1934 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -146,7 +146,6 @@ strum_macros = "0.25.3" itertools = "0.12.0" futures = "0.3.30" http = "0.2.11" -percent-encoding = "2.3.1" rosetta-i18n = "0.1.3" opentelemetry = { version = "0.19.0", features = ["rt-tokio"] } tracing-opentelemetry = { version = "0.19.0" } @@ -155,6 +154,7 @@ rustls = { version = "0.21.10", features = ["dangerous_configuration"] } futures-util = "0.3.30" tokio-postgres = "0.7.10" tokio-postgres-rustls = "0.10.0" +urlencoding = "2.1.3" enum-map = "2.7" moka = { version = "0.12.4", features = ["future"] } i-love-jesus = { version = "0.1.0" } diff --git a/api_tests/src/image.spec.ts b/api_tests/src/image.spec.ts index 569507d07f..1926fee33e 100644 --- a/api_tests/src/image.spec.ts +++ b/api_tests/src/image.spec.ts @@ -7,16 +7,23 @@ import { PurgePost, } from "lemmy-js-client"; import { + alpha, alphaImage, alphaUrl, beta, betaUrl, + createCommunity, createPost, + delta, + epsilon, + gamma, getSite, registerUser, resolveBetaCommunity, + resolvePost, setupLogins, unfollowRemotes, + waitForPost, } from "./shared"; const 
downloadFileSync = require("download-file-sync"); @@ -29,9 +36,8 @@ afterAll(() => { test("Upload image and delete it", async () => { // Upload test image. We use a simple string buffer as pictrs doesnt require an actual image // in testing mode. - const upload_image = Buffer.from("test"); const upload_form: UploadImage = { - image: upload_image, + image: Buffer.from("test"), }; const upload = await alphaImage.uploadImage(upload_form); expect(upload.files![0].file).toBeDefined(); @@ -60,9 +66,8 @@ test("Purge user, uploaded image removed", async () => { let user = await registerUser(alphaImage, alphaUrl); // upload test image - const upload_image = Buffer.from("test"); const upload_form: UploadImage = { - image: upload_image, + image: Buffer.from("test"), }; const upload = await user.uploadImage(upload_form); expect(upload.files![0].file).toBeDefined(); @@ -91,9 +96,8 @@ test("Purge post, linked image removed", async () => { let user = await registerUser(beta, betaUrl); // upload test image - const upload_image = Buffer.from("test"); const upload_form: UploadImage = { - image: upload_image, + image: Buffer.from("test"), }; const upload = await user.uploadImage(upload_form); expect(upload.files![0].file).toBeDefined(); @@ -124,3 +128,79 @@ test("Purge post, linked image removed", async () => { const content2 = downloadFileSync(upload.url); expect(content2).toBe(""); }); + +test("Images in remote post are proxied if setting enabled", async () => { + let user = await registerUser(beta, betaUrl); + let community = await createCommunity(gamma); + + const upload_form: UploadImage = { + image: Buffer.from("test"), + }; + const upload = await user.uploadImage(upload_form); + let post = await createPost( + gamma, + community.community_view.community.id, + upload.url, + "![](http://example.com/image2.png)", + ); + expect(post.post_view.post).toBeDefined(); + + // remote image gets proxied after upload + expect( + post.post_view.post.url?.startsWith( + 
"http://lemmy-gamma:8561/api/v3/image_proxy?url", + ), + ).toBeTruthy(); + expect( + post.post_view.post.body?.startsWith( + "![](http://lemmy-gamma:8561/api/v3/image_proxy?url", + ), + ).toBeTruthy(); + + let epsilonPost = await resolvePost(epsilon, post.post_view.post); + expect(epsilonPost.post).toBeDefined(); + + // remote image gets proxied after federation + expect( + epsilonPost.post!.post.url?.startsWith( + "http://lemmy-epsilon:8581/api/v3/image_proxy?url", + ), + ).toBeTruthy(); + expect( + epsilonPost.post!.post.body?.startsWith( + "![](http://lemmy-epsilon:8581/api/v3/image_proxy?url", + ), + ).toBeTruthy(); +}); + +test("No image proxying if setting is disabled", async () => { + let user = await registerUser(beta, betaUrl); + let community = await createCommunity(alpha); + + const upload_form: UploadImage = { + image: Buffer.from("test"), + }; + const upload = await user.uploadImage(upload_form); + let post = await createPost( + alpha, + community.community_view.community.id, + upload.url, + "![](http://example.com/image2.png)", + ); + expect(post.post_view.post).toBeDefined(); + + // remote image doesnt get proxied after upload + expect( + post.post_view.post.url?.startsWith("http://127.0.0.1:8551/pictrs/image/"), + ).toBeTruthy(); + expect(post.post_view.post.body).toBe("![](http://example.com/image2.png)"); + + let gammaPost = await resolvePost(delta, post.post_view.post); + expect(gammaPost.post).toBeDefined(); + + // remote image doesnt get proxied after federation + expect( + gammaPost.post!.post.url?.startsWith("http://127.0.0.1:8551/pictrs/image/"), + ).toBeTruthy(); + expect(gammaPost.post!.post.body).toBe("![](http://example.com/image2.png)"); +}); diff --git a/api_tests/src/post.spec.ts b/api_tests/src/post.spec.ts index 780160c391..74496d3455 100644 --- a/api_tests/src/post.spec.ts +++ b/api_tests/src/post.spec.ts @@ -39,7 +39,7 @@ import { loginUser, } from "./shared"; import { PostView } from "lemmy-js-client/dist/types/PostView"; -import 
{ ResolveObject } from "lemmy-js-client"; +import { EditSite, ResolveObject } from "lemmy-js-client"; let betaCommunity: CommunityView | undefined; @@ -72,6 +72,16 @@ function assertPostFederation(postOne?: PostView, postTwo?: PostView) { } test("Create a post", async () => { + // Setup some allowlists and blocklists + let editSiteForm: EditSite = { + allowed_instances: ["lemmy-beta"], + }; + await delta.editSite(editSiteForm); + + editSiteForm.allowed_instances = []; + editSiteForm.blocked_instances = ["lemmy-alpha"]; + await epsilon.editSite(editSiteForm); + if (!betaCommunity) { throw "Missing beta community"; } @@ -109,6 +119,12 @@ test("Create a post", async () => { await expect( resolvePost(epsilon, postRes.post_view.post), ).rejects.toStrictEqual(Error("couldnt_find_object")); + + // remove added allow/blocklists + editSiteForm.allowed_instances = []; + editSiteForm.blocked_instances = []; + await delta.editSite(editSiteForm); + await epsilon.editSite(editSiteForm); }); test("Create a post in a non-existent community", async () => { diff --git a/api_tests/src/shared.ts b/api_tests/src/shared.ts index 13b07c9b79..621f64dfda 100644 --- a/api_tests/src/shared.ts +++ b/api_tests/src/shared.ts @@ -177,13 +177,6 @@ export async function setupLogins() { ]; await gamma.editSite(editSiteForm); - editSiteForm.allowed_instances = ["lemmy-beta"]; - await delta.editSite(editSiteForm); - - editSiteForm.allowed_instances = []; - editSiteForm.blocked_instances = ["lemmy-alpha"]; - await epsilon.editSite(editSiteForm); - // Create the main alpha/beta communities // Ignore thrown errors of duplicates try { @@ -203,10 +196,10 @@ export async function createPost( api: LemmyHttp, community_id: number, url: string = "https://example.com/", + body = randomString(10), // use example.com for consistent title and embed description name: string = randomString(5), ): Promise { - let body = randomString(10); let form: CreatePost = { name, url, @@ -528,7 +521,7 @@ export async function 
likeComment( export async function createCommunity( api: LemmyHttp, - name_: string = randomString(5), + name_: string = randomString(10), ): Promise { let description = "a sample description"; let form: CreateCommunity = { diff --git a/config/defaults.hjson b/config/defaults.hjson index b1c6d9ba54..c52f9055e2 100644 --- a/config/defaults.hjson +++ b/config/defaults.hjson @@ -36,22 +36,41 @@ # Maximum number of active sql connections pool_size: 30 } - # Settings related to activitypub federation # Pictrs image server configuration. pictrs: { # Address where pictrs is available (for image hosting) url: "http://localhost:8080/" # Set a custom pictrs API key. ( Required for deleting images ) api_key: "string" - # By default the thumbnails for external links are stored in pict-rs. This ensures that they - # can be reliably retrieved and can be resized using pict-rs APIs. However it also increases - # storage usage. In case this is disabled, the Opengraph image is directly returned as - # thumbnail. + # Backwards compatibility with 0.18.1. False is equivalent to `image_mode: None`, true is + # equivalent to `image_mode: StoreLinkPreviews`. # - # In some countries it is forbidden to copy preview images from newspaper articles and only - # hotlinking is allowed. If that is the case for your instance, make sure that this setting is - # disabled. + # To be removed in 0.20 cache_external_link_previews: true + # Specifies how to handle remote images, so that users don't have to connect directly to remote servers. + image_mode: + # Leave images unchanged, don't generate any local thumbnails for post urls. Instead the + # Opengraph image is directly returned as thumbnail + "None" + + # or + + # Generate thumbnails for external post urls and store them persistently in pict-rs. This + # ensures that they can be reliably retrieved and can be resized using pict-rs APIs. However + # it also increases storage usage. 
+ # + # This is the default behaviour, and also matches Lemmy 0.18. + "StoreLinkPreviews" + + # or + + # If enabled, all images from remote domains are rewritten to pass through `/api/v3/image_proxy`, + # including embedded images in markdown. Images are stored temporarily in pict-rs for caching. + # This improves privacy as users don't expose their IP to untrusted servers, and decreases load + # on other servers. However it increases bandwidth use for the local server. + # + # Requires pict-rs 0.5 + "ProxyAllImages" # Timeout for uploading images to pictrs (in seconds) upload_timeout: 30 } diff --git a/crates/api/src/local_user/save_settings.rs b/crates/api/src/local_user/save_settings.rs index a3f30bf1a3..79b95133e6 100644 --- a/crates/api/src/local_user/save_settings.rs +++ b/crates/api/src/local_user/save_settings.rs @@ -2,7 +2,12 @@ use actix_web::web::{Data, Json}; use lemmy_api_common::{ context::LemmyContext, person::SaveUserSettings, - utils::send_verification_email, + utils::{ + local_site_to_slur_regex, + process_markdown_opt, + proxy_image_link_opt_api, + send_verification_email, + }, SuccessResponse, }; use lemmy_db_schema::{ @@ -12,7 +17,7 @@ use lemmy_db_schema::{ person::{Person, PersonUpdateForm}, }, traits::Crud, - utils::{diesel_option_overwrite, diesel_option_overwrite_to_url}, + utils::diesel_option_overwrite, }; use lemmy_db_views::structs::{LocalUserView, SiteView}; use lemmy_utils::{ @@ -28,9 +33,11 @@ pub async fn save_user_settings( ) -> Result, LemmyError> { let site_view = SiteView::read_local(&mut context.pool()).await?; - let avatar = diesel_option_overwrite_to_url(&data.avatar)?; - let banner = diesel_option_overwrite_to_url(&data.banner)?; - let bio = diesel_option_overwrite(data.bio.clone()); + let slur_regex = local_site_to_slur_regex(&site_view.local_site); + let bio = diesel_option_overwrite(process_markdown_opt(&data.bio, &slur_regex, &context).await?); + + let avatar = proxy_image_link_opt_api(&data.avatar, &context).await?; + 
let banner = proxy_image_link_opt_api(&data.banner, &context).await?; let display_name = diesel_option_overwrite(data.display_name.clone()); let matrix_user_id = diesel_option_overwrite(data.matrix_user_id.clone()); let email_deref = data.email.as_deref().map(str::to_lowercase); diff --git a/crates/api/src/post/get_link_metadata.rs b/crates/api/src/post/get_link_metadata.rs index c576d6f4a8..a6a0c973b0 100644 --- a/crates/api/src/post/get_link_metadata.rs +++ b/crates/api/src/post/get_link_metadata.rs @@ -2,7 +2,7 @@ use actix_web::web::{Data, Json, Query}; use lemmy_api_common::{ context::LemmyContext, post::{GetSiteMetadata, GetSiteMetadataResponse}, - request::fetch_site_metadata, + request::fetch_link_metadata, }; use lemmy_utils::error::LemmyError; @@ -11,7 +11,7 @@ pub async fn get_link_metadata( data: Query, context: Data, ) -> Result, LemmyError> { - let metadata = fetch_site_metadata(context.client(), &data.url).await?; + let metadata = fetch_link_metadata(&data.url, false, &context).await?; Ok(Json(GetSiteMetadataResponse { metadata })) } diff --git a/crates/api/src/site/purge/person.rs b/crates/api/src/site/purge/person.rs index 3f14477863..a2c3b6e605 100644 --- a/crates/api/src/site/purge/person.rs +++ b/crates/api/src/site/purge/person.rs @@ -8,7 +8,7 @@ use lemmy_api_common::{ }; use lemmy_db_schema::{ source::{ - image_upload::ImageUpload, + images::LocalImage, moderator::{AdminPurgePerson, AdminPurgePersonForm}, person::{Person, PersonUpdateForm}, }, @@ -31,7 +31,7 @@ pub async fn purge_person( if let Ok(local_user) = LocalUserView::read_person(&mut context.pool(), person_id).await { let pictrs_uploads = - ImageUpload::get_all_by_local_user_id(&mut context.pool(), &local_user.local_user.id).await?; + LocalImage::get_all_by_local_user_id(&mut context.pool(), &local_user.local_user.id).await?; for upload in pictrs_uploads { delete_image_from_pictrs(&upload.pictrs_alias, &upload.pictrs_delete_token, &context) diff --git a/crates/api_common/Cargo.toml 
b/crates/api_common/Cargo.toml index a9db23f869..d9310a58da 100644 --- a/crates/api_common/Cargo.toml +++ b/crates/api_common/Cargo.toml @@ -25,7 +25,6 @@ full = [ "lemmy_db_views_actor/full", "lemmy_db_views_moderator/full", "activitypub_federation", - "percent-encoding", "encoding", "reqwest-middleware", "webpage", @@ -37,6 +36,7 @@ full = [ "futures", "once_cell", "jsonwebtoken", + "mime", ] [dependencies] @@ -54,11 +54,7 @@ tracing = { workspace = true, optional = true } reqwest-middleware = { workspace = true, optional = true } regex = { workspace = true } rosetta-i18n = { workspace = true, optional = true } -percent-encoding = { workspace = true, optional = true } -webpage = { version = "1.6", default-features = false, features = [ - "serde", -], optional = true } -encoding = { version = "0.2.33", optional = true } +anyhow = { workspace = true } futures = { workspace = true, optional = true } uuid = { workspace = true, optional = true } tokio = { workspace = true, optional = true } @@ -66,10 +62,18 @@ reqwest = { workspace = true, optional = true } ts-rs = { workspace = true, optional = true } once_cell = { workspace = true, optional = true } actix-web = { workspace = true, optional = true } +enum-map = { workspace = true } +urlencoding = { workspace = true } +async-trait = { workspace = true } +mime = { version = "0.3.17", optional = true } +webpage = { version = "1.6", default-features = false, features = [ + "serde", +], optional = true } +encoding = { version = "0.2.33", optional = true } jsonwebtoken = { version = "8.3.0", optional = true } # necessary for wasmt compilation getrandom = { version = "0.2.12", features = ["js"] } -enum-map = { workspace = true } +task-local-extensions = "0.1.4" [package.metadata.cargo-machete] ignored = ["getrandom"] diff --git a/crates/api_common/src/context.rs b/crates/api_common/src/context.rs index 888a987413..8d8dc50132 100644 --- a/crates/api_common/src/context.rs +++ b/crates/api_common/src/context.rs @@ -1,13 +1,18 
@@ +use crate::request::client_builder; +use activitypub_federation::config::{Data, FederationConfig}; +use anyhow::anyhow; use lemmy_db_schema::{ source::secret::Secret, - utils::{ActualDbPool, DbPool}, + utils::{build_db_pool_for_tests, ActualDbPool, DbPool}, }; use lemmy_utils::{ rate_limit::RateLimitCell, settings::{structs::Settings, SETTINGS}, }; -use reqwest_middleware::ClientWithMiddleware; +use reqwest::{Request, Response}; +use reqwest_middleware::{ClientBuilder, ClientWithMiddleware, Middleware, Next}; use std::sync::Arc; +use task_local_extensions::Extensions; #[derive(Clone)] pub struct LemmyContext { @@ -49,4 +54,62 @@ impl LemmyContext { pub fn rate_limit_cell(&self) -> &RateLimitCell { &self.rate_limit_cell } + + /// Initialize a context for use in tests, optionally blocks network requests. + /// + /// Do not use this in production code. + pub async fn init_test_context() -> Data { + Self::build_test_context(true).await + } + + /// Initialize a context for use in tests, with network requests allowed. + /// TODO: get rid of this if possible. + /// + /// Do not use this in production code. 
+ pub async fn init_test_context_with_networking() -> Data { + Self::build_test_context(false).await + } + + async fn build_test_context(block_networking: bool) -> Data { + // call this to run migrations + let pool = build_db_pool_for_tests().await; + + let client = client_builder(&SETTINGS).build().expect("build client"); + + let mut client = ClientBuilder::new(client); + if block_networking { + client = client.with(BlockedMiddleware); + } + let client = client.build(); + let secret = Secret { + id: 0, + jwt_secret: String::new(), + }; + + let rate_limit_cell = RateLimitCell::with_test_config(); + + let context = LemmyContext::create(pool, client, secret, rate_limit_cell.clone()); + let config = FederationConfig::builder() + .domain(context.settings().hostname.clone()) + .app_data(context) + .build() + .await + .expect("build federation config"); + config.to_request_data() + } +} + +struct BlockedMiddleware; + +/// A reqwest middleware which blocks all requests +#[async_trait::async_trait] +impl Middleware for BlockedMiddleware { + async fn handle( + &self, + _req: Request, + _extensions: &mut Extensions, + _next: Next<'_>, + ) -> reqwest_middleware::Result { + Err(anyhow!("Network requests not allowed").into()) + } } diff --git a/crates/api_common/src/post.rs b/crates/api_common/src/post.rs index 5b3a761101..8f482527c5 100644 --- a/crates/api_common/src/post.rs +++ b/crates/api_common/src/post.rs @@ -238,15 +238,28 @@ pub struct GetSiteMetadata { #[cfg_attr(feature = "full", ts(export))] /// The site metadata response. pub struct GetSiteMetadataResponse { - pub metadata: SiteMetadata, + pub metadata: LinkMetadata, } #[skip_serializing_none] -#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, Clone)] +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, Clone, Default)] #[cfg_attr(feature = "full", derive(TS))] #[cfg_attr(feature = "full", ts(export))] /// Site metadata, from its opengraph tags. 
-pub struct SiteMetadata { +pub struct LinkMetadata { + #[serde(flatten)] + pub opengraph_data: OpenGraphData, + pub content_type: Option, + #[serde(skip)] + pub thumbnail: Option, +} + +#[skip_serializing_none] +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, Clone, Default)] +#[cfg_attr(feature = "full", derive(TS))] +#[cfg_attr(feature = "full", ts(export))] +/// Site metadata, from its opengraph tags. +pub struct OpenGraphData { pub title: Option, pub description: Option, pub(crate) image: Option, diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index 741efee389..b9ab42f7cd 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -1,39 +1,91 @@ -use crate::{context::LemmyContext, post::SiteMetadata}; +use crate::{ + context::LemmyContext, + post::{LinkMetadata, OpenGraphData}, + utils::proxy_image_link, +}; use encoding::{all::encodings, DecoderTrap}; use lemmy_db_schema::newtypes::DbUrl; use lemmy_utils::{ error::{LemmyError, LemmyErrorType}, - settings::structs::Settings, + settings::structs::{PictrsImageMode, Settings}, version::VERSION, REQWEST_TIMEOUT, }; -use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; -use reqwest::{Client, ClientBuilder}; +use mime::Mime; +use reqwest::{header::CONTENT_TYPE, Client, ClientBuilder}; use reqwest_middleware::ClientWithMiddleware; use serde::Deserialize; use tracing::info; use url::Url; +use urlencoding::encode; use webpage::HTML; -/// Fetches the post link html tags (like title, description, image, etc) +pub fn client_builder(settings: &Settings) -> ClientBuilder { + let user_agent = format!( + "Lemmy/{}; +{}", + VERSION, + settings.get_protocol_and_hostname() + ); + + Client::builder() + .user_agent(user_agent.clone()) + .timeout(REQWEST_TIMEOUT) + .connect_timeout(REQWEST_TIMEOUT) +} + +/// Fetches metadata for the given link and optionally generates thumbnail. 
#[tracing::instrument(skip_all)] -pub async fn fetch_site_metadata( - client: &ClientWithMiddleware, +pub async fn fetch_link_metadata( url: &Url, -) -> Result { + generate_thumbnail: bool, + context: &LemmyContext, +) -> Result { info!("Fetching site metadata for url: {}", url); - let response = client.get(url.as_str()).send().await?; + let response = context.client().get(url.as_str()).send().await?; + + let content_type: Option = response + .headers() + .get(CONTENT_TYPE) + .and_then(|h| h.to_str().ok()) + .and_then(|h| h.parse().ok()); // Can't use .text() here, because it only checks the content header, not the actual bytes // https://github.com/LemmyNet/lemmy/issues/1964 let html_bytes = response.bytes().await.map_err(LemmyError::from)?.to_vec(); - let tags = html_to_site_metadata(&html_bytes, url)?; + let opengraph_data = extract_opengraph_data(&html_bytes, url).unwrap_or_default(); + let thumbnail = extract_thumbnail_from_opengraph_data( + url, + &opengraph_data, + &content_type, + generate_thumbnail, + context, + ) + .await; - Ok(tags) + Ok(LinkMetadata { + opengraph_data, + content_type: content_type.map(|c| c.to_string()), + thumbnail, + }) } -fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result { +#[tracing::instrument(skip_all)] +pub async fn fetch_link_metadata_opt( + url: Option<&Url>, + generate_thumbnail: bool, + context: &LemmyContext, +) -> LinkMetadata { + match &url { + Some(url) => fetch_link_metadata(url, generate_thumbnail, context) + .await + .unwrap_or_default(), + _ => Default::default(), + } +} + +/// Extract site metadata from HTML Opengraph attributes. 
+fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result { let html = String::from_utf8_lossy(html_bytes); // Make sure the first line is doctype html @@ -89,7 +141,7 @@ fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result Result, + generate_thumbnail: bool, + context: &LemmyContext, +) -> Option { + let is_image = content_type.as_ref().unwrap_or(&mime::TEXT_PLAIN).type_() == mime::IMAGE; + if generate_thumbnail && is_image { + let image_url = opengraph_data + .image + .as_ref() + .map(lemmy_db_schema::newtypes::DbUrl::inner) + .unwrap_or(url); + generate_pictrs_thumbnail(image_url, context) + .await + .ok() + .map(Into::into) + } else { + None + } +} + +#[derive(Deserialize, Debug)] +struct PictrsResponse { files: Vec, msg: String, } -#[derive(Deserialize, Debug, Clone)] -pub(crate) struct PictrsFile { +#[derive(Deserialize, Debug)] +struct PictrsFile { file: String, #[allow(dead_code)] delete_token: String, } -#[derive(Deserialize, Debug, Clone)] -pub(crate) struct PictrsPurgeResponse { +#[derive(Deserialize, Debug)] +struct PictrsPurgeResponse { msg: String, } -#[tracing::instrument(skip_all)] -pub(crate) async fn fetch_pictrs( - client: &ClientWithMiddleware, - settings: &Settings, - image_url: &Url, -) -> Result { - let pictrs_config = settings.pictrs_config()?; - is_image_content_type(client, image_url).await?; - - if pictrs_config.cache_external_link_previews { - // fetch remote non-pictrs images for persistent thumbnail link - let fetch_url = format!( - "{}image/download?url={}", - pictrs_config.url, - utf8_percent_encode(image_url.as_str(), NON_ALPHANUMERIC) // TODO this might not be needed - ); - - let response = client - .get(&fetch_url) - .timeout(REQWEST_TIMEOUT) - .send() - .await?; - - let response: PictrsResponse = response.json().await.map_err(LemmyError::from)?; - - if response.msg == "ok" { - Ok(response) - } else { - Err(LemmyErrorType::PictrsResponseError(response.msg))? 
- } - } else { - Err(LemmyErrorType::PictrsCachingDisabled)? - } -} - /// Purges an image from pictrs /// Note: This should often be coerced from a Result to .ok() in order to fail softly, because: /// - It might fail due to image being not local @@ -167,13 +208,6 @@ pub async fn purge_image_from_pictrs( .next_back() .ok_or(LemmyErrorType::ImageUrlMissingLastPathSegment)?; - purge_image_from_pictrs_by_alias(alias, context).await -} - -pub async fn purge_image_from_pictrs_by_alias( - alias: &str, - context: &LemmyContext, -) -> Result<(), LemmyError> { let pictrs_config = context.settings().pictrs_config()?; let purge_url = format!("{}internal/purge?alias={}", pictrs_config.url, alias); @@ -190,10 +224,9 @@ pub async fn purge_image_from_pictrs_by_alias( let response: PictrsPurgeResponse = response.json().await.map_err(LemmyError::from)?; - if response.msg == "ok" { - Ok(()) - } else { - Err(LemmyErrorType::PictrsPurgeResponseError(response.msg))? + match response.msg.as_str() { + "ok" => Ok(()), + _ => Err(LemmyErrorType::PictrsPurgeResponseError(response.msg))?, } } @@ -217,62 +250,48 @@ pub async fn delete_image_from_pictrs( Ok(()) } -/// Both are options, since the URL might be either an html page, or an image -/// Returns the SiteMetadata, and an image URL, if there is a picture associated +/// Retrieves the image with local pict-rs and generates a thumbnail. Returns the thumbnail url. #[tracing::instrument(skip_all)] -pub async fn fetch_site_data( - client: &ClientWithMiddleware, - settings: &Settings, - url: Option<&Url>, - include_image: bool, -) -> (Option, Option) { - match &url { - Some(url) => { - // Fetch metadata - // Ignore errors, since it may be an image, or not have the data. 
- // Warning, this may ignore SSL errors - let metadata_option = fetch_site_metadata(client, url).await.ok(); - if !include_image { - (metadata_option, None) - } else { - let thumbnail_url = - fetch_pictrs_url_from_site_metadata(client, &metadata_option, settings, url) - .await - .ok(); - (metadata_option, thumbnail_url) - } - } - None => (None, None), +async fn generate_pictrs_thumbnail( + image_url: &Url, + context: &LemmyContext, +) -> Result { + let pictrs_config = context.settings().pictrs_config()?; + + if pictrs_config.image_mode() == PictrsImageMode::ProxyAllImages { + return Ok(proxy_image_link(image_url.clone(), context).await?.into()); + } + + // fetch remote non-pictrs images for persistent thumbnail link + // TODO: should limit size once supported by pictrs + let fetch_url = format!( + "{}image/download?url={}", + pictrs_config.url, + encode(image_url.as_str()) + ); + + let response = context + .client() + .get(&fetch_url) + .timeout(REQWEST_TIMEOUT) + .send() + .await?; + + let response: PictrsResponse = response.json().await?; + + if response.msg == "ok" { + let thumbnail_url = Url::parse(&format!( + "{}/pictrs/image/{}", + context.settings().get_protocol_and_hostname(), + response.files.first().expect("missing pictrs file").file + ))?; + Ok(thumbnail_url) + } else { + Err(LemmyErrorType::PictrsResponseError(response.msg))? 
} } -async fn fetch_pictrs_url_from_site_metadata( - client: &ClientWithMiddleware, - metadata_option: &Option, - settings: &Settings, - url: &Url, -) -> Result { - let pictrs_res = match metadata_option { - Some(metadata_res) => match &metadata_res.image { - // Metadata, with image - // Try to generate a small thumbnail if there's a full sized one from post-links - Some(metadata_image) => fetch_pictrs(client, settings, metadata_image).await, - // Metadata, but no image - None => fetch_pictrs(client, settings, url).await, - }, - // No metadata, try to fetch the URL as an image - None => fetch_pictrs(client, settings, url).await, - }?; - - Url::parse(&format!( - "{}/pictrs/image/{}", - settings.get_protocol_and_hostname(), - pictrs_res.files.first().expect("missing pictrs file").file - )) - .map(Into::into) - .map_err(Into::into) -} - +// TODO: get rid of this by reading content type from db #[tracing::instrument(skip_all)] async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Result<(), LemmyError> { let response = client.get(url.as_str()).send().await?; @@ -289,51 +308,50 @@ async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Resu } } -pub fn client_builder(settings: &Settings) -> ClientBuilder { - let user_agent = format!( - "Lemmy/{}; +{}", - VERSION, - settings.get_protocol_and_hostname() - ); - - Client::builder() - .user_agent(user_agent) - .timeout(REQWEST_TIMEOUT) - .connect_timeout(REQWEST_TIMEOUT) -} - #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] #![allow(clippy::indexing_slicing)] - use crate::request::{client_builder, fetch_site_metadata, html_to_site_metadata, SiteMetadata}; - use lemmy_utils::settings::SETTINGS; + use crate::{ + context::LemmyContext, + request::{extract_opengraph_data, fetch_link_metadata}, + }; use pretty_assertions::assert_eq; + use serial_test::serial; use url::Url; // These helped with testing #[tokio::test] - async fn test_site_metadata() { - let settings = 
&SETTINGS.clone(); - let client = client_builder(settings).build().unwrap().into(); + #[serial] + async fn test_link_metadata() { + let context = LemmyContext::init_test_context_with_networking().await; let sample_url = Url::parse("https://gitlab.com/IzzyOnDroid/repo/-/wikis/FAQ").unwrap(); - let sample_res = fetch_site_metadata(&client, &sample_url).await.unwrap(); + let sample_res = fetch_link_metadata(&sample_url, false, &context) + .await + .unwrap(); assert_eq!( - SiteMetadata { - title: Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()), - description: Some( - "The F-Droid compatible repo at https://apt.izzysoft.de/fdroid/".to_string() - ), - image: Some( - Url::parse("https://gitlab.com/uploads/-/system/project/avatar/4877469/iod_logo.png") - .unwrap() - .into() - ), - embed_video_url: None, - }, - sample_res + Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()), + sample_res.opengraph_data.title ); + assert_eq!( + Some("The F-Droid compatible repo at https://apt.izzysoft.de/fdroid/".to_string()), + sample_res.opengraph_data.description + ); + assert_eq!( + Some( + Url::parse("https://gitlab.com/uploads/-/system/project/avatar/4877469/iod_logo.png") + .unwrap() + .into() + ), + sample_res.opengraph_data.image + ); + assert_eq!(None, sample_res.opengraph_data.embed_video_url); + assert_eq!( + Some(mime::TEXT_HTML_UTF_8.to_string()), + sample_res.content_type + ); + assert_eq!(None, sample_res.thumbnail); } // #[test] @@ -351,7 +369,7 @@ mod tests { // root relative url let html_bytes = b""; - let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata"); + let metadata = extract_opengraph_data(html_bytes, &url).expect("Unable to parse metadata"); assert_eq!( metadata.image, Some(Url::parse("https://example.com/image.jpg").unwrap().into()) @@ -359,7 +377,7 @@ mod tests { // base relative url let html_bytes = b""; - let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata"); + let 
metadata = extract_opengraph_data(html_bytes, &url).expect("Unable to parse metadata"); assert_eq!( metadata.image, Some( @@ -371,7 +389,7 @@ mod tests { // absolute url let html_bytes = b""; - let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata"); + let metadata = extract_opengraph_data(html_bytes, &url).expect("Unable to parse metadata"); assert_eq!( metadata.image, Some(Url::parse("https://cdn.host.com/image.jpg").unwrap().into()) @@ -379,7 +397,7 @@ mod tests { // protocol relative url let html_bytes = b""; - let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata"); + let metadata = extract_opengraph_data(html_bytes, &url).expect("Unable to parse metadata"); assert_eq!( metadata.image, Some(Url::parse("https://example.com/image.jpg").unwrap().into()) diff --git a/crates/api_common/src/utils.rs b/crates/api_common/src/utils.rs index 605733efcf..55df7a6ec7 100644 --- a/crates/api_common/src/utils.rs +++ b/crates/api_common/src/utils.rs @@ -12,6 +12,7 @@ use lemmy_db_schema::{ community::{Community, CommunityModerator, CommunityUpdateForm}, community_block::CommunityBlock, email_verification::{EmailVerification, EmailVerificationForm}, + images::RemoteImage, instance::Instance, instance_block::InstanceBlock, local_site::LocalSite, @@ -35,14 +36,18 @@ use lemmy_utils::{ email::{send_email, translations::Lang}, error::{LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult}, rate_limit::{ActionType, BucketConfig}, - settings::structs::Settings, - utils::slurs::build_slur_regex, + settings::structs::{PictrsImageMode, Settings}, + utils::{ + markdown::markdown_rewrite_image_links, + slurs::{build_slur_regex, remove_slurs}, + }, }; use regex::Regex; use rosetta_i18n::{Language, LanguageId}; use std::collections::HashSet; use tracing::warn; use url::{ParseError, Url}; +use urlencoding::encode; pub static AUTH_COOKIE_NAME: &str = "jwt"; @@ -848,14 +853,115 @@ fn limit_expire_time(expires: DateTime) -> 
LemmyResult } } +pub async fn process_markdown( + text: &str, + slur_regex: &Option, + context: &LemmyContext, +) -> LemmyResult { + let text = remove_slurs(text, slur_regex); + if context.settings().pictrs_config()?.image_mode() == PictrsImageMode::ProxyAllImages { + let (text, links) = markdown_rewrite_image_links(text); + RemoteImage::create(&mut context.pool(), links).await?; + Ok(text) + } else { + Ok(text) + } +} + +pub async fn process_markdown_opt( + text: &Option, + slur_regex: &Option, + context: &LemmyContext, +) -> LemmyResult> { + match text { + Some(t) => process_markdown(t, slur_regex, context).await.map(Some), + None => Ok(None), + } +} + +/// A wrapper for `proxy_image_link` for use in tests. +/// +/// The parameter `force_image_proxy` is the config value of `pictrs.image_proxy`. Its necessary to pass +/// as separate parameter so it can be changed in tests. +async fn proxy_image_link_internal( + link: Url, + image_mode: PictrsImageMode, + context: &LemmyContext, +) -> LemmyResult { + // Dont rewrite links pointing to local domain. + if link.domain() == Some(&context.settings().hostname) { + Ok(link.into()) + } else if image_mode == PictrsImageMode::ProxyAllImages { + let proxied = format!( + "{}/api/v3/image_proxy?url={}", + context.settings().get_protocol_and_hostname(), + encode(link.as_str()) + ); + RemoteImage::create(&mut context.pool(), vec![link]).await?; + Ok(Url::parse(&proxied)?.into()) + } else { + Ok(link.into()) + } +} + +/// Rewrite a link to go through `/api/v3/image_proxy` endpoint. This is only for remote urls and +/// if image_proxy setting is enabled. 
+pub(crate) async fn proxy_image_link(link: Url, context: &LemmyContext) -> LemmyResult { + proxy_image_link_internal( + link, + context.settings().pictrs_config()?.image_mode(), + context, + ) + .await +} + +pub async fn proxy_image_link_opt_api( + link: &Option, + context: &LemmyContext, +) -> LemmyResult>> { + proxy_image_link_api(link, context).await.map(Some) +} + +pub async fn proxy_image_link_api( + link: &Option, + context: &LemmyContext, +) -> LemmyResult> { + let link: Option = match link.as_ref().map(String::as_str) { + // An empty string is an erase + Some("") => None, + Some(str_url) => Url::parse(str_url) + .map(|u| Some(u.into())) + .with_lemmy_type(LemmyErrorType::InvalidUrl)?, + None => None, + }; + if let Some(l) = link { + proxy_image_link(l.into(), context).await.map(Some) + } else { + Ok(link) + } +} + +pub async fn proxy_image_link_opt_apub( + link: Option, + context: &LemmyContext, +) -> LemmyResult> { + if let Some(l) = link { + proxy_image_link(l, context).await.map(Some) + } else { + Ok(None) + } +} + #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] #![allow(clippy::indexing_slicing)] + use super::*; use crate::utils::{honeypot_check, limit_expire_time, password_length_check}; use chrono::{Days, Utc}; use pretty_assertions::assert_eq; + use serial_test::serial; #[test] #[rustfmt::skip] @@ -894,4 +1000,62 @@ mod tests { None ); } + + #[tokio::test] + #[serial] + async fn test_proxy_image_link() { + let context = LemmyContext::init_test_context().await; + + // image from local domain is unchanged + let local_url = Url::parse("http://lemmy-alpha/image.png").unwrap(); + let proxied = + proxy_image_link_internal(local_url.clone(), PictrsImageMode::ProxyAllImages, &context) + .await + .unwrap(); + assert_eq!(&local_url, proxied.inner()); + + // image from remote domain is proxied + let remote_image = Url::parse("http://lemmy-beta/image.png").unwrap(); + let proxied = proxy_image_link_internal( + remote_image.clone(), + 
PictrsImageMode::ProxyAllImages, + &context, + ) + .await + .unwrap(); + assert_eq!( + "https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Flemmy-beta%2Fimage.png", + proxied.as_str() + ); + assert!( + RemoteImage::validate(&mut context.pool(), remote_image.into()) + .await + .is_ok() + ); + } + + #[tokio::test] + #[serial] + async fn test_diesel_option_overwrite_to_url() { + let context = LemmyContext::init_test_context().await; + + assert!(matches!( + proxy_image_link_api(&None, &context).await, + Ok(None) + )); + assert!(matches!( + proxy_image_link_opt_api(&Some(String::new()), &context).await, + Ok(Some(None)) + )); + assert!( + proxy_image_link_opt_api(&Some("invalid_url".to_string()), &context) + .await + .is_err() + ); + let example_url = "https://lemmy-alpha/image.png"; + assert!(matches!( + proxy_image_link_opt_api(&Some(example_url.to_string()), &context).await, + Ok(Some(Some(url))) if url == Url::parse(example_url).unwrap().into() + )); + } } diff --git a/crates/api_crud/src/comment/create.rs b/crates/api_crud/src/comment/create.rs index 2e719eda2f..64f8a3cea3 100644 --- a/crates/api_crud/src/comment/create.rs +++ b/crates/api_crud/src/comment/create.rs @@ -11,6 +11,7 @@ use lemmy_api_common::{ generate_local_apub_endpoint, get_post, local_site_to_slur_regex, + process_markdown, EndpointType, }, }; @@ -28,11 +29,7 @@ use lemmy_db_schema::{ use lemmy_db_views::structs::LocalUserView; use lemmy_utils::{ error::{LemmyError, LemmyErrorExt, LemmyErrorType}, - utils::{ - mention::scrape_text_for_mentions, - slurs::remove_slurs, - validation::is_valid_body_field, - }, + utils::{mention::scrape_text_for_mentions, validation::is_valid_body_field}, }; const MAX_COMMENT_DEPTH_LIMIT: usize = 100; @@ -45,10 +42,8 @@ pub async fn create_comment( ) -> Result, LemmyError> { let local_site = LocalSite::read(&mut context.pool()).await?; - let content = remove_slurs( - &data.content.clone(), - &local_site_to_slur_regex(&local_site), - ); + let slur_regex = 
local_site_to_slur_regex(&local_site); + let content = process_markdown(&data.content, &slur_regex, &context).await?; is_valid_body_field(&Some(content.clone()), false)?; // Check for a community ban diff --git a/crates/api_crud/src/comment/update.rs b/crates/api_crud/src/comment/update.rs index 21cf54cfaf..2d6bf79bed 100644 --- a/crates/api_crud/src/comment/update.rs +++ b/crates/api_crud/src/comment/update.rs @@ -5,7 +5,7 @@ use lemmy_api_common::{ comment::{CommentResponse, EditComment}, context::LemmyContext, send_activity::{ActivityChannel, SendActivityData}, - utils::{check_community_user_action, local_site_to_slur_regex}, + utils::{check_community_user_action, local_site_to_slur_regex, process_markdown_opt}, }; use lemmy_db_schema::{ source::{ @@ -19,11 +19,7 @@ use lemmy_db_schema::{ use lemmy_db_views::structs::{CommentView, LocalUserView}; use lemmy_utils::{ error::{LemmyError, LemmyErrorExt, LemmyErrorType}, - utils::{ - mention::scrape_text_for_mentions, - slurs::remove_slurs, - validation::is_valid_body_field, - }, + utils::{mention::scrape_text_for_mentions, validation::is_valid_body_field}, }; #[tracing::instrument(skip(context))] @@ -57,11 +53,8 @@ pub async fn update_comment( ) .await?; - // Update the Content - let content = data - .content - .as_ref() - .map(|c| remove_slurs(c, &local_site_to_slur_regex(&local_site))); + let slur_regex = local_site_to_slur_regex(&local_site); + let content = process_markdown_opt(&data.content, &slur_regex, &context).await?; is_valid_body_field(&content, false)?; let comment_id = data.comment_id; diff --git a/crates/api_crud/src/community/create.rs b/crates/api_crud/src/community/create.rs index a133d593c4..945baa4311 100644 --- a/crates/api_crud/src/community/create.rs +++ b/crates/api_crud/src/community/create.rs @@ -11,6 +11,8 @@ use lemmy_api_common::{ generate_shared_inbox_url, is_admin, local_site_to_slur_regex, + process_markdown_opt, + proxy_image_link_api, EndpointType, }, }; @@ -27,13 +29,12 @@ use 
lemmy_db_schema::{ }, }, traits::{ApubActor, Crud, Followable, Joinable}, - utils::diesel_option_overwrite_to_url_create, }; use lemmy_db_views::structs::{LocalUserView, SiteView}; use lemmy_utils::{ error::{LemmyError, LemmyErrorExt, LemmyErrorType}, utils::{ - slurs::{check_slurs, check_slurs_opt}, + slurs::check_slurs, validation::{is_valid_actor_name, is_valid_body_field}, }, }; @@ -51,14 +52,12 @@ pub async fn create_community( Err(LemmyErrorType::OnlyAdminsCanCreateCommunities)? } - // Check to make sure the icon and banners are urls - let icon = diesel_option_overwrite_to_url_create(&data.icon)?; - let banner = diesel_option_overwrite_to_url_create(&data.banner)?; - let slur_regex = local_site_to_slur_regex(&local_site); check_slurs(&data.name, &slur_regex)?; check_slurs(&data.title, &slur_regex)?; - check_slurs_opt(&data.description, &slur_regex)?; + let description = process_markdown_opt(&data.description, &slur_regex, &context).await?; + let icon = proxy_image_link_api(&data.icon, &context).await?; + let banner = proxy_image_link_api(&data.banner, &context).await?; is_valid_actor_name(&data.name, local_site.actor_name_max_length as usize)?; is_valid_body_field(&data.description, false)?; @@ -81,7 +80,7 @@ pub async fn create_community( let community_form = CommunityInsertForm::builder() .name(data.name.clone()) .title(data.title.clone()) - .description(data.description.clone()) + .description(description) .icon(icon) .banner(banner) .nsfw(data.nsfw) diff --git a/crates/api_crud/src/community/update.rs b/crates/api_crud/src/community/update.rs index 40ba1a2a18..9be785821d 100644 --- a/crates/api_crud/src/community/update.rs +++ b/crates/api_crud/src/community/update.rs @@ -5,7 +5,12 @@ use lemmy_api_common::{ community::{CommunityResponse, EditCommunity}, context::LemmyContext, send_activity::{ActivityChannel, SendActivityData}, - utils::{check_community_mod_action, local_site_to_slur_regex}, + utils::{ + check_community_mod_action, + 
local_site_to_slur_regex, + process_markdown_opt, + proxy_image_link_opt_api, + }, }; use lemmy_db_schema::{ source::{ @@ -14,7 +19,7 @@ use lemmy_db_schema::{ local_site::LocalSite, }, traits::Crud, - utils::{diesel_option_overwrite, diesel_option_overwrite_to_url, naive_now}, + utils::{diesel_option_overwrite, naive_now}, }; use lemmy_db_views::structs::LocalUserView; use lemmy_utils::{ @@ -32,12 +37,12 @@ pub async fn update_community( let slur_regex = local_site_to_slur_regex(&local_site); check_slurs_opt(&data.title, &slur_regex)?; - check_slurs_opt(&data.description, &slur_regex)?; + let description = process_markdown_opt(&data.description, &slur_regex, &context).await?; is_valid_body_field(&data.description, false)?; - let icon = diesel_option_overwrite_to_url(&data.icon)?; - let banner = diesel_option_overwrite_to_url(&data.banner)?; - let description = diesel_option_overwrite(data.description.clone()); + let description = diesel_option_overwrite(description); + let icon = proxy_image_link_opt_api(&data.icon, &context).await?; + let banner = proxy_image_link_opt_api(&data.banner, &context).await?; // Verify its a mod (only mods can edit it) check_community_mod_action( diff --git a/crates/api_crud/src/post/create.rs b/crates/api_crud/src/post/create.rs index e4af92916c..a3b623638e 100644 --- a/crates/api_crud/src/post/create.rs +++ b/crates/api_crud/src/post/create.rs @@ -4,7 +4,7 @@ use lemmy_api_common::{ build_response::build_post_response, context::LemmyContext, post::{CreatePost, PostResponse}, - request::fetch_site_data, + request::fetch_link_metadata_opt, send_activity::{ActivityChannel, SendActivityData}, utils::{ check_community_user_action, @@ -12,6 +12,8 @@ use lemmy_api_common::{ honeypot_check, local_site_to_slur_regex, mark_post_as_read, + process_markdown_opt, + proxy_image_link_opt_apub, EndpointType, }, }; @@ -31,7 +33,7 @@ use lemmy_utils::{ error::{LemmyError, LemmyErrorExt, LemmyErrorType}, spawn_try_task, utils::{ - slurs::{check_slurs, 
check_slurs_opt}, + slurs::check_slurs, validation::{check_url_scheme, clean_url_params, is_valid_body_field, is_valid_post_title}, }, }; @@ -49,14 +51,14 @@ pub async fn create_post( let slur_regex = local_site_to_slur_regex(&local_site); check_slurs(&data.name, &slur_regex)?; - check_slurs_opt(&data.body, &slur_regex)?; + let body = process_markdown_opt(&data.body, &slur_regex, &context).await?; honeypot_check(&data.honeypot)?; let data_url = data.url.as_ref(); - let url = data_url.map(clean_url_params).map(Into::into); // TODO no good way to handle a "clear" + let url = data_url.map(clean_url_params); // TODO no good way to handle a "clear" is_valid_post_title(&data.name)?; - is_valid_body_field(&data.body, true)?; + is_valid_body_field(&body, true)?; check_url_scheme(&data.url)?; check_community_user_action( @@ -82,11 +84,8 @@ pub async fn create_post( } // Fetch post links and pictrs cached image - let (metadata_res, thumbnail_url) = - fetch_site_data(context.client(), context.settings(), data_url, true).await; - let (embed_title, embed_description, embed_video_url) = metadata_res - .map(|u| (u.title, u.description, u.embed_video_url)) - .unwrap_or_default(); + let metadata = fetch_link_metadata_opt(url.as_ref(), true, &context).await; + let url = proxy_image_link_opt_apub(url, &context).await?; // Only need to check if language is allowed in case user set it explicitly. When using default // language, it already only returns allowed languages. 
@@ -113,15 +112,15 @@ pub async fn create_post( let post_form = PostInsertForm::builder() .name(data.name.trim().to_string()) .url(url) - .body(data.body.clone()) + .body(body) .community_id(data.community_id) .creator_id(local_user_view.person.id) .nsfw(data.nsfw) - .embed_title(embed_title) - .embed_description(embed_description) - .embed_video_url(embed_video_url) + .embed_title(metadata.opengraph_data.title) + .embed_description(metadata.opengraph_data.description) + .embed_video_url(metadata.opengraph_data.embed_video_url) .language_id(language_id) - .thumbnail_url(thumbnail_url) + .thumbnail_url(metadata.thumbnail) .build(); let inserted_post = Post::create(&mut context.pool(), &post_form) diff --git a/crates/api_crud/src/post/update.rs b/crates/api_crud/src/post/update.rs index b17981c551..c367186150 100644 --- a/crates/api_crud/src/post/update.rs +++ b/crates/api_crud/src/post/update.rs @@ -4,9 +4,14 @@ use lemmy_api_common::{ build_response::build_post_response, context::LemmyContext, post::{EditPost, PostResponse}, - request::fetch_site_data, + request::fetch_link_metadata, send_activity::{ActivityChannel, SendActivityData}, - utils::{check_community_user_action, local_site_to_slur_regex}, + utils::{ + check_community_user_action, + local_site_to_slur_regex, + process_markdown_opt, + proxy_image_link_opt_apub, + }, }; use lemmy_db_schema::{ source::{ @@ -35,21 +40,19 @@ pub async fn update_post( ) -> Result, LemmyError> { let local_site = LocalSite::read(&mut context.pool()).await?; - let data_url = data.url.as_ref(); - // TODO No good way to handle a clear. 
// Issue link: https://github.com/LemmyNet/lemmy/issues/2287 - let url = Some(data_url.map(clean_url_params).map(Into::into)); + let url = data.url.as_ref().map(clean_url_params); let slur_regex = local_site_to_slur_regex(&local_site); check_slurs_opt(&data.name, &slur_regex)?; - check_slurs_opt(&data.body, &slur_regex)?; + let body = process_markdown_opt(&data.body, &slur_regex, &context).await?; if let Some(name) = &data.name { is_valid_post_title(name)?; } - is_valid_body_field(&data.body, true)?; + is_valid_body_field(&body, true)?; check_url_scheme(&data.url)?; let post_id = data.post_id; @@ -67,13 +70,23 @@ pub async fn update_post( Err(LemmyErrorType::NoPostEditAllowed)? } - // Fetch post links and Pictrs cached image - let data_url = data.url.as_ref(); - let (metadata_res, thumbnail_url) = - fetch_site_data(context.client(), context.settings(), data_url, true).await; - let (embed_title, embed_description, embed_video_url) = metadata_res - .map(|u| (Some(u.title), Some(u.description), Some(u.embed_video_url))) - .unwrap_or_default(); + // Fetch post links and Pictrs cached image if url was updated + let (embed_title, embed_description, embed_video_url, thumbnail_url) = match &url { + Some(url) => { + let metadata = fetch_link_metadata(url, true, &context).await?; + ( + Some(metadata.opengraph_data.title), + Some(metadata.opengraph_data.description), + Some(metadata.opengraph_data.embed_video_url), + Some(metadata.thumbnail), + ) + } + _ => Default::default(), + }; + let url = match url { + Some(url) => Some(proxy_image_link_opt_apub(Some(url), &context).await?), + _ => Default::default(), + }; let language_id = data.language_id; CommunityLanguage::is_allowed_community_language( @@ -86,13 +99,13 @@ pub async fn update_post( let post_form = PostUpdateForm { name: data.name.clone(), url, - body: diesel_option_overwrite(data.body.clone()), + body: diesel_option_overwrite(body), nsfw: data.nsfw, embed_title, embed_description, embed_video_url, language_id: 
data.language_id, - thumbnail_url: Some(thumbnail_url), + thumbnail_url, updated: Some(Some(naive_now())), ..Default::default() }; diff --git a/crates/api_crud/src/private_message/create.rs b/crates/api_crud/src/private_message/create.rs index a176cdcb21..c4832ec70d 100644 --- a/crates/api_crud/src/private_message/create.rs +++ b/crates/api_crud/src/private_message/create.rs @@ -9,6 +9,7 @@ use lemmy_api_common::{ generate_local_apub_endpoint, get_interface_language, local_site_to_slur_regex, + process_markdown, send_email_to_user, EndpointType, }, @@ -23,7 +24,7 @@ use lemmy_db_schema::{ use lemmy_db_views::structs::{LocalUserView, PrivateMessageView}; use lemmy_utils::{ error::{LemmyError, LemmyErrorExt, LemmyErrorType}, - utils::{markdown::markdown_to_html, slurs::remove_slurs, validation::is_valid_body_field}, + utils::{markdown::markdown_to_html, validation::is_valid_body_field}, }; #[tracing::instrument(skip(context))] @@ -34,7 +35,8 @@ pub async fn create_private_message( ) -> Result, LemmyError> { let local_site = LocalSite::read(&mut context.pool()).await?; - let content = remove_slurs(&data.content, &local_site_to_slur_regex(&local_site)); + let slur_regex = local_site_to_slur_regex(&local_site); + let content = process_markdown(&data.content, &slur_regex, &context).await?; is_valid_body_field(&Some(content.clone()), false)?; check_person_block( diff --git a/crates/api_crud/src/private_message/update.rs b/crates/api_crud/src/private_message/update.rs index 9e3b7c6b30..dfcf522a8f 100644 --- a/crates/api_crud/src/private_message/update.rs +++ b/crates/api_crud/src/private_message/update.rs @@ -4,7 +4,7 @@ use lemmy_api_common::{ context::LemmyContext, private_message::{EditPrivateMessage, PrivateMessageResponse}, send_activity::{ActivityChannel, SendActivityData}, - utils::local_site_to_slur_regex, + utils::{local_site_to_slur_regex, process_markdown}, }; use lemmy_db_schema::{ source::{ @@ -17,7 +17,7 @@ use lemmy_db_schema::{ use 
lemmy_db_views::structs::{LocalUserView, PrivateMessageView}; use lemmy_utils::{ error::{LemmyError, LemmyErrorExt, LemmyErrorType}, - utils::{slurs::remove_slurs, validation::is_valid_body_field}, + utils::validation::is_valid_body_field, }; #[tracing::instrument(skip(context))] @@ -36,7 +36,8 @@ pub async fn update_private_message( } // Doing the update - let content = remove_slurs(&data.content, &local_site_to_slur_regex(&local_site)); + let slur_regex = local_site_to_slur_regex(&local_site); + let content = process_markdown(&data.content, &slur_regex, &context).await?; is_valid_body_field(&Some(content.clone()), false)?; let private_message_id = data.private_message_id; diff --git a/crates/api_crud/src/site/create.rs b/crates/api_crud/src/site/create.rs index 06ddfd5340..9943750274 100644 --- a/crates/api_crud/src/site/create.rs +++ b/crates/api_crud/src/site/create.rs @@ -4,7 +4,14 @@ use actix_web::web::{Data, Json}; use lemmy_api_common::{ context::LemmyContext, site::{CreateSite, SiteResponse}, - utils::{generate_shared_inbox_url, is_admin, local_site_rate_limit_to_rate_limit_config}, + utils::{ + generate_shared_inbox_url, + is_admin, + local_site_rate_limit_to_rate_limit_config, + local_site_to_slur_regex, + process_markdown_opt, + proxy_image_link_opt_api, + }, }; use lemmy_db_schema::{ newtypes::DbUrl, @@ -15,7 +22,7 @@ use lemmy_db_schema::{ tagline::Tagline, }, traits::Crud, - utils::{diesel_option_overwrite, diesel_option_overwrite_to_url, naive_now}, + utils::{diesel_option_overwrite, naive_now}, }; use lemmy_db_views::structs::{LocalUserView, SiteView}; use lemmy_utils::{ @@ -50,12 +57,17 @@ pub async fn create_site( let inbox_url = Some(generate_shared_inbox_url(context.settings())?); let keypair = generate_actor_keypair()?; + let slur_regex = local_site_to_slur_regex(&local_site); + let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &context).await?; + let icon = proxy_image_link_opt_api(&data.icon, &context).await?; + let banner = 
proxy_image_link_opt_api(&data.banner, &context).await?; + let site_form = SiteUpdateForm { name: Some(data.name.clone()), - sidebar: diesel_option_overwrite(data.sidebar.clone()), + sidebar: diesel_option_overwrite(sidebar), description: diesel_option_overwrite(data.description.clone()), - icon: diesel_option_overwrite_to_url(&data.icon)?, - banner: diesel_option_overwrite_to_url(&data.banner)?, + icon, + banner, actor_id: Some(actor_id), last_refreshed_at: Some(naive_now()), inbox_url, diff --git a/crates/api_crud/src/site/update.rs b/crates/api_crud/src/site/update.rs index 62db5bb835..ba716c9a80 100644 --- a/crates/api_crud/src/site/update.rs +++ b/crates/api_crud/src/site/update.rs @@ -3,7 +3,13 @@ use actix_web::web::{Data, Json}; use lemmy_api_common::{ context::LemmyContext, site::{EditSite, SiteResponse}, - utils::{is_admin, local_site_rate_limit_to_rate_limit_config}, + utils::{ + is_admin, + local_site_rate_limit_to_rate_limit_config, + local_site_to_slur_regex, + process_markdown_opt, + proxy_image_link_opt_api, + }, }; use lemmy_db_schema::{ source::{ @@ -17,7 +23,7 @@ use lemmy_db_schema::{ tagline::Tagline, }, traits::Crud, - utils::{diesel_option_overwrite, diesel_option_overwrite_to_url, naive_now}, + utils::{diesel_option_overwrite, naive_now}, RegistrationMode, }; use lemmy_db_views::structs::{LocalUserView, SiteView}; @@ -54,12 +60,17 @@ pub async fn update_site( SiteLanguage::update(&mut context.pool(), discussion_languages.clone(), &site).await?; } + let slur_regex = local_site_to_slur_regex(&local_site); + let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &context).await?; + let icon = proxy_image_link_opt_api(&data.icon, &context).await?; + let banner = proxy_image_link_opt_api(&data.banner, &context).await?; + let site_form = SiteUpdateForm { name: data.name.clone(), - sidebar: diesel_option_overwrite(data.sidebar.clone()), + sidebar: diesel_option_overwrite(sidebar), description: 
diesel_option_overwrite(data.description.clone()), - icon: diesel_option_overwrite_to_url(&data.icon)?, - banner: diesel_option_overwrite_to_url(&data.banner)?, + icon, + banner, updated: Some(Some(naive_now())), ..Default::default() }; diff --git a/crates/apub/Cargo.toml b/crates/apub/Cargo.toml index 7a38074f08..175efbd45d 100644 --- a/crates/apub/Cargo.toml +++ b/crates/apub/Cargo.toml @@ -50,7 +50,5 @@ enum_delegate = "0.2.0" [dev-dependencies] serial_test = { workspace = true } -reqwest-middleware = { workspace = true } -task-local-extensions = "0.1.4" assert-json-diff = "2.0.2" pretty_assertions = { workspace = true } diff --git a/crates/apub/src/activities/community/update.rs b/crates/apub/src/activities/community/update.rs index 8733b8ad1b..8bd87103e3 100644 --- a/crates/apub/src/activities/community/update.rs +++ b/crates/apub/src/activities/community/update.rs @@ -8,7 +8,7 @@ use crate::{ }, activity_lists::AnnouncableActivities, insert_received_activity, - objects::{community::ApubCommunity, person::ApubPerson}, + objects::{community::ApubCommunity, person::ApubPerson, read_from_string_or_source_opt}, protocol::{activities::community::update::UpdateCommunity, InCommunity}, }; use activitypub_federation::{ @@ -18,8 +18,13 @@ use activitypub_federation::{ }; use lemmy_api_common::context::LemmyContext; use lemmy_db_schema::{ - source::{activity::ActivitySendTargets, community::Community, person::Person}, + source::{ + activity::ActivitySendTargets, + community::{Community, CommunityUpdateForm}, + person::Person, + }, traits::Crud, + utils::naive_now, }; use lemmy_utils::error::LemmyError; use url::Url; @@ -85,7 +90,33 @@ impl ActivityHandler for UpdateCommunity { insert_received_activity(&self.id, context).await?; let community = self.community(context).await?; - let community_update_form = self.object.into_update_form(); + let community_update_form = CommunityUpdateForm { + title: Some(self.object.name.unwrap_or(self.object.preferred_username)), + 
description: Some(read_from_string_or_source_opt( + &self.object.summary, + &None, + &self.object.source, + )), + removed: None, + published: self.object.published.map(Into::into), + updated: Some(self.object.updated.map(Into::into)), + deleted: None, + nsfw: Some(self.object.sensitive.unwrap_or(false)), + actor_id: Some(self.object.id.into()), + local: None, + private_key: None, + hidden: None, + public_key: Some(self.object.public_key.public_key_pem), + last_refreshed_at: Some(naive_now()), + icon: Some(self.object.icon.map(|i| i.url.into())), + banner: Some(self.object.image.map(|i| i.url.into())), + followers_url: Some(self.object.followers.into()), + inbox_url: Some(self.object.inbox.into()), + shared_inbox_url: Some(self.object.endpoints.map(|e| e.shared_inbox.into())), + moderators_url: self.object.attributed_to.map(Into::into), + posting_restricted_to_mods: self.object.posting_restricted_to_mods, + featured_url: self.object.featured.map(Into::into), + }; Community::update(&mut context.pool(), community.id, &community_update_form).await?; Ok(()) diff --git a/crates/apub/src/api/user_settings_backup.rs b/crates/apub/src/api/user_settings_backup.rs index e42e74d302..ebe2940d46 100644 --- a/crates/apub/src/api/user_settings_backup.rs +++ b/crates/apub/src/api/user_settings_backup.rs @@ -298,10 +298,7 @@ pub async fn import_settings( mod tests { #![allow(clippy::indexing_slicing)] - use crate::{ - api::user_settings_backup::{export_settings, import_settings}, - objects::tests::init_context, - }; + use crate::api::user_settings_backup::{export_settings, import_settings}; use activitypub_federation::config::Data; use lemmy_api_common::context::LemmyContext; use lemmy_db_schema::{ @@ -348,7 +345,7 @@ mod tests { #[tokio::test] #[serial] async fn test_settings_export_import() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; let export_user = create_user("hanna".to_string(), Some("my 
bio".to_string()), &context).await?; @@ -397,7 +394,7 @@ mod tests { #[tokio::test] #[serial] async fn disallow_large_backup() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; let export_user = create_user("hanna".to_string(), Some("my bio".to_string()), &context).await?; diff --git a/crates/apub/src/collections/community_moderators.rs b/crates/apub/src/collections/community_moderators.rs index 4ad499ea2d..87d88d071b 100644 --- a/crates/apub/src/collections/community_moderators.rs +++ b/crates/apub/src/collections/community_moderators.rs @@ -106,11 +106,7 @@ mod tests { use super::*; use crate::{ - objects::{ - community::tests::parse_lemmy_community, - person::tests::parse_lemmy_person, - tests::init_context, - }, + objects::{community::tests::parse_lemmy_community, person::tests::parse_lemmy_person}, protocol::tests::file_to_json_object, }; use lemmy_db_schema::{ @@ -129,7 +125,7 @@ mod tests { #[tokio::test] #[serial] async fn test_parse_lemmy_community_moderators() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; let (new_mod, site) = parse_lemmy_person(&context).await?; let community = parse_lemmy_community(&context).await?; let community_id = community.id; diff --git a/crates/apub/src/objects/comment.rs b/crates/apub/src/objects/comment.rs index b32b7ba574..81a618bbaf 100644 --- a/crates/apub/src/objects/comment.rs +++ b/crates/apub/src/objects/comment.rs @@ -16,7 +16,10 @@ use activitypub_federation::{ traits::Object, }; use chrono::{DateTime, Utc}; -use lemmy_api_common::{context::LemmyContext, utils::local_site_opt_to_slur_regex}; +use lemmy_api_common::{ + context::LemmyContext, + utils::{local_site_opt_to_slur_regex, process_markdown}, +}; use lemmy_db_schema::{ source::{ comment::{Comment, CommentInsertForm, CommentUpdateForm}, @@ -29,7 +32,7 @@ use lemmy_db_schema::{ }; use lemmy_utils::{ error::{LemmyError, 
LemmyErrorType}, - utils::{markdown::markdown_to_html, slurs::remove_slurs}, + utils::markdown::markdown_to_html, }; use std::ops::Deref; use url::Url; @@ -158,7 +161,7 @@ impl Object for ApubComment { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); - let content = remove_slurs(&content, slur_regex); + let content = process_markdown(&content, slur_regex, context).await?; let language_id = LanguageTag::to_language_id_single(note.language, &mut context.pool()).await?; @@ -190,7 +193,6 @@ pub(crate) mod tests { instance::ApubSite, person::{tests::parse_lemmy_person, ApubPerson}, post::ApubPost, - tests::init_context, }, protocol::tests::file_to_json_object, }; @@ -230,7 +232,7 @@ pub(crate) mod tests { #[tokio::test] #[serial] pub(crate) async fn test_parse_lemmy_comment() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; let url = Url::parse("https://enterprise.lemmy.ml/comment/38741")?; let data = prepare_comment_test(&url, &context).await?; @@ -255,7 +257,7 @@ pub(crate) mod tests { #[tokio::test] #[serial] async fn test_parse_pleroma_comment() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; let url = Url::parse("https://enterprise.lemmy.ml/comment/38741")?; let data = prepare_comment_test(&url, &context).await?; diff --git a/crates/apub/src/objects/community.rs b/crates/apub/src/objects/community.rs index 3cbf352cde..e63c75078b 100644 --- a/crates/apub/src/objects/community.rs +++ b/crates/apub/src/objects/community.rs @@ -2,7 +2,7 @@ use crate::{ activities::GetActorType, check_apub_id_valid, local_site_data_cached, - objects::instance::fetch_instance_actor_for_object, + objects::{instance::fetch_instance_actor_for_object, read_from_string_or_source_opt}, protocol::{ objects::{group::Group, Endpoints, LanguageTag}, ImageObject, @@ -17,15 +17,24 @@ use 
activitypub_federation::{ use chrono::{DateTime, Utc}; use lemmy_api_common::{ context::LemmyContext, - utils::{generate_featured_url, generate_moderators_url, generate_outbox_url}, + utils::{ + generate_featured_url, + generate_moderators_url, + generate_outbox_url, + local_site_opt_to_slur_regex, + process_markdown_opt, + proxy_image_link_opt_apub, + }, }; use lemmy_db_schema::{ source::{ activity::ActorType, actor_language::CommunityLanguage, - community::{Community, CommunityUpdateForm}, + community::{Community, CommunityInsertForm, CommunityUpdateForm}, + local_site::LocalSite, }, traits::{ApubActor, Crud}, + utils::naive_now, }; use lemmy_db_views_actor::structs::CommunityFollowerView; use lemmy_utils::{error::LemmyError, spawn_try_task, utils::markdown::markdown_to_html}; @@ -130,7 +139,38 @@ impl Object for ApubCommunity { ) -> Result { let instance_id = fetch_instance_actor_for_object(&group.id, context).await?; - let form = Group::into_insert_form(group.clone(), instance_id); + let local_site = LocalSite::read(&mut context.pool()).await.ok(); + let slur_regex = &local_site_opt_to_slur_regex(&local_site); + let description = read_from_string_or_source_opt(&group.summary, &None, &group.source); + let description = process_markdown_opt(&description, slur_regex, context).await?; + let icon = proxy_image_link_opt_apub(group.icon.map(|i| i.url), context).await?; + let banner = proxy_image_link_opt_apub(group.image.map(|i| i.url), context).await?; + + let form = CommunityInsertForm { + name: group.preferred_username.clone(), + title: group.name.unwrap_or(group.preferred_username.clone()), + description, + removed: None, + published: group.published, + updated: group.updated, + deleted: Some(false), + nsfw: Some(group.sensitive.unwrap_or(false)), + actor_id: Some(group.id.into()), + local: Some(false), + private_key: None, + hidden: None, + public_key: group.public_key.public_key_pem, + last_refreshed_at: Some(naive_now()), + icon, + banner, + followers_url: 
Some(group.followers.clone().into()), + inbox_url: Some(group.inbox.into()), + shared_inbox_url: group.endpoints.map(|e| e.shared_inbox.into()), + moderators_url: group.attributed_to.clone().map(Into::into), + posting_restricted_to_mods: group.posting_restricted_to_mods, + instance_id, + featured_url: group.featured.map(Into::into), + }; let languages = LanguageTag::to_language_id_multiple(group.language, &mut context.pool()).await?; @@ -212,7 +252,7 @@ impl ApubCommunity { pub(crate) mod tests { use super::*; use crate::{ - objects::{instance::tests::parse_lemmy_instance, tests::init_context}, + objects::instance::tests::parse_lemmy_instance, protocol::tests::file_to_json_object, }; use activitypub_federation::fetch::collection_id::CollectionId; @@ -241,7 +281,7 @@ pub(crate) mod tests { #[tokio::test] #[serial] async fn test_parse_lemmy_community() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; let site = parse_lemmy_instance(&context).await?; let community = parse_lemmy_community(&context).await?; diff --git a/crates/apub/src/objects/instance.rs b/crates/apub/src/objects/instance.rs index 392b86bd92..c7d4f11f6d 100644 --- a/crates/apub/src/objects/instance.rs +++ b/crates/apub/src/objects/instance.rs @@ -17,13 +17,17 @@ use activitypub_federation::{ traits::{Actor, Object}, }; use chrono::{DateTime, Utc}; -use lemmy_api_common::{context::LemmyContext, utils::local_site_opt_to_slur_regex}; +use lemmy_api_common::{ + context::LemmyContext, + utils::{local_site_opt_to_slur_regex, process_markdown_opt, proxy_image_link_opt_apub}, +}; use lemmy_db_schema::{ newtypes::InstanceId, source::{ activity::ActorType, actor_language::SiteLanguage, instance::Instance as DbInstance, + local_site::LocalSite, site::{Site, SiteInsertForm}, }, traits::Crud, @@ -126,18 +130,23 @@ impl Object for ApubSite { } #[tracing::instrument(skip_all)] - async fn from_json(apub: Self::Kind, data: &Data) -> Result { + async fn 
from_json(apub: Self::Kind, context: &Data) -> Result { let domain = apub.id.inner().domain().expect("group id has domain"); - let instance = DbInstance::read_or_create(&mut data.pool(), domain.to_string()).await?; + let instance = DbInstance::read_or_create(&mut context.pool(), domain.to_string()).await?; + let local_site = LocalSite::read(&mut context.pool()).await.ok(); + let slur_regex = &local_site_opt_to_slur_regex(&local_site); let sidebar = read_from_string_or_source_opt(&apub.content, &None, &apub.source); + let sidebar = process_markdown_opt(&sidebar, slur_regex, context).await?; + let icon = proxy_image_link_opt_apub(apub.icon.map(|i| i.url), context).await?; + let banner = proxy_image_link_opt_apub(apub.image.map(|i| i.url), context).await?; let site_form = SiteInsertForm { name: apub.name.clone(), sidebar, updated: apub.updated, - icon: apub.icon.clone().map(|i| i.url.into()), - banner: apub.image.clone().map(|i| i.url.into()), + icon, + banner, description: apub.summary, actor_id: Some(apub.id.clone().into()), last_refreshed_at: Some(naive_now()), @@ -146,10 +155,11 @@ impl Object for ApubSite { private_key: None, instance_id: instance.id, }; - let languages = LanguageTag::to_language_id_multiple(apub.language, &mut data.pool()).await?; + let languages = + LanguageTag::to_language_id_multiple(apub.language, &mut context.pool()).await?; - let site = Site::create(&mut data.pool(), &site_form).await?; - SiteLanguage::update(&mut data.pool(), languages, &site).await?; + let site = Site::create(&mut context.pool(), &site_form).await?; + SiteLanguage::update(&mut context.pool(), languages, &site).await?; Ok(site.into()) } } @@ -205,7 +215,7 @@ pub(in crate::objects) async fn fetch_instance_actor_for_object + C #[cfg(test)] pub(crate) mod tests { use super::*; - use crate::{objects::tests::init_context, protocol::tests::file_to_json_object}; + use crate::protocol::tests::file_to_json_object; use lemmy_db_schema::traits::Crud; use 
lemmy_utils::error::LemmyResult; use pretty_assertions::assert_eq; @@ -223,7 +233,7 @@ pub(crate) mod tests { #[tokio::test] #[serial] async fn test_parse_lemmy_instance() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; let site = parse_lemmy_instance(&context).await?; assert_eq!(site.name, "Enterprise"); diff --git a/crates/apub/src/objects/mod.rs b/crates/apub/src/objects/mod.rs index e15a988403..cabd07e6db 100644 --- a/crates/apub/src/objects/mod.rs +++ b/crates/apub/src/objects/mod.rs @@ -51,54 +51,3 @@ pub(crate) fn verify_is_remote_object(id: &Url, settings: &Settings) -> Result<( Ok(()) } } - -#[cfg(test)] -pub(crate) mod tests { - use activitypub_federation::config::{Data, FederationConfig}; - use anyhow::anyhow; - use lemmy_api_common::{context::LemmyContext, request::client_builder}; - use lemmy_db_schema::{source::secret::Secret, utils::build_db_pool_for_tests}; - use lemmy_utils::{error::LemmyResult, rate_limit::RateLimitCell, settings::SETTINGS}; - use reqwest::{Request, Response}; - use reqwest_middleware::{ClientBuilder, Middleware, Next}; - use task_local_extensions::Extensions; - - struct BlockedMiddleware; - - /// A reqwest middleware which blocks all requests - #[async_trait::async_trait] - impl Middleware for BlockedMiddleware { - async fn handle( - &self, - _req: Request, - _extensions: &mut Extensions, - _next: Next<'_>, - ) -> reqwest_middleware::Result { - Err(anyhow!("Network requests not allowed").into()) - } - } - - // TODO: would be nice if we didnt have to use a full context for tests. 
- pub(crate) async fn init_context() -> LemmyResult> { - // call this to run migrations - let pool = build_db_pool_for_tests().await; - - let client = client_builder(&SETTINGS).build()?; - - let client = ClientBuilder::new(client).with(BlockedMiddleware).build(); - let secret = Secret { - id: 0, - jwt_secret: String::new(), - }; - - let rate_limit_cell = RateLimitCell::with_test_config(); - - let context = LemmyContext::create(pool, client, secret, rate_limit_cell.clone()); - let config = FederationConfig::builder() - .domain("example.com") - .app_data(context) - .build() - .await?; - Ok(config.to_request_data()) - } -} diff --git a/crates/apub/src/objects/person.rs b/crates/apub/src/objects/person.rs index e6ce522051..8e0d335bc2 100644 --- a/crates/apub/src/objects/person.rs +++ b/crates/apub/src/objects/person.rs @@ -20,11 +20,17 @@ use activitypub_federation::{ use chrono::{DateTime, Utc}; use lemmy_api_common::{ context::LemmyContext, - utils::{generate_outbox_url, local_site_opt_to_slur_regex}, + utils::{ + generate_outbox_url, + local_site_opt_to_slur_regex, + process_markdown_opt, + proxy_image_link_opt_apub, + }, }; use lemmy_db_schema::{ source::{ activity::ActorType, + local_site::LocalSite, person::{Person as DbPerson, PersonInsertForm, PersonUpdateForm}, }, traits::{ApubActor, Crud}, @@ -144,7 +150,12 @@ impl Object for ApubPerson { ) -> Result { let instance_id = fetch_instance_actor_for_object(&person.id, context).await?; + let local_site = LocalSite::read(&mut context.pool()).await.ok(); + let slur_regex = &local_site_opt_to_slur_regex(&local_site); let bio = read_from_string_or_source_opt(&person.summary, &None, &person.source); + let bio = process_markdown_opt(&bio, slur_regex, context).await?; + let avatar = proxy_image_link_opt_apub(person.icon.map(|i| i.url), context).await?; + let banner = proxy_image_link_opt_apub(person.image.map(|i| i.url), context).await?; // Some Mastodon users have `name: ""` (empty string), need to convert that to `None` 
// https://github.com/mastodon/mastodon/issues/25233 @@ -156,8 +167,8 @@ impl Object for ApubPerson { banned: None, ban_expires: None, deleted: Some(false), - avatar: person.icon.map(|i| i.url.into()), - banner: person.image.map(|i| i.url.into()), + avatar, + banner, published: person.published.map(Into::into), updated: person.updated.map(Into::into), actor_id: Some(person.id.into()), @@ -210,10 +221,7 @@ impl GetActorType for ApubPerson { pub(crate) mod tests { use super::*; use crate::{ - objects::{ - instance::{tests::parse_lemmy_instance, ApubSite}, - tests::init_context, - }, + objects::instance::{tests::parse_lemmy_instance, ApubSite}, protocol::{objects::instance::Instance, tests::file_to_json_object}, }; use activitypub_federation::fetch::object_id::ObjectId; @@ -237,7 +245,7 @@ pub(crate) mod tests { #[tokio::test] #[serial] async fn test_parse_lemmy_person() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; let (person, site) = parse_lemmy_person(&context).await?; assert_eq!(person.display_name, Some("Jean-Luc Picard".to_string())); @@ -251,7 +259,7 @@ pub(crate) mod tests { #[tokio::test] #[serial] async fn test_parse_pleroma_person() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; // create and parse a fake pleroma instance actor, to avoid network request during test let mut json: Instance = file_to_json_object("assets/lemmy/objects/instance.json")?; diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index 14ec8a2454..0aa4507a4a 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -24,8 +24,14 @@ use chrono::{DateTime, Utc}; use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator}; use lemmy_api_common::{ context::LemmyContext, - request::fetch_site_data, - utils::{is_mod_or_admin, local_site_opt_to_sensitive, 
local_site_opt_to_slur_regex}, + request::fetch_link_metadata_opt, + utils::{ + is_mod_or_admin, + local_site_opt_to_sensitive, + local_site_opt_to_slur_regex, + process_markdown_opt, + proxy_image_link_opt_apub, + }, }; use lemmy_db_schema::{ self, @@ -40,11 +46,7 @@ use lemmy_db_schema::{ }; use lemmy_utils::{ error::LemmyError, - utils::{ - markdown::markdown_to_html, - slurs::{check_slurs_opt, remove_slurs}, - validation::check_url_scheme, - }, + utils::{markdown::markdown_to_html, slurs::check_slurs_opt, validation::check_url_scheme}, }; use std::ops::Deref; use stringreader::StringReader; @@ -111,6 +113,13 @@ impl Object for ApubPost { let community = Community::read(&mut context.pool(), community_id).await?; let language = LanguageTag::new_single(self.language_id, &mut context.pool()).await?; + let attachment = self + .url + .clone() + .map(|url| Attachment::new(url.into(), self.url_content_type.clone())) + .into_iter() + .collect(); + let page = Page { kind: PageType::Page, id: self.ap_id.clone().into(), @@ -121,7 +130,7 @@ impl Object for ApubPost { content: self.body.as_ref().map(|b| markdown_to_html(b)), media_type: Some(MediaTypeMarkdownOrHtml::Html), source: self.body.clone().map(Source::new), - attachment: self.url.clone().map(Attachment::new).into_iter().collect(), + attachment, image: self.thumbnail_url.clone().map(ImageObject::new), comments_enabled: Some(!self.locked), sensitive: Some(self.nsfw), @@ -210,33 +219,22 @@ impl Object for ApubPost { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let allow_sensitive = local_site_opt_to_sensitive(&local_site); let page_is_sensitive = page.sensitive.unwrap_or(false); - let include_image = allow_sensitive || !page_is_sensitive; + let allow_generate_thumbnail = allow_sensitive || !page_is_sensitive; + let mut thumbnail_url = page.image.map(|i| i.url); + let do_generate_thumbnail = thumbnail_url.is_none() && allow_generate_thumbnail; - // Only fetch metadata if the post has a url and was 
not seen previously. We dont want to - // waste resources by fetching metadata for the same post multiple times. - // Additionally, only fetch image if content is not sensitive or is allowed on local site. - let (metadata_res, thumbnail) = match &url { - Some(url) if old_post.is_err() => { - fetch_site_data( - context.client(), - context.settings(), - Some(url), - include_image, - ) - .await - } - _ => (None, None), - }; - // If no image was included with metadata, use post image instead when available. - let thumbnail_url = thumbnail.or_else(|| page.image.map(|i| i.url.into())); + // Generate local thumbnail only if no thumbnail was federated and 'sensitive' attributes allow it. + let metadata = fetch_link_metadata_opt(url.as_ref(), do_generate_thumbnail, context).await; + if let Some(thumbnail_url_) = metadata.thumbnail { + thumbnail_url = Some(thumbnail_url_.into()); + } + let url = proxy_image_link_opt_apub(url, context).await?; + let thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, context).await?; - let (embed_title, embed_description, embed_video_url) = metadata_res - .map(|u| (u.title, u.description, u.embed_video_url)) - .unwrap_or_default(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); - let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source) - .map(|s| remove_slurs(&s, slur_regex)); + let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source); + let body = process_markdown_opt(&body, slur_regex, context).await?; let language_id = LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?; @@ -252,15 +250,16 @@ impl Object for ApubPost { updated: page.updated.map(Into::into), deleted: Some(false), nsfw: page.sensitive, - embed_title, - embed_description, - embed_video_url, + embed_title: metadata.opengraph_data.title, + embed_description: metadata.opengraph_data.description, + embed_video_url: metadata.opengraph_data.embed_video_url, thumbnail_url, 
ap_id: Some(page.id.clone().into()), local: Some(false), language_id, featured_community: None, featured_local: None, + url_content_type: metadata.content_type, } } else { // if is mod action, only update locked/stickied fields, nothing else @@ -299,7 +298,6 @@ mod tests { instance::ApubSite, person::{tests::parse_lemmy_person, ApubPerson}, post::ApubPost, - tests::init_context, }, protocol::tests::file_to_json_object, }; @@ -311,7 +309,7 @@ mod tests { #[tokio::test] #[serial] async fn test_parse_lemmy_post() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; let (person, site) = parse_lemmy_person(&context).await?; let community = parse_lemmy_community(&context).await?; @@ -335,7 +333,7 @@ mod tests { #[tokio::test] #[serial] async fn test_convert_mastodon_post_title() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; let (person, site) = parse_lemmy_person(&context).await?; let community = parse_lemmy_community(&context).await?; diff --git a/crates/apub/src/objects/private_message.rs b/crates/apub/src/objects/private_message.rs index 6608d8616a..d5c00632f0 100644 --- a/crates/apub/src/objects/private_message.rs +++ b/crates/apub/src/objects/private_message.rs @@ -12,9 +12,13 @@ use activitypub_federation::{ traits::Object, }; use chrono::{DateTime, Utc}; -use lemmy_api_common::{context::LemmyContext, utils::check_person_block}; +use lemmy_api_common::{ + context::LemmyContext, + utils::{check_person_block, local_site_opt_to_slur_regex, process_markdown}, +}; use lemmy_db_schema::{ source::{ + local_site::LocalSite, person::Person, private_message::{PrivateMessage, PrivateMessageInsertForm}, }, @@ -121,7 +125,10 @@ impl Object for ApubPrivateMessage { let recipient = note.to[0].dereference(context).await?; check_person_block(creator.id, recipient.id, &mut context.pool()).await?; + let local_site = LocalSite::read(&mut 
context.pool()).await.ok(); + let slur_regex = &local_site_opt_to_slur_regex(&local_site); let content = read_from_string_or_source(&note.content, &None, &note.source); + let content = process_markdown(&content, slur_regex, context).await?; let form = PrivateMessageInsertForm { creator_id: creator.id, @@ -146,7 +153,6 @@ mod tests { objects::{ instance::{tests::parse_lemmy_instance, ApubSite}, person::ApubPerson, - tests::init_context, }, protocol::tests::file_to_json_object, }; @@ -185,7 +191,7 @@ mod tests { #[tokio::test] #[serial] async fn test_parse_lemmy_pm() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; let url = Url::parse("https://enterprise.lemmy.ml/private_message/1621")?; let data = prepare_comment_test(&url, &context).await?; let json: ChatMessage = file_to_json_object("assets/lemmy/objects/chat_message.json")?; @@ -208,7 +214,7 @@ mod tests { #[tokio::test] #[serial] async fn test_parse_pleroma_pm() -> LemmyResult<()> { - let context = init_context().await?; + let context = LemmyContext::init_test_context().await; let url = Url::parse("https://enterprise.lemmy.ml/private_message/1621")?; let data = prepare_comment_test(&url, &context).await?; let pleroma_url = Url::parse("https://queer.hacktivis.me/objects/2")?; diff --git a/crates/apub/src/protocol/objects/group.rs b/crates/apub/src/protocol/objects/group.rs index 7dce298a6d..1a85cb59fe 100644 --- a/crates/apub/src/protocol/objects/group.rs +++ b/crates/apub/src/protocol/objects/group.rs @@ -25,11 +25,6 @@ use activitypub_federation::{ }; use chrono::{DateTime, Utc}; use lemmy_api_common::{context::LemmyContext, utils::local_site_opt_to_slur_regex}; -use lemmy_db_schema::{ - newtypes::InstanceId, - source::community::{CommunityInsertForm, CommunityUpdateForm}, - utils::naive_now, -}; use lemmy_utils::{ error::LemmyError, utils::slurs::{check_slurs, check_slurs_opt}, @@ -94,64 +89,4 @@ impl Group { check_slurs_opt(&description, 
slur_regex)?; Ok(()) } - - pub(crate) fn into_insert_form(self, instance_id: InstanceId) -> CommunityInsertForm { - let description = read_from_string_or_source_opt(&self.summary, &None, &self.source); - - CommunityInsertForm { - name: self.preferred_username.clone(), - title: self.name.unwrap_or(self.preferred_username.clone()), - description, - removed: None, - published: self.published, - updated: self.updated, - deleted: Some(false), - nsfw: Some(self.sensitive.unwrap_or(false)), - actor_id: Some(self.id.into()), - local: Some(false), - private_key: None, - hidden: None, - public_key: self.public_key.public_key_pem, - last_refreshed_at: Some(naive_now()), - icon: self.icon.map(|i| i.url.into()), - banner: self.image.map(|i| i.url.into()), - followers_url: Some(self.followers.into()), - inbox_url: Some(self.inbox.into()), - shared_inbox_url: self.endpoints.map(|e| e.shared_inbox.into()), - moderators_url: self.attributed_to.map(Into::into), - posting_restricted_to_mods: self.posting_restricted_to_mods, - instance_id, - featured_url: self.featured.map(Into::into), - } - } - - pub(crate) fn into_update_form(self) -> CommunityUpdateForm { - CommunityUpdateForm { - title: Some(self.name.unwrap_or(self.preferred_username)), - description: Some(read_from_string_or_source_opt( - &self.summary, - &None, - &self.source, - )), - removed: None, - published: self.published.map(Into::into), - updated: Some(self.updated.map(Into::into)), - deleted: None, - nsfw: Some(self.sensitive.unwrap_or(false)), - actor_id: Some(self.id.into()), - local: None, - private_key: None, - hidden: None, - public_key: Some(self.public_key.public_key_pem), - last_refreshed_at: Some(naive_now()), - icon: Some(self.icon.map(|i| i.url.into())), - banner: Some(self.image.map(|i| i.url.into())), - followers_url: Some(self.followers.into()), - inbox_url: Some(self.inbox.into()), - shared_inbox_url: Some(self.endpoints.map(|e| e.shared_inbox.into())), - moderators_url: 
self.attributed_to.map(Into::into), - posting_restricted_to_mods: self.posting_restricted_to_mods, - featured_url: self.featured.map(Into::into), - } - } } diff --git a/crates/apub/src/protocol/objects/page.rs b/crates/apub/src/protocol/objects/page.rs index 6f9ec7c1f2..fbcb982549 100644 --- a/crates/apub/src/protocol/objects/page.rs +++ b/crates/apub/src/protocol/objects/page.rs @@ -20,7 +20,6 @@ use activitypub_federation::{ use chrono::{DateTime, Utc}; use itertools::Itertools; use lemmy_api_common::context::LemmyContext; -use lemmy_db_schema::newtypes::DbUrl; use lemmy_utils::error::{LemmyError, LemmyErrorType}; use serde::{de::Error, Deserialize, Deserializer, Serialize}; use serde_with::skip_serializing_none; @@ -72,24 +71,25 @@ pub struct Page { #[derive(Clone, Debug, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] pub(crate) struct Link { - pub(crate) href: Url, - pub(crate) r#type: LinkType, + href: Url, + media_type: Option, + r#type: LinkType, } #[derive(Clone, Debug, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] pub(crate) struct Image { #[serde(rename = "type")] - pub(crate) kind: ImageType, - pub(crate) url: Url, + kind: ImageType, + url: Url, } #[derive(Clone, Debug, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] pub(crate) struct Document { #[serde(rename = "type")] - pub(crate) kind: DocumentType, - pub(crate) url: Url, + kind: DocumentType, + url: Url, } #[derive(Clone, Debug, Deserialize, Serialize)] @@ -167,11 +167,21 @@ impl Page { } impl Attachment { - pub(crate) fn new(url: DbUrl) -> Attachment { - Attachment::Link(Link { - href: url.into(), - r#type: Default::default(), - }) + /// Creates new attachment for a given link and mime type. 
+ pub(crate) fn new(url: Url, media_type: Option) -> Attachment { + let is_image = media_type.clone().unwrap_or_default().starts_with("image"); + if is_image { + Attachment::Image(Image { + kind: Default::default(), + url, + }) + } else { + Attachment::Link(Link { + href: url, + media_type, + r#type: Default::default(), + }) + } } } diff --git a/crates/db_schema/src/impls/image_upload.rs b/crates/db_schema/src/impls/image_upload.rs deleted file mode 100644 index b62e5ceba6..0000000000 --- a/crates/db_schema/src/impls/image_upload.rs +++ /dev/null @@ -1,35 +0,0 @@ -use crate::{ - newtypes::LocalUserId, - schema::image_upload::dsl::{image_upload, local_user_id}, - source::image_upload::{ImageUpload, ImageUploadForm}, - utils::{get_conn, DbPool}, -}; -use diesel::{insert_into, result::Error, ExpressionMethods, QueryDsl, Table}; -use diesel_async::RunQueryDsl; - -impl ImageUpload { - pub async fn create(pool: &mut DbPool<'_>, form: &ImageUploadForm) -> Result { - let conn = &mut get_conn(pool).await?; - insert_into(image_upload) - .values(form) - .get_result::(conn) - .await - } - - pub async fn get_all_by_local_user_id( - pool: &mut DbPool<'_>, - user_id: &LocalUserId, - ) -> Result, Error> { - let conn = &mut get_conn(pool).await?; - image_upload - .filter(local_user_id.eq(user_id)) - .select(image_upload::all_columns()) - .load::(conn) - .await - } - - pub async fn delete_by_alias(pool: &mut DbPool<'_>, alias: &str) -> Result { - let conn = &mut get_conn(pool).await?; - diesel::delete(image_upload.find(alias)).execute(conn).await - } -} diff --git a/crates/db_schema/src/impls/images.rs b/crates/db_schema/src/impls/images.rs new file mode 100644 index 0000000000..a5982bd98d --- /dev/null +++ b/crates/db_schema/src/impls/images.rs @@ -0,0 +1,78 @@ +use crate::{ + newtypes::{DbUrl, LocalUserId}, + schema::{ + local_image::dsl::{local_image, local_user_id, pictrs_alias}, + remote_image::dsl::{link, remote_image}, + }, + source::images::{LocalImage, LocalImageForm, 
RemoteImage, RemoteImageForm}, + utils::{get_conn, DbPool}, +}; +use diesel::{ + dsl::exists, + insert_into, + result::Error, + select, + ExpressionMethods, + NotFound, + QueryDsl, + Table, +}; +use diesel_async::RunQueryDsl; +use url::Url; + +impl LocalImage { + pub async fn create(pool: &mut DbPool<'_>, form: &LocalImageForm) -> Result { + let conn = &mut get_conn(pool).await?; + insert_into(local_image) + .values(form) + .get_result::(conn) + .await + } + + pub async fn get_all_by_local_user_id( + pool: &mut DbPool<'_>, + user_id: &LocalUserId, + ) -> Result, Error> { + let conn = &mut get_conn(pool).await?; + local_image + .filter(local_user_id.eq(user_id)) + .select(local_image::all_columns()) + .load::(conn) + .await + } + + pub async fn delete_by_alias(pool: &mut DbPool<'_>, alias: &str) -> Result { + let conn = &mut get_conn(pool).await?; + diesel::delete(local_image.filter(pictrs_alias.eq(alias))) + .execute(conn) + .await + } +} + +impl RemoteImage { + pub async fn create(pool: &mut DbPool<'_>, links: Vec) -> Result { + let conn = &mut get_conn(pool).await?; + let forms = links + .into_iter() + .map(|url| RemoteImageForm { link: url.into() }) + .collect::>(); + insert_into(remote_image) + .values(forms) + .on_conflict_do_nothing() + .execute(conn) + .await + } + + pub async fn validate(pool: &mut DbPool<'_>, link_: DbUrl) -> Result<(), Error> { + let conn = &mut get_conn(pool).await?; + + let exists = select(exists(remote_image.filter((link).eq(link_)))) + .get_result::(conn) + .await?; + if exists { + Ok(()) + } else { + Err(NotFound) + } + } +} diff --git a/crates/db_schema/src/impls/mod.rs b/crates/db_schema/src/impls/mod.rs index cf75750ca0..6d4549b14c 100644 --- a/crates/db_schema/src/impls/mod.rs +++ b/crates/db_schema/src/impls/mod.rs @@ -11,7 +11,7 @@ pub mod email_verification; pub mod federation_allowlist; pub mod federation_blocklist; pub mod federation_queue_state; -pub mod image_upload; +pub mod images; pub mod instance; pub mod 
instance_block; pub mod language; diff --git a/crates/db_schema/src/impls/post.rs b/crates/db_schema/src/impls/post.rs index daa5e71dc8..f49af62269 100644 --- a/crates/db_schema/src/impls/post.rs +++ b/crates/db_schema/src/impls/post.rs @@ -434,6 +434,7 @@ mod tests { language_id: Default::default(), featured_community: false, featured_local: false, + url_content_type: None, }; // Post Like diff --git a/crates/db_schema/src/schema.rs b/crates/db_schema/src/schema.rs index daea8ded10..f709d159be 100644 --- a/crates/db_schema/src/schema.rs +++ b/crates/db_schema/src/schema.rs @@ -301,15 +301,6 @@ diesel::table! { } } -diesel::table! { - image_upload (pictrs_alias) { - local_user_id -> Int4, - pictrs_alias -> Text, - pictrs_delete_token -> Text, - published -> Timestamptz, - } -} - diesel::table! { instance (id) { id -> Int4, @@ -341,6 +332,15 @@ diesel::table! { } } +diesel::table! { + local_image (pictrs_alias) { + local_user_id -> Int4, + pictrs_alias -> Text, + pictrs_delete_token -> Text, + published -> Timestamptz, + } +} + diesel::table! { use diesel::sql_types::*; use super::sql_types::ListingTypeEnum; @@ -692,6 +692,7 @@ diesel::table! { language_id -> Int4, featured_community -> Bool, featured_local -> Bool, + url_content_type -> Nullable, } } @@ -807,6 +808,14 @@ diesel::table! { } } +diesel::table! { + remote_image (id) { + id -> Int4, + link -> Text, + published -> Timestamptz, + } +} + diesel::table! 
{ secret (id) { id -> Int4, @@ -922,9 +931,9 @@ diesel::joinable!(email_verification -> local_user (local_user_id)); diesel::joinable!(federation_allowlist -> instance (instance_id)); diesel::joinable!(federation_blocklist -> instance (instance_id)); diesel::joinable!(federation_queue_state -> instance (instance_id)); -diesel::joinable!(image_upload -> local_user (local_user_id)); diesel::joinable!(instance_block -> instance (instance_id)); diesel::joinable!(instance_block -> person (person_id)); +diesel::joinable!(local_image -> local_user (local_user_id)); diesel::joinable!(local_site -> site (site_id)); diesel::joinable!(local_site_rate_limit -> local_site (local_site_id)); diesel::joinable!(local_user -> person (person_id)); @@ -1002,10 +1011,10 @@ diesel::allow_tables_to_appear_in_same_query!( federation_allowlist, federation_blocklist, federation_queue_state, - image_upload, instance, instance_block, language, + local_image, local_site, local_site_rate_limit, local_user, @@ -1040,6 +1049,7 @@ diesel::allow_tables_to_appear_in_same_query!( private_message_report, received_activity, registration_application, + remote_image, secret, sent_activity, site, diff --git a/crates/db_schema/src/source/images.rs b/crates/db_schema/src/source/images.rs new file mode 100644 index 0000000000..f8befb856a --- /dev/null +++ b/crates/db_schema/src/source/images.rs @@ -0,0 +1,50 @@ +use crate::newtypes::{DbUrl, LocalUserId}; +#[cfg(feature = "full")] +use crate::schema::{local_image, remote_image}; +use chrono::{DateTime, Utc}; +use serde_with::skip_serializing_none; +use std::fmt::Debug; +use typed_builder::TypedBuilder; + +#[skip_serializing_none] +#[derive(PartialEq, Eq, Debug, Clone)] +#[cfg_attr(feature = "full", derive(Queryable, Associations))] +#[cfg_attr(feature = "full", diesel(table_name = local_image))] +#[cfg_attr( + feature = "full", + diesel(belongs_to(crate::source::local_user::LocalUser)) +)] +pub struct LocalImage { + pub local_user_id: LocalUserId, + pub 
pictrs_alias: String, + pub pictrs_delete_token: String, + pub published: DateTime, +} + +#[derive(Debug, Clone, TypedBuilder)] +#[cfg_attr(feature = "full", derive(Insertable, AsChangeset))] +#[cfg_attr(feature = "full", diesel(table_name = local_image))] +pub struct LocalImageForm { + pub local_user_id: LocalUserId, + pub pictrs_alias: String, + pub pictrs_delete_token: String, +} + +/// Stores all images which are hosted on remote domains. When attempting to proxy an image, it +/// is checked against this table to avoid Lemmy being used as a general purpose proxy. +#[skip_serializing_none] +#[derive(PartialEq, Eq, Debug, Clone)] +#[cfg_attr(feature = "full", derive(Queryable, Identifiable))] +#[cfg_attr(feature = "full", diesel(table_name = remote_image))] +pub struct RemoteImage { + pub id: i32, + pub link: DbUrl, + pub published: DateTime, +} + +#[derive(Debug, Clone, TypedBuilder)] +#[cfg_attr(feature = "full", derive(Insertable, AsChangeset))] +#[cfg_attr(feature = "full", diesel(table_name = remote_image))] +pub struct RemoteImageForm { + pub link: DbUrl, +} diff --git a/crates/db_schema/src/source/mod.rs b/crates/db_schema/src/source/mod.rs index 814318848d..9a6e4941a3 100644 --- a/crates/db_schema/src/source/mod.rs +++ b/crates/db_schema/src/source/mod.rs @@ -16,7 +16,7 @@ pub mod email_verification; pub mod federation_allowlist; pub mod federation_blocklist; pub mod federation_queue_state; -pub mod image_upload; +pub mod images; pub mod instance; pub mod instance_block; pub mod language; diff --git a/crates/db_schema/src/source/post.rs b/crates/db_schema/src/source/post.rs index 3b22a737ac..4ac3e2a659 100644 --- a/crates/db_schema/src/source/post.rs +++ b/crates/db_schema/src/source/post.rs @@ -55,6 +55,7 @@ pub struct Post { pub featured_community: bool, /// Whether the post is featured to its site. 
pub featured_local: bool, + pub url_content_type: Option, } #[derive(Debug, Clone, TypedBuilder)] @@ -85,6 +86,7 @@ pub struct PostInsertForm { pub language_id: Option, pub featured_community: Option, pub featured_local: Option, + pub url_content_type: Option, } #[derive(Debug, Clone, Default)] @@ -109,6 +111,7 @@ pub struct PostUpdateForm { pub language_id: Option, pub featured_community: Option, pub featured_local: Option, + pub url_content_type: Option, } #[derive(PartialEq, Eq, Debug)] diff --git a/crates/db_views/src/comment_view.rs b/crates/db_views/src/comment_view.rs index 62f6634c1a..61f8c82e83 100644 --- a/crates/db_views/src/comment_view.rs +++ b/crates/db_views/src/comment_view.rs @@ -1015,6 +1015,7 @@ mod tests { language_id: Default::default(), featured_community: false, featured_local: false, + url_content_type: None, }, community: Community { id: data.inserted_community.id, diff --git a/crates/db_views/src/post_view.rs b/crates/db_views/src/post_view.rs index f91e768cbe..abeb7398cd 100644 --- a/crates/db_views/src/post_view.rs +++ b/crates/db_views/src/post_view.rs @@ -1468,6 +1468,7 @@ mod tests { language_id: LanguageId(47), featured_community: false, featured_local: false, + url_content_type: None, }, my_vote: None, unread_comments: 0, diff --git a/crates/routes/Cargo.toml b/crates/routes/Cargo.toml index 365bdb7a0d..d70014678d 100644 --- a/crates/routes/Cargo.toml +++ b/crates/routes/Cargo.toml @@ -33,4 +33,5 @@ url = { workspace = true } once_cell = { workspace = true } tracing = { workspace = true } tokio = { workspace = true } +urlencoding = { workspace = true } rss = "2.0.7" diff --git a/crates/routes/src/images.rs b/crates/routes/src/images.rs index 16b388ca6f..f40b3c10c5 100644 --- a/crates/routes/src/images.rs +++ b/crates/routes/src/images.rs @@ -6,6 +6,7 @@ use actix_web::{ StatusCode, }, web, + web::Query, Error, HttpRequest, HttpResponse, @@ -13,15 +14,17 @@ use actix_web::{ use futures::stream::{Stream, StreamExt}; use 
lemmy_api_common::context::LemmyContext; use lemmy_db_schema::source::{ - image_upload::{ImageUpload, ImageUploadForm}, + images::{LocalImage, LocalImageForm, RemoteImage}, local_site::LocalSite, }; use lemmy_db_views::structs::LocalUserView; -use lemmy_utils::{rate_limit::RateLimitCell, REQWEST_TIMEOUT}; +use lemmy_utils::{error::LemmyResult, rate_limit::RateLimitCell, REQWEST_TIMEOUT}; use reqwest::Body; use reqwest_middleware::{ClientWithMiddleware, RequestBuilder}; use serde::{Deserialize, Serialize}; use std::time::Duration; +use url::Url; +use urlencoding::decode; pub fn config( cfg: &mut web::ServiceConfig, @@ -87,13 +90,14 @@ async fn upload( body: web::Payload, // require login local_user_view: LocalUserView, + client: web::Data, context: web::Data, ) -> Result { // TODO: check rate limit here let pictrs_config = context.settings().pictrs_config()?; let image_url = format!("{}image", pictrs_config.url); - let mut client_req = adapt_request(&req, context.client(), image_url); + let mut client_req = adapt_request(&req, &client, image_url); if let Some(addr) = req.head().peer_addr { client_req = client_req.header("X-Forwarded-For", addr.to_string()) @@ -109,12 +113,12 @@ async fn upload( let images = res.json::().await.map_err(error::ErrorBadRequest)?; if let Some(images) = &images.files { for uploaded_image in images { - let form = ImageUploadForm { + let form = LocalImageForm { local_user_id: local_user_view.local_user.id, pictrs_alias: uploaded_image.file.to_string(), pictrs_delete_token: uploaded_image.delete_token.to_string(), }; - ImageUpload::create(&mut context.pool(), &form) + LocalImage::create(&mut context.pool(), &form) .await .map_err(error::ErrorBadRequest)?; } @@ -158,15 +162,15 @@ async fn full_res( url }; - image(url, req, client).await + image(url, req, &client).await } async fn image( url: String, req: HttpRequest, - client: web::Data, + client: &ClientWithMiddleware, ) -> Result { - let mut client_req = adapt_request(&req, &client, url); + 
let mut client_req = adapt_request(&req, client, url); if let Some(addr) = req.head().peer_addr { client_req = client_req.header("X-Forwarded-For", addr.to_string()); @@ -212,13 +216,35 @@ async fn delete( let res = client_req.send().await.map_err(error::ErrorBadRequest)?; - ImageUpload::delete_by_alias(&mut context.pool(), &file) + LocalImage::delete_by_alias(&mut context.pool(), &file) .await .map_err(error::ErrorBadRequest)?; Ok(HttpResponse::build(res.status()).body(BodyStream::new(res.bytes_stream()))) } +#[derive(Deserialize)] +pub struct ImageProxyParams { + url: String, +} + +pub async fn image_proxy( + Query(params): Query, + context: web::Data, +) -> LemmyResult { + let url = Url::parse(&decode(&params.url)?)?; + + // Check that url corresponds to a federated image so that this can't be abused as a proxy + // for arbitrary purposes. + RemoteImage::validate(&mut context.pool(), url.clone().into()).await?; + + let pictrs_config = context.settings().pictrs_config()?; + let url = format!("{}image/original?proxy={}", pictrs_config.url, &params.url); + let image_response = context.client().get(url).send().await?; + + Ok(HttpResponse::Ok().streaming(image_response.bytes_stream())) +} + fn make_send(mut stream: S) -> impl Stream + Send + Unpin + 'static where S: Stream + Unpin + 'static, diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index 5392191491..e61d92b78e 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -39,8 +39,8 @@ http = { workspace = true } doku = { workspace = true, features = ["url-2"] } uuid = { workspace = true, features = ["serde", "v4"] } rosetta-i18n = { workspace = true } -percent-encoding = { workspace = true } tokio = { workspace = true } +urlencoding = { workspace = true } openssl = "0.10.63" html2text = "0.6.0" deser-hjson = "2.2.4" diff --git a/crates/utils/src/settings/mod.rs b/crates/utils/src/settings/mod.rs index 25aa7206d1..4642a67cfc 100644 --- a/crates/utils/src/settings/mod.rs +++ 
b/crates/utils/src/settings/mod.rs @@ -6,12 +6,13 @@ use crate::{ use anyhow::{anyhow, Context}; use deser_hjson::from_str; use once_cell::sync::Lazy; -use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use regex::Regex; use std::{env, fs, io::Error}; +use urlencoding::encode; pub mod structs; +use crate::settings::structs::PictrsImageMode; use structs::DatabaseConnection; static DEFAULT_CONFIG_FILE: &str = "config/config.hjson"; @@ -53,11 +54,11 @@ impl Settings { DatabaseConnection::Parts(parts) => { format!( "postgres://{}:{}@{}:{}/{}", - utf8_percent_encode(&parts.user, NON_ALPHANUMERIC), - utf8_percent_encode(&parts.password, NON_ALPHANUMERIC), + encode(&parts.user), + encode(&parts.password), parts.host, parts.port, - utf8_percent_encode(&parts.database, NON_ALPHANUMERIC), + encode(&parts.database), ) } } @@ -112,3 +113,17 @@ impl Settings { .ok_or_else(|| anyhow!("images_disabled").into()) } } + +impl PictrsConfig { + pub fn image_mode(&self) -> PictrsImageMode { + if let Some(cache_external_link_previews) = self.cache_external_link_previews { + if cache_external_link_previews { + PictrsImageMode::StoreLinkPreviews + } else { + PictrsImageMode::None + } + } else { + self.image_mode.clone() + } + } +} diff --git a/crates/utils/src/settings/structs.rs b/crates/utils/src/settings/structs.rs index c4ff315ae3..46e9b747c9 100644 --- a/crates/utils/src/settings/structs.rs +++ b/crates/utils/src/settings/structs.rs @@ -12,7 +12,6 @@ pub struct Settings { /// settings related to the postgresql database #[default(Default::default())] pub database: DatabaseConfig, - /// Settings related to activitypub federation /// Pictrs image server configuration. #[default(Some(Default::default()))] pub(crate) pictrs: Option, @@ -79,22 +78,43 @@ pub struct PictrsConfig { #[default(None)] pub api_key: Option, - /// By default the thumbnails for external links are stored in pict-rs. 
This ensures that they - /// can be reliably retrieved and can be resized using pict-rs APIs. However it also increases - /// storage usage. In case this is disabled, the Opengraph image is directly returned as - /// thumbnail. + /// Backwards compatibility with 0.18.1. False is equivalent to `image_mode: None`, true is + /// equivalent to `image_mode: StoreLinkPreviews`. /// - /// In some countries it is forbidden to copy preview images from newspaper articles and only - /// hotlinking is allowed. If that is the case for your instance, make sure that this setting is - /// disabled. - #[default(true)] - pub cache_external_link_previews: bool, + /// To be removed in 0.20 + pub(super) cache_external_link_previews: Option, - /// Timeout for uploading images to pictrs (in seconds) + /// Specifies how to handle remote images, so that users don't have to connect directly to remote servers. + #[default(PictrsImageMode::StoreLinkPreviews)] + pub(super) image_mode: PictrsImageMode, + + /// Timeout for uploading images to pictrs (in seconds) #[default(30)] pub upload_timeout: u64, } +#[derive(Debug, Deserialize, Serialize, Clone, SmartDefault, Document, PartialEq)] +#[serde(deny_unknown_fields)] +pub enum PictrsImageMode { + /// Leave images unchanged, don't generate any local thumbnails for post urls. Instead the + /// Opengraph image is directly returned as thumbnail + None, + /// Generate thumbnails for external post urls and store them persistently in pict-rs. This + /// ensures that they can be reliably retrieved and can be resized using pict-rs APIs. However + /// it also increases storage usage. + /// + /// This is the default behaviour, and also matches Lemmy 0.18. + #[default] + StoreLinkPreviews, + /// If enabled, all images from remote domains are rewritten to pass through `/api/v3/image_proxy`, + /// including embedded images in markdown. Images are stored temporarily in pict-rs for caching.
+ /// This improves privacy as users don't expose their IP to untrusted servers, and decreases load + /// on other servers. However it increases bandwidth use for the local server. + /// + /// Requires pict-rs 0.5 + ProxyAllImages, +} + #[derive(Debug, Deserialize, Serialize, Clone, SmartDefault, Document)] #[serde(default)] pub struct DatabaseConfig { diff --git a/crates/utils/src/utils/markdown.rs b/crates/utils/src/utils/markdown.rs deleted file mode 100644 index 1f2884e1fc..0000000000 --- a/crates/utils/src/utils/markdown.rs +++ /dev/null @@ -1,113 +0,0 @@ -use markdown_it::MarkdownIt; -use once_cell::sync::Lazy; - -mod spoiler_rule; - -static MARKDOWN_PARSER: Lazy = Lazy::new(|| { - let mut parser = MarkdownIt::new(); - markdown_it::plugins::cmark::add(&mut parser); - markdown_it::plugins::extra::add(&mut parser); - spoiler_rule::add(&mut parser); - - parser -}); - -/// Replace special HTML characters in API parameters to prevent XSS attacks. -/// -/// Taken from https://github.com/OWASP/CheatSheetSeries/blob/master/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.md#output-encoding-for-html-contexts -/// -/// `>` is left in place because it is interpreted as markdown quote. -pub fn sanitize_html(text: &str) -> String { - text - .replace('&', "&") - .replace('<', "<") - .replace('\"', """) - .replace('\'', "'") -} - -/// Converts text from markdown to HTML, while escaping special characters. -pub fn markdown_to_html(text: &str) -> String { - MARKDOWN_PARSER.parse(text).xrender() -} - -#[cfg(test)] -mod tests { - #![allow(clippy::unwrap_used)] - #![allow(clippy::indexing_slicing)] - - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn test_basic_markdown() { - let tests: Vec<_> = vec![ - ( - "headings", - "# h1\n## h2\n### h3\n#### h4\n##### h5\n###### h6", - "

h1

\n

h2

\n

h3

\n

h4

\n
h5
\n
h6
\n" - ), - ( - "line breaks", - "First\rSecond", - "

First\nSecond

\n"), - ( - "emphasis", - "__bold__ **bold** *italic* ***bold+italic***", - "

bold bold italic bold+italic

\n" - ), - ( - "blockquotes", - "> #### Hello\n > \n > - Hola\n > - 안영 \n>> Goodbye\n", - "
\n

Hello

\n
    \n
  • Hola
  • \n
  • 안영
  • \n
\n
\n

Goodbye

\n
\n
\n" - ), - ( - "lists (ordered, unordered)", - "1. pen\n2. apple\n3. apple pen\n- pen\n- pineapple\n- pineapple pen", - "
    \n
  1. pen
  2. \n
  3. apple
  4. \n
  5. apple pen
  6. \n
\n
    \n
  • pen
  • \n
  • pineapple
  • \n
  • pineapple pen
  • \n
\n" - ), - ( - "code and code blocks", - "this is my amazing `code snippet` and my amazing ```code block```", - "

this is my amazing code snippet and my amazing code block

\n" - ), - ( - "links", - "[Lemmy](https://join-lemmy.org/ \"Join Lemmy!\")", - "

Lemmy

\n" - ), - ( - "images", - "![My linked image](https://image.com \"image alt text\")", - "

\"My

\n" - ), - // Ensure any custom plugins are added to 'MARKDOWN_PARSER' implementation. - ( - "basic spoiler", - "::: spoiler click to see more\nhow spicy!\n:::\n", - "
click to see more

how spicy!\n

\n" - ), - ( - "escape html special chars", - " hello &\"", - "

<script>alert(‘xss’);</script> hello &"

\n" - ) - ]; - - tests.iter().for_each(|&(msg, input, expected)| { - let result = markdown_to_html(input); - - assert_eq!( - result, expected, - "Testing {}, with original input '{}'", - msg, input - ); - }); - } - - #[test] - fn test_sanitize_html() { - let sanitized = sanitize_html(" hello &\"'"); - let expected = "<script>alert('xss');</script> hello &"'"; - assert_eq!(expected, sanitized) - } -} diff --git a/crates/utils/src/utils/markdown/link_rule.rs b/crates/utils/src/utils/markdown/link_rule.rs new file mode 100644 index 0000000000..15edcd7b1d --- /dev/null +++ b/crates/utils/src/utils/markdown/link_rule.rs @@ -0,0 +1,38 @@ +use markdown_it::{generics::inline::full_link, MarkdownIt, Node, NodeValue, Renderer}; + +/// Renders markdown links. Copied directly from markdown-it source, unlike original code it also +/// sets `rel=nofollow` attribute. +/// +/// TODO: We can set nofollow only if post was not made by mod/admin, but then we have to construct +/// new parser for every invocation which might have performance implications. 
+/// https://github.com/markdown-it-rust/markdown-it/blob/master/src/plugins/cmark/inline/link.rs +#[derive(Debug)] +pub struct Link { + pub url: String, + pub title: Option, +} + +impl NodeValue for Link { + fn render(&self, node: &Node, fmt: &mut dyn Renderer) { + let mut attrs = node.attrs.clone(); + attrs.push(("href", self.url.clone())); + attrs.push(("rel", "nofollow".to_string())); + + if let Some(title) = &self.title { + attrs.push(("title", title.clone())); + } + + fmt.open("a", &attrs); + fmt.contents(&node.children); + fmt.close("a"); + } +} + +pub fn add(md: &mut MarkdownIt) { + full_link::add::(md, |href, title| { + Node::new(Link { + url: href.unwrap_or_default(), + title, + }) + }); +} diff --git a/crates/utils/src/utils/markdown/mod.rs b/crates/utils/src/utils/markdown/mod.rs new file mode 100644 index 0000000000..bee2dcb94b --- /dev/null +++ b/crates/utils/src/utils/markdown/mod.rs @@ -0,0 +1,246 @@ +use crate::settings::SETTINGS; +use markdown_it::{plugins::cmark::inline::image::Image, MarkdownIt}; +use once_cell::sync::Lazy; +use url::Url; +use urlencoding::encode; + +mod link_rule; +mod spoiler_rule; + +static MARKDOWN_PARSER: Lazy = Lazy::new(|| { + let mut parser = MarkdownIt::new(); + markdown_it::plugins::cmark::add(&mut parser); + markdown_it::plugins::extra::add(&mut parser); + spoiler_rule::add(&mut parser); + link_rule::add(&mut parser); + + parser +}); + +/// Replace special HTML characters in API parameters to prevent XSS attacks. +/// +/// Taken from https://github.com/OWASP/CheatSheetSeries/blob/master/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.md#output-encoding-for-html-contexts +/// +/// `>` is left in place because it is interpreted as markdown quote. 
+pub fn sanitize_html(text: &str) -> String { + text + .replace('&', "&") + .replace('<', "<") + .replace('\"', """) + .replace('\'', "'") +} + +pub fn markdown_to_html(text: &str) -> String { + MARKDOWN_PARSER.parse(text).xrender() +} + +/// Rewrites all links to remote domains in markdown, so they go through `/api/v3/image_proxy`. +pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec) { + let ast = MARKDOWN_PARSER.parse(&src); + let mut links_offsets = vec![]; + + // Walk the syntax tree to find positions of image links + ast.walk(|node, _depth| { + if let Some(image) = node.cast::() { + // srcmap is always present for image + // https://github.com/markdown-it-rust/markdown-it/issues/36#issuecomment-1777844387 + let node_offsets = node.srcmap.expect("srcmap is none").get_byte_offsets(); + // necessary for custom emojis which look like `![name](url "title")` + let start_offset = node_offsets.1 + - image.url.len() + - 1 + - image + .title + .as_ref() + .map(|t| t.len() + 3) + .unwrap_or_default(); + let end_offset = node_offsets.1 - 1; + + links_offsets.push((start_offset, end_offset)); + } + }); + + let mut links = vec![]; + // Go through the collected links in reverse order + while let Some((start, end)) = links_offsets.pop() { + let content = src.get(start..end).unwrap_or_default(); + // necessary for custom emojis which look like `![name](url "title")` + let (url, extra) = if content.contains(' ') { + let split = content.split_once(' ').expect("split is valid"); + (split.0, Some(split.1)) + } else { + (content, None) + }; + match Url::parse(url) { + Ok(parsed) => { + links.push(parsed.clone()); + // If link points to remote domain, replace with proxied link + if parsed.domain() != Some(&SETTINGS.hostname) { + let mut proxied = format!( + "{}/api/v3/image_proxy?url={}", + SETTINGS.get_protocol_and_hostname(), + encode(url), + ); + // restore custom emoji format + if let Some(extra) = extra { + proxied = format!("{proxied} {extra}"); + } + 
src.replace_range(start..end, &proxied); + } + } + Err(_) => { + // If its not a valid url, replace with empty text + src.replace_range(start..end, ""); + } + } + } + + (src, links) +} + +#[cfg(test)] +mod tests { + #![allow(clippy::unwrap_used)] + #![allow(clippy::indexing_slicing)] + + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn test_basic_markdown() { + let tests: Vec<_> = vec![ + ( + "headings", + "# h1\n## h2\n### h3\n#### h4\n##### h5\n###### h6", + "

h1

\n

h2

\n

h3

\n

h4

\n
h5
\n
h6
\n" + ), + ( + "line breaks", + "First\rSecond", + "

First\nSecond

\n"), + ( + "emphasis", + "__bold__ **bold** *italic* ***bold+italic***", + "

bold bold italic bold+italic

\n" + ), + ( + "blockquotes", + "> #### Hello\n > \n > - Hola\n > - 안영 \n>> Goodbye\n", + "
\n

Hello

\n
    \n
  • Hola
  • \n
  • 안영
  • \n
\n
\n

Goodbye

\n
\n
\n" + ), + ( + "lists (ordered, unordered)", + "1. pen\n2. apple\n3. apple pen\n- pen\n- pineapple\n- pineapple pen", + "
    \n
  1. pen
  2. \n
  3. apple
  4. \n
  5. apple pen
  6. \n
\n
    \n
  • pen
  • \n
  • pineapple
  • \n
  • pineapple pen
  • \n
\n" + ), + ( + "code and code blocks", + "this is my amazing `code snippet` and my amazing ```code block```", + "

this is my amazing code snippet and my amazing code block

\n" + ), + // Links with added nofollow attribute + ( + "links", + "[Lemmy](https://join-lemmy.org/ \"Join Lemmy!\")", + "

Lemmy

\n" + ), + // Remote images with proxy + ( + "images", + "![My linked image](https://example.com/image.png \"image alt text\")", + "

\"My

\n" + ), + // Local images without proxy + ( + "images", + "![My linked image](https://lemmy-alpha/image.png \"image alt text\")", + "

\"My

\n" + ), + // Ensure spoiler plugin is added + ( + "basic spoiler", + "::: spoiler click to see more\nhow spicy!\n:::\n", + "
click to see more

how spicy!\n

\n" + ), + ( + "escape html special chars", + " hello &\"", + "

<script>alert(‘xss’);</script> hello &"

\n" + ) + ]; + + tests.iter().for_each(|&(msg, input, expected)| { + let result = markdown_to_html(input); + + assert_eq!( + result, expected, + "Testing {}, with original input '{}'", + msg, input + ); + }); + } + + #[test] + fn test_markdown_proxy_images() { + let tests: Vec<_> = + vec![ + ( + "remote image proxied", + "![link](http://example.com/image.jpg)", + "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)", + ), + ( + "local image unproxied", + "![link](http://lemmy-alpha/image.jpg)", + "![link](http://lemmy-alpha/image.jpg)", + ), + ( + "multiple image links", + "![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)", + "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)", + ), + ( + "empty link handled", + "![image]()", + "![image]()" + ), + ( + "empty label handled", + "![](http://example.com/image.jpg)", + "![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" + ), + ( + "invalid image link removed", + "![image](http-not-a-link)", + "![image]()" + ), + ( + "label with nested markdown handled", + "![a *b* c](http://example.com/image.jpg)", + "![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" + ), + ( + "custom emoji support", + r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#, + r#"![party-blob](https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"# + ) + ]; + + tests.iter().for_each(|&(msg, input, expected)| { + let result = markdown_rewrite_image_links(input.to_string()); + + assert_eq!( + result.0, expected, + "Testing {}, with original input '{}'", + msg, input + ); + }); + } + + #[test] + fn test_sanitize_html() { + let 
sanitized = sanitize_html(" hello &\"'"); + let expected = "<script>alert('xss');</script> hello &"'"; + assert_eq!(expected, sanitized) + } +} diff --git a/docker/federation/lemmy_epsilon.hjson b/docker/federation/lemmy_epsilon.hjson index 17fe2f6356..c24baa9f80 100644 --- a/docker/federation/lemmy_epsilon.hjson +++ b/docker/federation/lemmy_epsilon.hjson @@ -10,4 +10,8 @@ database: { host: postgres_epsilon } + pictrs: { + api_key: "my-pictrs-key" + image_mode: ProxyAllImages + } } diff --git a/docker/federation/lemmy_gamma.hjson b/docker/federation/lemmy_gamma.hjson index 2041c98409..d7e5b60656 100644 --- a/docker/federation/lemmy_gamma.hjson +++ b/docker/federation/lemmy_gamma.hjson @@ -10,4 +10,8 @@ database: { host: postgres_gamma } + pictrs: { + api_key: "my-pictrs-key" + image_mode: ProxyAllImages + } } diff --git a/docker/lemmy.hjson b/docker/lemmy.hjson index c4308b034c..1446d3bb2a 100644 --- a/docker/lemmy.hjson +++ b/docker/lemmy.hjson @@ -21,6 +21,7 @@ pictrs: { url: "http://pictrs:8080/" # api_key: "API_KEY" + image_proxy: true cache_external_link_previews: true } diff --git a/migrations/2023-10-24-131607_proxy_links/down.sql b/migrations/2023-10-24-131607_proxy_links/down.sql new file mode 100644 index 0000000000..66a961ed3d --- /dev/null +++ b/migrations/2023-10-24-131607_proxy_links/down.sql @@ -0,0 +1,4 @@ +DROP TABLE remote_image; + +ALTER TABLE local_image RENAME TO image_upload; + diff --git a/migrations/2023-10-24-131607_proxy_links/up.sql b/migrations/2023-10-24-131607_proxy_links/up.sql new file mode 100644 index 0000000000..80e1822f31 --- /dev/null +++ b/migrations/2023-10-24-131607_proxy_links/up.sql @@ -0,0 +1,8 @@ +CREATE TABLE remote_image ( + id serial PRIMARY KEY, + link text NOT NULL UNIQUE, + published timestamptz DEFAULT now() NOT NULL +); + +ALTER TABLE image_upload RENAME TO local_image; + diff --git a/migrations/2023-10-27-142514_post_url_content_type/down.sql b/migrations/2023-10-27-142514_post_url_content_type/down.sql new file 
mode 100644 index 0000000000..df7e2c3bb3 --- /dev/null +++ b/migrations/2023-10-27-142514_post_url_content_type/down.sql @@ -0,0 +1,3 @@ +ALTER TABLE post + DROP COLUMN url_content_type; + diff --git a/migrations/2023-10-27-142514_post_url_content_type/up.sql b/migrations/2023-10-27-142514_post_url_content_type/up.sql new file mode 100644 index 0000000000..ce78109f01 --- /dev/null +++ b/migrations/2023-10-27-142514_post_url_content_type/up.sql @@ -0,0 +1,3 @@ +ALTER TABLE post + ADD COLUMN url_content_type text; + diff --git a/src/api_routes_http.rs b/src/api_routes_http.rs index 018a445b1b..912dcfbf95 100644 --- a/src/api_routes_http.rs +++ b/src/api_routes_http.rs @@ -130,11 +130,13 @@ use lemmy_apub::api::{ search::search, user_settings_backup::{export_settings, import_settings}, }; +use lemmy_routes::images::image_proxy; use lemmy_utils::rate_limit::RateLimitCell; pub fn config(cfg: &mut web::ServiceConfig, rate_limit: &RateLimitCell) { cfg.service( web::scope("/api/v3") + .route("/image_proxy", web::get().to(image_proxy)) // Site .service( web::scope("/site") diff --git a/src/lib.rs b/src/lib.rs index 7b7436e65d..1b2507f4e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,7 +53,7 @@ use lemmy_utils::{ }; use prometheus::default_registry; use prometheus_metrics::serve_prometheus; -use reqwest_middleware::{ClientBuilder, ClientWithMiddleware}; +use reqwest_middleware::ClientBuilder; use reqwest_tracing::TracingMiddleware; use serde_json::json; use std::{env, ops::Deref}; @@ -198,15 +198,10 @@ pub async fn start_lemmy_server(args: CmdArgs) -> Result<(), LemmyError> { startup_server_handle.stop(true).await; } - // Pictrs cannot use proxy - let pictrs_client = ClientBuilder::new(client_builder(&SETTINGS).no_proxy().build()?) - .with(TracingMiddleware::default()) - .build(); Some(create_http_server( federation_config.clone(), SETTINGS.clone(), federation_enabled, - pictrs_client, )?) 
} else { None @@ -272,7 +267,6 @@ fn create_http_server( federation_config: FederationConfig, settings: Settings, federation_enabled: bool, - pictrs_client: ClientWithMiddleware, ) -> Result { // this must come before the HttpServer creation // creates a middleware that populates http metrics for each path, method, and status code @@ -284,6 +278,11 @@ fn create_http_server( let context: LemmyContext = federation_config.deref().clone(); let rate_limit_cell = federation_config.rate_limit_cell().clone(); + // Pictrs cannot use proxy + let pictrs_client = ClientBuilder::new(client_builder(&SETTINGS).no_proxy().build()?) + .with(TracingMiddleware::default()) + .build(); + // Create Http server let bind = (settings.bind, settings.port); let server = HttpServer::new(move || {