From db88ba3cbb7595e7a660c1bfd535c6a707f34e2f Mon Sep 17 00:00:00 2001 From: Nutomic Date: Thu, 28 Nov 2024 23:23:18 +0000 Subject: [PATCH] Allow bypassing image proxy for specific domains (#5223) * Allow bypassing proxy for some domains with ProxyAllImages * remove web:: * remove expect * bypass imgur by default * correct imgur domain * restore processing, cleanup --------- Co-authored-by: sunaurus --- config/defaults.hjson | 9 ++++ crates/routes/src/images.rs | 69 +++++++++++++++------------- crates/utils/src/settings/structs.rs | 9 ++++ crates/utils/translations | 2 +- 4 files changed, 56 insertions(+), 33 deletions(-) diff --git a/config/defaults.hjson b/config/defaults.hjson index c12f879c7..282b7957d 100644 --- a/config/defaults.hjson +++ b/config/defaults.hjson @@ -73,6 +73,15 @@ # # Requires pict-rs 0.5 "ProxyAllImages" + # Allows bypassing proxy for specific image hosts when using ProxyAllImages. + # + # imgur.com is bypassed by default to avoid rate limit errors. When specifying any bypass + # in the config, this default is ignored and you need to list imgur explicitly. To proxy imgur + # requests, specify a noop bypass list, eg `proxy_bypass_domains ["example.org"]`. + proxy_bypass_domains: [ + "i.imgur.com" + /* ... */ + ] # Timeout for uploading images to pictrs (in seconds) upload_timeout: 30 # Resize post thumbnails to this maximum width/height. diff --git a/crates/routes/src/images.rs b/crates/routes/src/images.rs index f535c9147..e41626f12 100644 --- a/crates/routes/src/images.rs +++ b/crates/routes/src/images.rs @@ -1,13 +1,13 @@ use actix_web::{ - body::BodyStream, + body::{BodyStream, BoxBody}, http::{ header::{HeaderName, ACCEPT_ENCODING, HOST}, StatusCode, }, - web, - web::Query, + web::*, HttpRequest, HttpResponse, + Responder, }; use futures::stream::{Stream, StreamExt}; use lemmy_api_common::{context::LemmyContext, request::PictrsResponse}; @@ -23,22 +23,18 @@ use serde::Deserialize; use std::time::Duration; use url::Url; -pub fn config( - cfg: &mut web::ServiceConfig, - client: ClientWithMiddleware, - rate_limit: &RateLimitCell, -) { +pub fn config(cfg: &mut ServiceConfig, client: ClientWithMiddleware, rate_limit: &RateLimitCell) { cfg - .app_data(web::Data::new(client)) + .app_data(Data::new(client)) .service( - web::resource("/pictrs/image") + resource("/pictrs/image") .wrap(rate_limit.image()) - .route(web::post().to(upload)), + .route(post().to(upload)), ) // This has optional query params: /image/{filename}?format=jpg&thumbnail=256 - .service(web::resource("/pictrs/image/{filename}").route(web::get().to(full_res))) - .service(web::resource("/pictrs/image/delete/{token}/{filename}").route(web::get().to(delete))) - .service(web::resource("/pictrs/healthz").route(web::get().to(healthz))); + .service(resource("/pictrs/image/{filename}").route(get().to(full_res))) + .service(resource("/pictrs/image/delete/{token}/{filename}").route(get().to(delete))) + .service(resource("/pictrs/healthz").route(get().to(healthz))); } trait ProcessUrl { @@ -127,11 +123,11 @@ fn adapt_request( async fn upload( req: HttpRequest, - body: web::Payload, + body: Payload, // require login local_user_view: LocalUserView, - client: web::Data, - context: web::Data, + client: Data, + context: Data, ) -> LemmyResult { // TODO: check rate limit here let pictrs_config = context.settings().pictrs_config()?; @@ -171,11 +167,11 @@ async fn upload( } async fn full_res( - filename: web::Path, - web::Query(params): web::Query, + filename: Path, + Query(params): Query, req: HttpRequest, - client: web::Data, - context: web::Data, + client: Data, + context: Data, local_user_view: Option, ) -> LemmyResult { // block access to images if instance is private and unauthorized, public @@ -224,10 +220,10 @@ async fn image( } async fn delete( - components: web::Path<(String, String)>, + components: Path<(String, String)>, req: HttpRequest, - client: web::Data, - context: web::Data, + client: Data, + context: Data, // require login _local_user_view: LocalUserView, ) -> LemmyResult { @@ -251,8 +247,8 @@ async fn delete( async fn healthz( req: HttpRequest, - client: web::Data, - context: web::Data, + client: Data, + context: Data, ) -> LemmyResult { let pictrs_config = context.settings().pictrs_config()?; let url = format!("{}healthz", pictrs_config.url); @@ -265,15 +261,15 @@ async fn healthz( let res = client_req.send().await?; - Ok(HttpResponse::build(convert_status(res.status())).body(BodyStream::new(res.bytes_stream()))) + Ok(HttpResponse::build(res.status()).body(BodyStream::new(res.bytes_stream()))) } pub async fn image_proxy( Query(params): Query, req: HttpRequest, - client: web::Data, - context: web::Data, -) -> LemmyResult { + client: Data, + context: Data, +) -> LemmyResult, HttpResponse>> { let url = Url::parse(¶ms.url)?; // Check that url corresponds to a federated image so that this can't be abused as a proxy @@ -281,10 +277,19 @@ pub async fn image_proxy( RemoteImage::validate(&mut context.pool(), url.clone().into()).await?; let pictrs_config = context.settings().pictrs_config()?; - let processed_url = params.process_url(¶ms.url, &pictrs_config.url); - image(processed_url, req, &client).await + let bypass_proxy = pictrs_config + .proxy_bypass_domains + .iter() + .any(|s| url.domain().is_some_and(|d| d == s)); + if bypass_proxy { + // Bypass proxy and redirect user to original image + Ok(Either::Left(Redirect::to(url.to_string()).respond_to(&req))) + } else { + // Proxy the image data through Lemmy + Ok(Either::Right(image(processed_url, req, &client).await?)) + } } fn make_send(mut stream: S) -> impl Stream + Send + Unpin + 'static diff --git a/crates/utils/src/settings/structs.rs b/crates/utils/src/settings/structs.rs index e8106d482..fe9978161 100644 --- a/crates/utils/src/settings/structs.rs +++ b/crates/utils/src/settings/structs.rs @@ -87,6 +87,15 @@ pub struct PictrsConfig { #[default(PictrsImageMode::StoreLinkPreviews)] pub(super) image_mode: PictrsImageMode, + /// Allows bypassing proxy for specific image hosts when using ProxyAllImages. + /// + /// imgur.com is bypassed by default to avoid rate limit errors. When specifying any bypass + /// in the config, this default is ignored and you need to list imgur explicitly. To proxy imgur + /// requests, specify a noop bypass list, eg `proxy_bypass_domains ["example.org"]`. + #[default(vec!["i.imgur.com".to_string()])] + #[doku(example = "i.imgur.com")] + pub proxy_bypass_domains: Vec, + /// Timeout for uploading images to pictrs (in seconds) #[default(30)] pub upload_timeout: u64, diff --git a/crates/utils/translations b/crates/utils/translations index 072fed29c..dbb09b078 160000 --- a/crates/utils/translations +++ b/crates/utils/translations @@ -1 +1 @@ -Subproject commit 072fed29c839d7b465a22ef8dd94308425a01170 +Subproject commit dbb09b0784982827d5d9b7dcf39f1703c1212b83