Merge pull request #2866 from LemmyNet/remove-reqwest-retry

Optimize fetching of post URL metadata
This commit is contained in:
Nutomic 2023-05-18 17:19:54 +02:00 committed by GitHub
commit a9d708f494
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 11 additions and 49 deletions

32
Cargo.lock generated
View file

@ -2771,7 +2771,6 @@ dependencies = [
"pict-rs", "pict-rs",
"reqwest", "reqwest",
"reqwest-middleware", "reqwest-middleware",
"reqwest-retry",
"reqwest-tracing", "reqwest-tracing",
"serde", "serde",
"serde_json", "serde_json",
@ -4331,26 +4330,6 @@ dependencies = [
"thiserror", "thiserror",
] ]
[[package]]
name = "reqwest-retry"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e29d842a94e8ab9b581fd3b906053872aef2fb3e474cbd88712047895d2deee4"
dependencies = [
"anyhow",
"async-trait",
"chrono",
"futures",
"http",
"hyper",
"reqwest",
"reqwest-middleware",
"retry-policies",
"task-local-extensions",
"tokio",
"tracing",
]
[[package]] [[package]]
name = "reqwest-tracing" name = "reqwest-tracing"
version = "0.4.0" version = "0.4.0"
@ -4372,17 +4351,6 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4389f1d5789befaf6029ebd9f7dac4af7f7e3d61b69d4f30e2ac02b57e7712b0" checksum = "4389f1d5789befaf6029ebd9f7dac4af7f7e3d61b69d4f30e2ac02b57e7712b0"
[[package]]
name = "retry-policies"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47f9e19b18c6cdd796cc70aea8a9ea5ee7b813be611c6589e3624fcdbfd05f9d"
dependencies = [
"anyhow",
"chrono",
"rand 0.8.5",
]
[[package]] [[package]]
name = "rgb" name = "rgb"
version = "0.8.34" version = "0.8.34"

View file

@ -78,7 +78,6 @@ reqwest-middleware = "0.2.0"
reqwest-tracing = "0.4.0" reqwest-tracing = "0.4.0"
clokwerk = "0.3.5" clokwerk = "0.3.5"
doku = { version = "0.20.0", features = ["url-2"] } doku = { version = "0.20.0", features = ["url-2"] }
reqwest-retry = "0.2.0"
bcrypt = "0.13.0" bcrypt = "0.13.0"
chrono = { version = "0.4.22", features = ["serde"], default-features = false } chrono = { version = "0.4.22", features = ["serde"], default-features = false }
serde_json = { version = "1.0.87", features = ["preserve_order"] } serde_json = { version = "1.0.87", features = ["preserve_order"] }
@ -134,7 +133,6 @@ reqwest-middleware = { workspace = true }
reqwest-tracing = { workspace = true } reqwest-tracing = { workspace = true }
clokwerk = { workspace = true } clokwerk = { workspace = true }
doku = { workspace = true } doku = { workspace = true }
reqwest-retry = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
futures = { workspace = true } futures = { workspace = true }
actix = { workspace = true } actix = { workspace = true }

View file

@ -185,6 +185,9 @@ impl Object for ApubPost {
name = name.chars().take(MAX_TITLE_LENGTH).collect(); name = name.chars().take(MAX_TITLE_LENGTH).collect();
} }
// read existing, local post if any (for generating mod log)
let old_post = page.id.dereference_local(context).await;
let form = if !page.is_mod_action(context).await? { let form = if !page.is_mod_action(context).await? {
let first_attachment = page.attachment.into_iter().map(Attachment::url).next(); let first_attachment = page.attachment.into_iter().map(Attachment::url).next();
let url = if first_attachment.is_some() { let url = if first_attachment.is_some() {
@ -195,10 +198,13 @@ impl Object for ApubPost {
} else { } else {
None None
}; };
let (metadata_res, thumbnail_url) = if let Some(url) = &url { // Only fetch metadata if the post has a url and was not seen previously. We don't want to
fetch_site_data(context.client(), context.settings(), Some(url)).await // waste resources by fetching metadata for the same post multiple times.
} else { let (metadata_res, thumbnail_url) = match &url {
(None, page.image.map(|i| i.url.into())) Some(url) if old_post.is_err() => {
fetch_site_data(context.client(), context.settings(), Some(url)).await
}
_ => (None, page.image.map(|i| i.url.into())),
}; };
let (embed_title, embed_description, embed_video_url) = metadata_res let (embed_title, embed_description, embed_video_url) = metadata_res
.map(|u| (u.title, u.description, u.embed_video_url)) .map(|u| (u.title, u.description, u.embed_video_url))
@ -245,8 +251,6 @@ impl Object for ApubPost {
.updated(page.updated.map(|u| u.naive_local())) .updated(page.updated.map(|u| u.naive_local()))
.build() .build()
}; };
// read existing, local post if any (for generating mod log)
let old_post = page.id.dereference_local(context).await;
let post = Post::create(context.pool(), &form).await?; let post = Post::create(context.pool(), &form).await?;

View file

@ -34,7 +34,6 @@ use lemmy_utils::{
}; };
use reqwest::Client; use reqwest::Client;
use reqwest_middleware::ClientBuilder; use reqwest_middleware::ClientBuilder;
use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
use reqwest_tracing::TracingMiddleware; use reqwest_tracing::TracingMiddleware;
use std::{env, thread, time::Duration}; use std::{env, thread, time::Duration};
use tracing::subscriber::set_global_default; use tracing::subscriber::set_global_default;
@ -110,18 +109,11 @@ pub async fn start_lemmy_server() -> Result<(), LemmyError> {
let reqwest_client = Client::builder() let reqwest_client = Client::builder()
.user_agent(user_agent.clone()) .user_agent(user_agent.clone())
.timeout(REQWEST_TIMEOUT) .timeout(REQWEST_TIMEOUT)
.connect_timeout(REQWEST_TIMEOUT)
.build()?; .build()?;
let retry_policy = ExponentialBackoff {
max_n_retries: 3,
max_retry_interval: REQWEST_TIMEOUT,
min_retry_interval: Duration::from_millis(100),
backoff_exponent: 2,
};
let client = ClientBuilder::new(reqwest_client.clone()) let client = ClientBuilder::new(reqwest_client.clone())
.with(TracingMiddleware::default()) .with(TracingMiddleware::default())
.with(RetryTransientMiddleware::new_with_policy(retry_policy))
.build(); .build();
// Pictrs cannot use the retry middleware // Pictrs cannot use the retry middleware