Add some url validation (#5338)

This commit is contained in:
Felix Ableitner 2025-02-03 21:14:40 +01:00
parent 707d9211b8
commit cee261d6c8
4 changed files with 349 additions and 197 deletions

484
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -101,7 +101,7 @@ lemmy_db_views = { version = "=0.19.8", path = "./crates/db_views" }
lemmy_db_views_actor = { version = "=0.19.8", path = "./crates/db_views_actor" }
lemmy_db_views_moderator = { version = "=0.19.8", path = "./crates/db_views_moderator" }
lemmy_federate = { version = "=0.19.8", path = "./crates/federate" }
activitypub_federation = { version = "0.5.9", default-features = false, features = [
activitypub_federation = { version = "0.5.10", default-features = false, features = [
"actix-web",
] }
diesel = "2.1.6"

View file

@ -19,7 +19,7 @@ pub async fn get_link_metadata(
_local_user_view: LocalUserView,
) -> LemmyResult<Json<GetSiteMetadataResponse>> {
let url = Url::parse(&data.url).with_lemmy_type(LemmyErrorType::InvalidUrl)?;
let metadata = fetch_link_metadata(&url, &context).await?;
let metadata = fetch_link_metadata(&url, &context, false).await?;
Ok(Json(GetSiteMetadataResponse { metadata }))
}

View file

@ -25,13 +25,16 @@ use lemmy_utils::{
};
use mime::{Mime, TEXT_HTML};
use reqwest::{
header::{CONTENT_TYPE, RANGE},
header::{CONTENT_TYPE, LOCATION, RANGE},
redirect::Policy,
Client,
ClientBuilder,
Response,
};
use reqwest_middleware::ClientWithMiddleware;
use serde::{Deserialize, Serialize};
use std::net::IpAddr;
use tokio::net::lookup_host;
use tracing::{info, warn};
use url::Url;
use urlencoding::encode;
@ -44,11 +47,44 @@ pub fn client_builder(settings: &Settings) -> ClientBuilder {
.user_agent(user_agent.clone())
.timeout(REQWEST_TIMEOUT)
.connect_timeout(REQWEST_TIMEOUT)
.redirect(Policy::none())
}
/// Fetches metadata for the given link and optionally generates thumbnail.
#[tracing::instrument(skip_all)]
pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResult<LinkMetadata> {
pub async fn fetch_link_metadata(
url: &Url,
context: &LemmyContext,
recursion: bool,
) -> LemmyResult<LinkMetadata> {
if url.scheme() != "http" && url.scheme() != "https" {
return Err(LemmyErrorType::InvalidUrl.into());
}
// Resolve the domain and throw an error if it points to any internal IP,
// using logic from nightly IpAddr::is_global.
if !cfg!(debug_assertions) {
// TODO: Replace with IpAddr::is_global() once stabilized
// https://doc.rust-lang.org/std/net/enum.IpAddr.html#method.is_global
let domain = url.domain().ok_or(LemmyErrorType::UrlWithoutDomain)?;
let invalid_ip = lookup_host((domain.to_owned(), 80))
.await?
.any(|addr| match addr.ip() {
IpAddr::V4(addr) => {
addr.is_private() || addr.is_link_local() || addr.is_loopback() || addr.is_multicast()
}
IpAddr::V6(addr) => {
addr.is_loopback()
|| addr.is_multicast()
|| ((addr.segments()[0] & 0xfe00) == 0xfc00) // is_unique_local
|| ((addr.segments()[0] & 0xffc0) == 0xfe80) // is_unicast_link_local
}
});
if invalid_ip {
return Err(LemmyErrorType::InvalidUrl.into());
}
}
info!("Fetching site metadata for url: {}", url);
// We only fetch the first 64kB of data in order to not waste bandwidth especially for large
// binary files
@ -62,6 +98,16 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
.send()
.await?;
// Manually follow one redirect, using internal IP check. Further redirects are ignored.
let location = response
.headers()
.get(LOCATION)
.and_then(|l| l.to_str().ok());
if let (Some(location), false) = (location, recursion) {
let url = location.parse()?;
return Box::pin(fetch_link_metadata(&url, context, true)).await;
}
let mut content_type: Option<Mime> = response
.headers()
.get(CONTENT_TYPE)
@ -150,7 +196,9 @@ pub async fn generate_post_link_metadata(
context: Data<LemmyContext>,
) -> LemmyResult<()> {
let metadata = match &post.url {
Some(url) => fetch_link_metadata(url, &context).await.unwrap_or_default(),
Some(url) => fetch_link_metadata(url, &context, false)
.await
.unwrap_or_default(),
_ => Default::default(),
};
@ -513,7 +561,9 @@ mod tests {
async fn test_link_metadata() {
let context = LemmyContext::init_test_context().await;
let sample_url = Url::parse("https://gitlab.com/IzzyOnDroid/repo/-/wikis/FAQ").unwrap();
let sample_res = fetch_link_metadata(&sample_url, &context).await.unwrap();
let sample_res = fetch_link_metadata(&sample_url, &context, false)
.await
.unwrap();
assert_eq!(
Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()),
sample_res.opengraph_data.title