mirror of
https://github.com/LemmyNet/lemmy.git
synced 2025-02-10 11:04:49 +00:00
Add some url validation (#5338)
* Add some url validation
* use library fn
* manually follow redirects
* remove log
* avoid infinite redirect/infinite recursion
* fix
* upgrade lib
* check url scheme
This commit is contained in:
parent
52047459bb
commit
8e19d55295
10 changed files with 77 additions and 47 deletions
4
Cargo.lock
generated
4
Cargo.lock
generated
|
@ -10,9 +10,9 @@ checksum = "8f27d075294830fcab6f66e320dab524bc6d048f4a151698e153205559113772"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "activitypub_federation"
|
name = "activitypub_federation"
|
||||||
version = "0.6.1"
|
version = "0.6.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ee819cada736b6e26c59706f9e6ff89a48060e635c0546ff984d84baefc8c13a"
|
checksum = "ce5c105760d36108026acde9cb779d8ef4714d5e551f248a9e8e0369b6671b78"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"activitystreams-kinds",
|
"activitystreams-kinds",
|
||||||
"actix-web",
|
"actix-web",
|
||||||
|
|
|
@ -89,7 +89,7 @@ lemmy_api_common = { version = "=0.19.6-beta.7", path = "./crates/api_common" }
|
||||||
lemmy_routes = { version = "=0.19.6-beta.7", path = "./crates/routes" }
|
lemmy_routes = { version = "=0.19.6-beta.7", path = "./crates/routes" }
|
||||||
lemmy_db_views = { version = "=0.19.6-beta.7", path = "./crates/db_views" }
|
lemmy_db_views = { version = "=0.19.6-beta.7", path = "./crates/db_views" }
|
||||||
lemmy_federate = { version = "=0.19.6-beta.7", path = "./crates/federate" }
|
lemmy_federate = { version = "=0.19.6-beta.7", path = "./crates/federate" }
|
||||||
activitypub_federation = { version = "0.6.1", default-features = false, features = [
|
activitypub_federation = { version = "0.6.2", default-features = false, features = [
|
||||||
"actix-web",
|
"actix-web",
|
||||||
] }
|
] }
|
||||||
diesel = "2.2.6"
|
diesel = "2.2.6"
|
||||||
|
|
|
@ -16,7 +16,7 @@ pub async fn get_link_metadata(
|
||||||
_local_user_view: LocalUserView,
|
_local_user_view: LocalUserView,
|
||||||
) -> LemmyResult<Json<GetSiteMetadataResponse>> {
|
) -> LemmyResult<Json<GetSiteMetadataResponse>> {
|
||||||
let url = Url::parse(&data.url).with_lemmy_type(LemmyErrorType::InvalidUrl)?;
|
let url = Url::parse(&data.url).with_lemmy_type(LemmyErrorType::InvalidUrl)?;
|
||||||
let metadata = fetch_link_metadata(&url, &context).await?;
|
let metadata = fetch_link_metadata(&url, &context, false).await?;
|
||||||
|
|
||||||
Ok(Json(GetSiteMetadataResponse { metadata }))
|
Ok(Json(GetSiteMetadataResponse { metadata }))
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,20 +15,23 @@ use lemmy_db_schema::source::{
|
||||||
site::Site,
|
site::Site,
|
||||||
};
|
};
|
||||||
use lemmy_utils::{
|
use lemmy_utils::{
|
||||||
error::{LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult},
|
error::{FederationError, LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult},
|
||||||
settings::structs::{PictrsImageMode, Settings},
|
settings::structs::{PictrsImageMode, Settings},
|
||||||
REQWEST_TIMEOUT,
|
REQWEST_TIMEOUT,
|
||||||
VERSION,
|
VERSION,
|
||||||
};
|
};
|
||||||
use mime::{Mime, TEXT_HTML};
|
use mime::{Mime, TEXT_HTML};
|
||||||
use reqwest::{
|
use reqwest::{
|
||||||
header::{CONTENT_TYPE, RANGE},
|
header::{CONTENT_TYPE, LOCATION, RANGE},
|
||||||
|
redirect::Policy,
|
||||||
Client,
|
Client,
|
||||||
ClientBuilder,
|
ClientBuilder,
|
||||||
Response,
|
Response,
|
||||||
};
|
};
|
||||||
use reqwest_middleware::ClientWithMiddleware;
|
use reqwest_middleware::ClientWithMiddleware;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::net::IpAddr;
|
||||||
|
use tokio::net::lookup_host;
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
use urlencoding::encode;
|
use urlencoding::encode;
|
||||||
|
@ -41,12 +44,45 @@ pub fn client_builder(settings: &Settings) -> ClientBuilder {
|
||||||
.user_agent(user_agent.clone())
|
.user_agent(user_agent.clone())
|
||||||
.timeout(REQWEST_TIMEOUT)
|
.timeout(REQWEST_TIMEOUT)
|
||||||
.connect_timeout(REQWEST_TIMEOUT)
|
.connect_timeout(REQWEST_TIMEOUT)
|
||||||
|
.redirect(Policy::none())
|
||||||
.use_rustls_tls()
|
.use_rustls_tls()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Fetches metadata for the given link and optionally generates thumbnail.
|
/// Fetches metadata for the given link and optionally generates thumbnail.
|
||||||
#[tracing::instrument(skip_all)]
|
#[tracing::instrument(skip_all)]
|
||||||
pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResult<LinkMetadata> {
|
pub async fn fetch_link_metadata(
|
||||||
|
url: &Url,
|
||||||
|
context: &LemmyContext,
|
||||||
|
recursion: bool,
|
||||||
|
) -> LemmyResult<LinkMetadata> {
|
||||||
|
if url.scheme() != "http" && url.scheme() != "https" {
|
||||||
|
return Err(LemmyErrorType::InvalidUrl.into());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve the domain and throw an error if it points to any internal IP,
|
||||||
|
// using logic from nightly IpAddr::is_global.
|
||||||
|
if !cfg!(debug_assertions) {
|
||||||
|
// TODO: Replace with IpAddr::is_global() once stabilized
|
||||||
|
// https://doc.rust-lang.org/std/net/enum.IpAddr.html#method.is_global
|
||||||
|
let domain = url.domain().ok_or(FederationError::UrlWithoutDomain)?;
|
||||||
|
let invalid_ip = lookup_host((domain.to_owned(), 80))
|
||||||
|
.await?
|
||||||
|
.any(|addr| match addr.ip() {
|
||||||
|
IpAddr::V4(addr) => {
|
||||||
|
addr.is_private() || addr.is_link_local() || addr.is_loopback() || addr.is_multicast()
|
||||||
|
}
|
||||||
|
IpAddr::V6(addr) => {
|
||||||
|
addr.is_loopback()
|
||||||
|
|| addr.is_multicast()
|
||||||
|
|| ((addr.segments()[0] & 0xfe00) == 0xfc00) // is_unique_local
|
||||||
|
|| ((addr.segments()[0] & 0xffc0) == 0xfe80) // is_unicast_link_local
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if invalid_ip {
|
||||||
|
return Err(LemmyErrorType::InvalidUrl.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
info!("Fetching site metadata for url: {}", url);
|
info!("Fetching site metadata for url: {}", url);
|
||||||
// We only fetch the first MB of data in order to not waste bandwidth especially for large
|
// We only fetch the first MB of data in order to not waste bandwidth especially for large
|
||||||
// binary files. This high limit is particularly needed for youtube, which includes a lot of
|
// binary files. This high limit is particularly needed for youtube, which includes a lot of
|
||||||
|
@ -63,6 +99,16 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu
|
||||||
.await?
|
.await?
|
||||||
.error_for_status()?;
|
.error_for_status()?;
|
||||||
|
|
||||||
|
// Manually follow one redirect, using internal IP check. Further redirects are ignored.
|
||||||
|
let location = response
|
||||||
|
.headers()
|
||||||
|
.get(LOCATION)
|
||||||
|
.and_then(|l| l.to_str().ok());
|
||||||
|
if let (Some(location), false) = (location, recursion) {
|
||||||
|
let url = location.parse()?;
|
||||||
|
return Box::pin(fetch_link_metadata(&url, context, true)).await;
|
||||||
|
}
|
||||||
|
|
||||||
let mut content_type: Option<Mime> = response
|
let mut content_type: Option<Mime> = response
|
||||||
.headers()
|
.headers()
|
||||||
.get(CONTENT_TYPE)
|
.get(CONTENT_TYPE)
|
||||||
|
@ -150,7 +196,9 @@ pub async fn generate_post_link_metadata(
|
||||||
context: Data<LemmyContext>,
|
context: Data<LemmyContext>,
|
||||||
) -> LemmyResult<()> {
|
) -> LemmyResult<()> {
|
||||||
let metadata = match &post.url {
|
let metadata = match &post.url {
|
||||||
Some(url) => fetch_link_metadata(url, &context).await.unwrap_or_default(),
|
Some(url) => fetch_link_metadata(url, &context, false)
|
||||||
|
.await
|
||||||
|
.unwrap_or_default(),
|
||||||
_ => Default::default(),
|
_ => Default::default(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -498,7 +546,7 @@ mod tests {
|
||||||
async fn test_link_metadata() -> LemmyResult<()> {
|
async fn test_link_metadata() -> LemmyResult<()> {
|
||||||
let context = LemmyContext::init_test_context().await;
|
let context = LemmyContext::init_test_context().await;
|
||||||
let sample_url = Url::parse("https://gitlab.com/IzzyOnDroid/repo/-/wikis/FAQ")?;
|
let sample_url = Url::parse("https://gitlab.com/IzzyOnDroid/repo/-/wikis/FAQ")?;
|
||||||
let sample_res = fetch_link_metadata(&sample_url, &context).await?;
|
let sample_res = fetch_link_metadata(&sample_url, &context, false).await?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()),
|
Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()),
|
||||||
sample_res.opengraph_data.title
|
sample_res.opengraph_data.title
|
||||||
|
|
|
@ -3,7 +3,7 @@ use crate::{
|
||||||
check_apub_id_valid_with_strictness,
|
check_apub_id_valid_with_strictness,
|
||||||
fetcher::markdown_links::markdown_rewrite_remote_links,
|
fetcher::markdown_links::markdown_rewrite_remote_links,
|
||||||
mentions::collect_non_local_mentions,
|
mentions::collect_non_local_mentions,
|
||||||
objects::{append_attachments_to_comment, read_from_string_or_source, verify_is_remote_object},
|
objects::{append_attachments_to_comment, read_from_string_or_source},
|
||||||
protocol::{
|
protocol::{
|
||||||
objects::{note::Note, LanguageTag},
|
objects::{note::Note, LanguageTag},
|
||||||
InCommunity,
|
InCommunity,
|
||||||
|
@ -13,7 +13,10 @@ use crate::{
|
||||||
use activitypub_federation::{
|
use activitypub_federation::{
|
||||||
config::Data,
|
config::Data,
|
||||||
kinds::object::NoteType,
|
kinds::object::NoteType,
|
||||||
protocol::{values::MediaTypeMarkdownOrHtml, verification::verify_domains_match},
|
protocol::{
|
||||||
|
values::MediaTypeMarkdownOrHtml,
|
||||||
|
verification::{verify_domains_match, verify_is_remote_object},
|
||||||
|
},
|
||||||
traits::Object,
|
traits::Object,
|
||||||
};
|
};
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
use super::verify_is_remote_object;
|
|
||||||
use crate::{
|
use crate::{
|
||||||
activities::GetActorType,
|
activities::GetActorType,
|
||||||
check_apub_id_valid_with_strictness,
|
check_apub_id_valid_with_strictness,
|
||||||
|
@ -15,7 +14,10 @@ use activitypub_federation::{
|
||||||
config::Data,
|
config::Data,
|
||||||
fetch::object_id::ObjectId,
|
fetch::object_id::ObjectId,
|
||||||
kinds::actor::ApplicationType,
|
kinds::actor::ApplicationType,
|
||||||
protocol::{values::MediaTypeHtml, verification::verify_domains_match},
|
protocol::{
|
||||||
|
values::MediaTypeHtml,
|
||||||
|
verification::{verify_domains_match, verify_is_remote_object},
|
||||||
|
},
|
||||||
traits::{Actor, Object},
|
traits::{Actor, Object},
|
||||||
};
|
};
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
|
|
|
@ -1,16 +1,8 @@
|
||||||
use crate::protocol::{objects::page::Attachment, Source};
|
use crate::protocol::{objects::page::Attachment, Source};
|
||||||
use activitypub_federation::{
|
use activitypub_federation::{config::Data, protocol::values::MediaTypeMarkdownOrHtml};
|
||||||
config::Data,
|
|
||||||
fetch::object_id::ObjectId,
|
|
||||||
protocol::values::MediaTypeMarkdownOrHtml,
|
|
||||||
traits::Object,
|
|
||||||
};
|
|
||||||
use anyhow::anyhow;
|
|
||||||
use html2md::parse_html;
|
use html2md::parse_html;
|
||||||
use lemmy_api_common::context::LemmyContext;
|
use lemmy_api_common::context::LemmyContext;
|
||||||
use lemmy_utils::error::LemmyResult;
|
use lemmy_utils::error::LemmyResult;
|
||||||
use serde::Deserialize;
|
|
||||||
use std::fmt::Debug;
|
|
||||||
|
|
||||||
pub mod comment;
|
pub mod comment;
|
||||||
pub mod community;
|
pub mod community;
|
||||||
|
@ -62,22 +54,3 @@ pub(crate) async fn append_attachments_to_comment(
|
||||||
|
|
||||||
Ok(content)
|
Ok(content)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// When for example a Post is made in a remote community, the community will send it back,
|
|
||||||
/// wrapped in Announce. If we simply receive this like any other federated object, it will overwrite the
|
|
||||||
/// existing, local Post. In particular, it will set the field local = false, so that the object
|
|
||||||
/// can't be fetched from the Activitypub HTTP endpoint anymore (which only serves local objects).
|
|
||||||
pub(crate) fn verify_is_remote_object<T>(
|
|
||||||
id: &ObjectId<T>,
|
|
||||||
context: &Data<LemmyContext>,
|
|
||||||
) -> LemmyResult<()>
|
|
||||||
where
|
|
||||||
T: Object<DataType = LemmyContext> + Debug + Send + 'static,
|
|
||||||
for<'de2> <T as Object>::Kind: Deserialize<'de2>,
|
|
||||||
{
|
|
||||||
if id.is_local(context) {
|
|
||||||
Err(anyhow!("cant accept local object from remote instance").into())
|
|
||||||
} else {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
use super::verify_is_remote_object;
|
|
||||||
use crate::{
|
use crate::{
|
||||||
activities::GetActorType,
|
activities::GetActorType,
|
||||||
check_apub_id_valid_with_strictness,
|
check_apub_id_valid_with_strictness,
|
||||||
|
@ -13,7 +12,7 @@ use crate::{
|
||||||
};
|
};
|
||||||
use activitypub_federation::{
|
use activitypub_federation::{
|
||||||
config::Data,
|
config::Data,
|
||||||
protocol::verification::verify_domains_match,
|
protocol::verification::{verify_domains_match, verify_is_remote_object},
|
||||||
traits::{Actor, Object},
|
traits::{Actor, Object},
|
||||||
};
|
};
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
|
|
|
@ -3,7 +3,7 @@ use crate::{
|
||||||
check_apub_id_valid_with_strictness,
|
check_apub_id_valid_with_strictness,
|
||||||
fetcher::markdown_links::{markdown_rewrite_remote_links_opt, to_local_url},
|
fetcher::markdown_links::{markdown_rewrite_remote_links_opt, to_local_url},
|
||||||
local_site_data_cached,
|
local_site_data_cached,
|
||||||
objects::{read_from_string_or_source_opt, verify_is_remote_object},
|
objects::read_from_string_or_source_opt,
|
||||||
protocol::{
|
protocol::{
|
||||||
objects::{
|
objects::{
|
||||||
page::{Attachment, AttributedTo, Hashtag, HashtagType, Page, PageType},
|
page::{Attachment, AttributedTo, Hashtag, HashtagType, Page, PageType},
|
||||||
|
@ -16,7 +16,10 @@ use crate::{
|
||||||
};
|
};
|
||||||
use activitypub_federation::{
|
use activitypub_federation::{
|
||||||
config::Data,
|
config::Data,
|
||||||
protocol::{values::MediaTypeMarkdownOrHtml, verification::verify_domains_match},
|
protocol::{
|
||||||
|
values::MediaTypeMarkdownOrHtml,
|
||||||
|
verification::{verify_domains_match, verify_is_remote_object},
|
||||||
|
},
|
||||||
traits::Object,
|
traits::Object,
|
||||||
};
|
};
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
use super::verify_is_remote_object;
|
|
||||||
use crate::{
|
use crate::{
|
||||||
check_apub_id_valid_with_strictness,
|
check_apub_id_valid_with_strictness,
|
||||||
fetcher::markdown_links::markdown_rewrite_remote_links,
|
fetcher::markdown_links::markdown_rewrite_remote_links,
|
||||||
|
@ -10,7 +9,10 @@ use crate::{
|
||||||
};
|
};
|
||||||
use activitypub_federation::{
|
use activitypub_federation::{
|
||||||
config::Data,
|
config::Data,
|
||||||
protocol::{values::MediaTypeHtml, verification::verify_domains_match},
|
protocol::{
|
||||||
|
values::MediaTypeHtml,
|
||||||
|
verification::{verify_domains_match, verify_is_remote_object},
|
||||||
|
},
|
||||||
traits::Object,
|
traits::Object,
|
||||||
};
|
};
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
|
|
Loading…
Reference in a new issue