mirror of
https://github.com/LemmyNet/lemmy.git
synced 2024-12-23 03:11:32 +00:00
* Removing scheme from block urls. Fixes #4656 * Fix comment. * Fixing domain checking. * Removing pointless URL building in url blocklist regex. * Remove trailing /
This commit is contained in:
parent
6b9d9dfaa5
commit
66e06b3952
2 changed files with 48 additions and 38 deletions
|
@ -536,25 +536,8 @@ pub async fn get_url_blocklist(context: &LemmyContext) -> LemmyResult<RegexSet>
|
|||
.try_get_with::<_, LemmyError>((), async {
|
||||
let urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?;
|
||||
|
||||
let regexes = urls.iter().map(|url| {
|
||||
let url = &url.url;
|
||||
let parsed = Url::parse(url).expect("Coundln't parse URL.");
|
||||
if url.ends_with('/') {
|
||||
format!(
|
||||
"({}://)?{}{}?",
|
||||
parsed.scheme(),
|
||||
escape(parsed.domain().expect("No domain.")),
|
||||
escape(parsed.path())
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"({}://)?{}{}",
|
||||
parsed.scheme(),
|
||||
escape(parsed.domain().expect("No domain.")),
|
||||
escape(parsed.path())
|
||||
)
|
||||
}
|
||||
});
|
||||
// The urls are already validated on saving, so just escape them.
|
||||
let regexes = urls.iter().map(|url| escape(&url.url));
|
||||
|
||||
let set = RegexSet::new(regexes)?;
|
||||
Ok(set)
|
||||
|
|
|
@ -309,21 +309,44 @@ pub fn is_url_blocked(url: &Option<Url>, blocklist: &RegexSet) -> LemmyResult<()
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Check that urls are valid, remove their schemes, and deduplicate the results.
|
||||
pub fn check_urls_are_valid(urls: &Vec<String>) -> LemmyResult<Vec<String>> {
|
||||
let mut parsed_urls = vec![];
|
||||
for url in urls {
|
||||
let url = Url::parse(url).or_else(|e| {
|
||||
if e == ParseError::RelativeUrlWithoutBase {
|
||||
Url::parse(&format!("https://{url}"))
|
||||
} else {
|
||||
Err(e)
|
||||
}
|
||||
})?;
|
||||
|
||||
parsed_urls.push(url.to_string());
|
||||
parsed_urls.push(build_url_str_without_scheme(url)?);
|
||||
}
|
||||
|
||||
Ok(parsed_urls)
|
||||
let unique_urls = parsed_urls.into_iter().unique().collect();
|
||||
Ok(unique_urls)
|
||||
}
|
||||
|
||||
pub fn build_url_str_without_scheme(url_str: &str) -> LemmyResult<String> {
|
||||
// Parse and check for errors
|
||||
let mut url = Url::parse(url_str).or_else(|e| {
|
||||
if e == ParseError::RelativeUrlWithoutBase {
|
||||
Url::parse(&format!("http://{url_str}"))
|
||||
} else {
|
||||
Err(e)
|
||||
}
|
||||
})?;
|
||||
|
||||
// Set the scheme to http, then remove the http:// part
|
||||
url
|
||||
.set_scheme("http")
|
||||
.map_err(|_| LemmyErrorType::InvalidUrl)?;
|
||||
|
||||
let mut out = url
|
||||
.to_string()
|
||||
.get(7..)
|
||||
.ok_or(LemmyErrorType::InvalidUrl)?
|
||||
.to_string();
|
||||
|
||||
// Remove trailing / if necessary
|
||||
if out.ends_with('/') {
|
||||
out.pop();
|
||||
}
|
||||
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -600,17 +623,21 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_url_parsed() {
|
||||
// Make sure the scheme is removed, and uniques also
|
||||
assert_eq!(
|
||||
vec![String::from("https://example.com/")],
|
||||
check_urls_are_valid(&vec![String::from("example.com")]).unwrap()
|
||||
&check_urls_are_valid(&vec![
|
||||
"example.com".to_string(),
|
||||
"http://example.com".to_string(),
|
||||
"https://example.com".to_string(),
|
||||
"https://example.com/test?q=test2&q2=test3#test4".to_string(),
|
||||
])
|
||||
.unwrap(),
|
||||
&vec![
|
||||
"example.com".to_string(),
|
||||
"example.com/test?q=test2&q2=test3#test4".to_string()
|
||||
],
|
||||
);
|
||||
|
||||
assert!(check_urls_are_valid(&vec![
|
||||
String::from("example.com"),
|
||||
String::from("https://example.blog")
|
||||
])
|
||||
.is_ok());
|
||||
|
||||
assert!(check_urls_are_valid(&vec![String::from("https://example .com"),]).is_err());
|
||||
assert!(check_urls_are_valid(&vec!["https://example .com".to_string()]).is_err());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue