From d55bd2f2bcb7eb981f54be46dc233ef7266dafd8 Mon Sep 17 00:00:00 2001 From: Nutomic Date: Tue, 24 Oct 2023 22:25:52 +0200 Subject: [PATCH] Allow Arabic and Cyrillic usernames/community names (fixes #1764) (#4083) * Allow Arabic and Cyrillic usernames/community names (fixes #1764) * update comment --- Cargo.lock | 2 -- Cargo.toml | 2 +- api_tests/src/user.spec.ts | 18 +++++++++++++++ crates/utils/src/utils/validation.rs | 33 +++++++++++++++++++++++----- 4 files changed, 46 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 378f6cb2ec..6d0639edad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,8 +11,6 @@ checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" [[package]] name = "activitypub_federation" version = "0.5.0-beta.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "509cbafa1b42e01b7ca76c26298814a6638825df4fd67aef2f4c9d36a39c2b6d" dependencies = [ "activitystreams-kinds", "actix-web", diff --git a/Cargo.toml b/Cargo.toml index a01cc687b7..95652b3116 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,7 +70,7 @@ lemmy_routes = { version = "=0.19.0-rc.3", path = "./crates/routes" } lemmy_db_views = { version = "=0.19.0-rc.3", path = "./crates/db_views" } lemmy_db_views_actor = { version = "=0.19.0-rc.3", path = "./crates/db_views_actor" } lemmy_db_views_moderator = { version = "=0.19.0-rc.3", path = "./crates/db_views_moderator" } -activitypub_federation = { version = "0.5.0-beta.3", default-features = false, features = [ +activitypub_federation = { git = "https://github.com/LemmyNet/activitypub-federation-rust.git", branch = "webfinger-alphabets", default-features = false, features = [ "actix-web", ] } diesel = "2.1.0" diff --git a/api_tests/src/user.spec.ts b/api_tests/src/user.spec.ts index eddf568b84..d651af7e1c 100644 --- a/api_tests/src/user.spec.ts +++ b/api_tests/src/user.spec.ts @@ -129,3 +129,21 @@ test("Requests with invalid auth should be treated as unauthenticated", async () let posts = invalid_auth.getPosts(form); expect((await posts).posts).toBeDefined(); }); + +test("Create user with Arabic name", async () => { + let userRes = await registerUser(alpha, "تجريب"); + expect(userRes.jwt).toBeDefined(); + let user = new LemmyHttp(alphaUrl, { + headers: { Authorization: `Bearer ${userRes.jwt ?? ""}` }, + }); + + let site = await getSite(user); + expect(site.my_user).toBeDefined(); + if (!site.my_user) { + throw "Missing site user"; + } + apShortname = `@${site.my_user.local_user_view.person.name}@lemmy-alpha:8541`; + + let alphaPerson = (await resolvePerson(alpha, apShortname)).person; + expect(alphaPerson).toBeDefined(); +}); diff --git a/crates/utils/src/utils/validation.rs b/crates/utils/src/utils/validation.rs index 46fe9e2d06..36aa2c5f10 100644 --- a/crates/utils/src/utils/validation.rs +++ b/crates/utils/src/utils/validation.rs @@ -4,8 +4,6 @@ use once_cell::sync::Lazy; use regex::{Regex, RegexBuilder}; use url::Url; -static VALID_ACTOR_NAME_REGEX: Lazy = - Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex")); static VALID_POST_TITLE_REGEX: Lazy = Lazy::new(|| Regex::new(r".*\S{3,200}.*").expect("compile regex")); @@ -89,10 +87,23 @@ fn has_newline(name: &str) -> bool { } pub fn is_valid_actor_name(name: &str, actor_name_max_length: usize) -> LemmyResult<()> { - let check = name.chars().count() <= actor_name_max_length - && VALID_ACTOR_NAME_REGEX.is_match(name) - && !has_newline(name); - if !check { + static VALID_ACTOR_NAME_REGEX_EN: Lazy = + Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex")); + static VALID_ACTOR_NAME_REGEX_AR: Lazy = + Lazy::new(|| Regex::new(r"^[\p{Arabic}0-9_]{3,}$").expect("compile regex")); + static VALID_ACTOR_NAME_REGEX_RU: Lazy = + Lazy::new(|| Regex::new(r"^[\p{Cyrillic}0-9_]{3,}$").expect("compile regex")); + + let check = name.chars().count() <= actor_name_max_length && !has_newline(name); + + // Only allow characters from a single alphabet per username. This avoids problems with lookalike + // characters like `o` which looks identical in Latin and Cyrillic, and can be used to imitate + // other users. Checks for additional alphabets can be added in the same way. + let lang_check = VALID_ACTOR_NAME_REGEX_EN.is_match(name) + || VALID_ACTOR_NAME_REGEX_AR.is_match(name) + || VALID_ACTOR_NAME_REGEX_RU.is_match(name); + + if !check || !lang_check { Err(LemmyErrorType::InvalidName.into()) } else { Ok(()) @@ -329,8 +340,18 @@ mod tests { let actor_name_max_length = 20; assert!(is_valid_actor_name("Hello_98", actor_name_max_length).is_ok()); assert!(is_valid_actor_name("ten", actor_name_max_length).is_ok()); + assert!(is_valid_actor_name("تجريب", actor_name_max_length).is_ok()); + assert!(is_valid_actor_name("تجريب_123", actor_name_max_length).is_ok()); + assert!(is_valid_actor_name("Владимир", actor_name_max_length).is_ok()); + + // mixed scripts + assert!(is_valid_actor_name("تجريب_abc", actor_name_max_length).is_err()); + assert!(is_valid_actor_name("Влад_abc", actor_name_max_length).is_err()); + // dash assert!(is_valid_actor_name("Hello-98", actor_name_max_length).is_err()); + // too short assert!(is_valid_actor_name("a", actor_name_max_length).is_err()); + // empty assert!(is_valid_actor_name("", actor_name_max_length).is_err()); }