Merge pull request #1351 from LemmyNet/shorten-slur-filter

Shorten slur filter to avoid false positives (fixes #535)
This commit is contained in:
Dessalines 2021-01-15 12:03:53 -05:00 committed by GitHub
commit 381fd82c13
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 6 deletions

View file

@ -53,7 +53,7 @@ fn test_valid_post_title() {
#[test] #[test]
fn test_slur_filter() { fn test_slur_filter() {
let test = let test =
"coons test dindu ladyboy tranny retardeds. Capitalized Niggerz. This is a bunch of other safe text."; "faggot test kike tranny cocksucker retardeds. Capitalized Niggerz. This is a bunch of other safe text.";
let slur_free = "No slurs here"; let slur_free = "No slurs here";
assert_eq!( assert_eq!(
remove_slurs(&test), remove_slurs(&test),
@ -63,13 +63,13 @@ fn test_slur_filter() {
let has_slurs_vec = vec![ let has_slurs_vec = vec![
"Niggerz", "Niggerz",
"coons", "cocksucker",
"dindu", "faggot",
"ladyboy", "kike",
"retardeds", "retardeds",
"tranny", "tranny",
]; ];
let has_slurs_err_str = "No slurs - Niggerz, coons, dindu, ladyboy, retardeds, tranny"; let has_slurs_err_str = "No slurs - Niggerz, cocksucker, faggot, kike, retardeds, tranny";
assert_eq!(slur_check(test), Err(has_slurs_vec)); assert_eq!(slur_check(test), Err(has_slurs_vec));
assert_eq!(slur_check(slur_free), Ok(())); assert_eq!(slur_check(slur_free), Ok(()));

View file

@ -7,7 +7,7 @@ use regex::{Regex, RegexBuilder};
lazy_static! { lazy_static! {
static ref EMAIL_REGEX: Regex = Regex::new(r"^[a-zA-Z0-9.!#$%&*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$").unwrap(); static ref EMAIL_REGEX: Regex = Regex::new(r"^[a-zA-Z0-9.!#$%&*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$").unwrap();
static ref SLUR_REGEX: Regex = RegexBuilder::new(r"(fag(g|got|tard)?|maricos?|cock\s?sucker(s|ing)?|\bn(i|1)g(\b|g?(a|er)?(s|z)?)\b|dindu(s?)|mudslime?s?|kikes?|mongoloids?|towel\s*heads?|\bspi(c|k)s?\b|\bchinks?|niglets?|beaners?|\bnips?\b|\bcoons?\b|jungle\s*bunn(y|ies?)|jigg?aboo?s?|\bpakis?\b|rag\s*heads?|gooks?|cunts?|bitch(es|ing|y)?|puss(y|ies?)|twats?|feminazis?|whor(es?|ing)|\bslut(s|t?y)?|\btr(a|@)nn?(y|ies?)|ladyboy(s?)|\b(b|re|r)tard(ed)?s?)").case_insensitive(true).build().unwrap(); static ref SLUR_REGEX: Regex = RegexBuilder::new(r"(fag(g|got|tard)?\b|cock\s?sucker(s|ing)?|\bn(i|1)g(\b|g?(a|er)?(s|z)?)\b|mudslime?s?|kikes?|\bspi(c|k)s?\b|\bchinks?|gooks?|bitch(es|ing|y)?|whor(es?|ing)|\btr(a|@)nn?(y|ies?)|\b(b|re|r)tard(ed)?s?)").case_insensitive(true).build().unwrap();
static ref USERNAME_MATCHES_REGEX: Regex = Regex::new(r"/u/[a-zA-Z][0-9a-zA-Z_]*").unwrap(); static ref USERNAME_MATCHES_REGEX: Regex = Regex::new(r"/u/[a-zA-Z][0-9a-zA-Z_]*").unwrap();
// TODO keep this old one, it didn't work with port well tho // TODO keep this old one, it didn't work with port well tho
// static ref MENTIONS_REGEX: Regex = Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)").unwrap(); // static ref MENTIONS_REGEX: Regex = Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)").unwrap();