This commit is contained in:
Felix Ableitner 2024-11-21 22:58:24 +01:00
parent d566710e96
commit e590a811eb

View file

@ -1,9 +1,18 @@
use crate::{newtypes::LanguageId, source::language::Language, utils::DbPool};
use lemmy_utils::error::LemmyResult;
use lingua::{IsoCode639_1, Language as LinguaLanguage, LanguageDetectorBuilder};
use lingua::{IsoCode639_1, Language as LinguaLang, LanguageDetectorBuilder};
pub async fn detect_language(input: &str, pool: &mut DbPool<'_>) -> LemmyResult<LanguageId> {
// TODO: should only detect languages which are allowed in community
// TODO: cache conversion table Lingua to LanguageId and reverse (maybe load it directly from
// migration sql)
// TODO: instead of at post creation, could also run this as a background task
// TODO: probably uses a lot of ram/cpu, need to make it configurable:
// - analyze only local posts or all posts
// - low accuracy or high accuracy setting
// - min confidence value
//
// >>>> This should be a plugin!
let detector = LanguageDetectorBuilder::from_iso_codes_639_1(&[
IsoCode639_1::EN,
IsoCode639_1::ES,
@ -11,7 +20,7 @@ pub async fn detect_language(input: &str, pool: &mut DbPool<'_>) -> LemmyResult<
])
.build();
let lang: Option<LinguaLanguage> = detector.detect_language_of(input);
let lang: Option<LinguaLang> = detector.detect_language_of(input);
let Some(lang) = lang else {
return Ok(LanguageId(0));
};