feat(ml): better multilingual search with nllb models (#13567)

This commit is contained in:
Mert
2025-03-31 11:06:57 -04:00
committed by GitHub
parent 838a8dd9a6
commit 6789c2ac19
16 changed files with 301 additions and 18 deletions

View File

@@ -86,6 +86,66 @@ RKNN_SUPPORTED_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]
# SoC identifiers listed for RKNN core-mask support
# (meaning inferred from the constant's name — confirm at the call sites).
RKNN_COREMASK_SUPPORTED_SOCS = [
    "rk3576",
    "rk3588",
]
# Lookup table from UI locale codes (Weblate-style, per the constant's name)
# to FLORES-200 language identifiers of the form "<iso639-3>_<Script>".
# Every value must be an identifier that exists in the FLORES-200 list;
# an unknown identifier is presumably not recognised as a language token by
# NLLB-based tokenizers — verify against the consumer of this table.
WEBLATE_TO_FLORES200: dict[str, str] = {
    "af": "afr_Latn",
    "ar": "arb_Arab",
    "az": "azj_Latn",
    "be": "bel_Cyrl",
    "bg": "bul_Cyrl",
    "ca": "cat_Latn",
    "cs": "ces_Latn",
    "da": "dan_Latn",
    "de": "deu_Latn",
    "el": "ell_Grek",
    "en": "eng_Latn",
    "es": "spa_Latn",
    "et": "est_Latn",
    "fa": "pes_Arab",
    "fi": "fin_Latn",
    "fr": "fra_Latn",
    "he": "heb_Hebr",
    "hi": "hin_Deva",
    "hr": "hrv_Latn",
    "hu": "hun_Latn",
    "hy": "hye_Armn",
    "id": "ind_Latn",
    "it": "ita_Latn",
    # FLORES-200 lists Japanese as "jpn_Jpan"; there is no "jpn_Hira" entry.
    "ja": "jpn_Jpan",
    "kmr": "kmr_Latn",
    "ko": "kor_Hang",
    "lb": "ltz_Latn",
    "lt": "lit_Latn",
    # FLORES-200 lists Standard Latvian as "lvs_Latn"; there is no "lav_Latn" entry.
    "lv": "lvs_Latn",
    "mfa": "zsm_Latn",
    "mk": "mkd_Cyrl",
    "mn": "khk_Cyrl",
    "mr": "mar_Deva",
    "ms": "zsm_Latn",
    "nb-NO": "nob_Latn",
    "nn": "nno_Latn",
    "nl": "nld_Latn",
    "pl": "pol_Latn",
    "pt-BR": "por_Latn",
    "pt": "por_Latn",
    "ro": "ron_Latn",
    "ru": "rus_Cyrl",
    "sk": "slk_Latn",
    "sl": "slv_Latn",
    "sr-Cyrl": "srp_Cyrl",
    "sv": "swe_Latn",
    "ta": "tam_Taml",
    "te": "tel_Telu",
    "th": "tha_Thai",
    "tr": "tur_Latn",
    "uk": "ukr_Cyrl",
    "ur": "urd_Arab",
    "vi": "vie_Latn",
    "zh-CN": "zho_Hans",
    "zh-Hans": "zho_Hans",
    "zh-TW": "zho_Hant",
}
def get_model_source(model_name: str) -> ModelSource | None:
cleaned_name = clean_name(model_name)