mirror of
				https://github.com/immich-app/immich.git
				synced 2025-11-04 10:22:53 +09:00 
			
		
		
		
	* feat: add OCR functionality and related configurations * chore: update labeler configuration for machine learning files * feat(i18n): enhance OCR model descriptions and add orientation classification and unwarping features * chore: update Dockerfile to include ccache for improved build performance * feat(ocr): enhance OCR model configuration with orientation classification and unwarping options, update PaddleOCR integration, and improve response structure * refactor(ocr): remove OCR_CLEANUP job from enum and type definitions * refactor(ocr): remove obsolete OCR entity and migration files, and update asset job status and schema to accommodate new OCR table structure * refactor(ocr): update OCR schema and response structure to use individual coordinates instead of bounding box, and adjust related service and repository files * feat: enhance OCR configuration and functionality - Updated OCR settings to include minimum detection box score, minimum detection score, and minimum recognition score. - Refactored PaddleOCRecognizer to utilize new scoring parameters. - Introduced new database tables for asset OCR data and search functionality. - Modified related services and repositories to support the new OCR features. - Updated translations for improved clarity in settings UI. * sql changes * use rapidocr * change dto * update web * update lock * update api * store positions as normalized floats * match column order in db * update admin ui settings descriptions fix max resolution key set min threshold to 0.1 fix bind * apply config correctly, adjust defaults * unnecessary model type * unnecessary sources * fix(ocr): switch RapidOCR lang type from LangDet to LangRec * fix(ocr): expose lang_type (LangRec.CH) and font_path on OcrOptions for RapidOCR * fix(ocr): make OCR text search case- and accent-insensitive using ILIKE + unaccent * fix(ocr): add OCR search fields * fix: Add OCR database migration and update ML prediction logic. 
* trigrams are already case insensitive * add tests * format * update migrations * wrong uuid function * linting * maybe fix medium tests * formatting * fix weblate check * openapi * sql * minor fixes * maybe fix medium tests part 2 * passing medium tests * format web * readd sql * format dart * disabled in e2e * chore: translation ordering --------- Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com> Co-authored-by: Alex Tran <alex.tran1502@gmail.com>
		
			
				
	
	
		
			171 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			171 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from immich_ml.config import clean_name
 | 
						|
from immich_ml.schemas import ModelSource
 | 
						|
 | 
						|
# Model names that get_model_source() resolves to ModelSource.OPENCLIP.
# Membership is tested against the output of clean_name(), so entries must be
# spelled exactly as that helper returns them.
_OPENCLIP_MODELS = {
    "RN101__openai",
    "RN101__yfcc15m",
    "RN50__cc12m",
    "RN50__openai",
    "RN50__yfcc15m",
    "RN50x16__openai",
    "RN50x4__openai",
    "RN50x64__openai",
    "ViT-B-16-SigLIP-256__webli",
    "ViT-B-16-SigLIP-384__webli",
    "ViT-B-16-SigLIP-512__webli",
    "ViT-B-16-SigLIP-i18n-256__webli",
    "ViT-B-16-SigLIP__webli",
    "ViT-B-16-plus-240__laion400m_e31",
    "ViT-B-16-plus-240__laion400m_e32",
    "ViT-B-16__laion400m_e31",
    "ViT-B-16__laion400m_e32",
    "ViT-B-16__openai",
    "ViT-B-32__laion2b-s34b-b79k",
    "ViT-B-32__laion2b_e16",
    "ViT-B-32__laion400m_e31",
    "ViT-B-32__laion400m_e32",
    "ViT-B-32__openai",
    "ViT-H-14-378-quickgelu__dfn5b",
    "ViT-H-14-quickgelu__dfn5b",
    "ViT-H-14__laion2b-s32b-b79k",
    "ViT-L-14-336__openai",
    "ViT-L-14-quickgelu__dfn2b",
    "ViT-L-14__laion2b-s32b-b82k",
    "ViT-L-14__laion400m_e31",
    "ViT-L-14__laion400m_e32",
    "ViT-L-14__openai",
    "ViT-L-16-SigLIP-256__webli",
    "ViT-L-16-SigLIP-384__webli",
    "ViT-SO400M-14-SigLIP-384__webli",
    "ViT-g-14__laion2b-s12b-b42k",
    "XLM-Roberta-Base-ViT-B-32__laion5b_s13b_b90k",
    "XLM-Roberta-Large-ViT-H-14__frozen_laion5b_s13b_b90k",
    "nllb-clip-base-siglip__mrl",
    "nllb-clip-base-siglip__v1",
    "nllb-clip-large-siglip__mrl",
    "nllb-clip-large-siglip__v1",
    "ViT-B-16-SigLIP2__webli",
    "ViT-B-32-SigLIP2-256__webli",
    "ViT-L-16-SigLIP2-256__webli",
    "ViT-L-16-SigLIP2-384__webli",
    "ViT-L-16-SigLIP2-512__webli",
    "ViT-SO400M-14-SigLIP2-378__webli",
    "ViT-SO400M-14-SigLIP2__webli",
    "ViT-SO400M-16-SigLIP2-256__webli",
    "ViT-SO400M-16-SigLIP2-384__webli",
    "ViT-SO400M-16-SigLIP2-512__webli",
    "ViT-gopt-16-SigLIP2-256__webli",
    "ViT-gopt-16-SigLIP2-384__webli",
}
 | 
						|
 | 
						|
 | 
						|
# Model names that get_model_source() resolves to ModelSource.MCLIP.
_MCLIP_MODELS = {
    "LABSE-Vit-L-14",
    "XLM-Roberta-Large-Vit-B-16Plus",
    "XLM-Roberta-Large-Vit-B-32",
    "XLM-Roberta-Large-Vit-L-14",
}
 | 
						|
 | 
						|
 | 
						|
# Model names that get_model_source() resolves to ModelSource.INSIGHTFACE.
_INSIGHTFACE_MODELS = {
    "antelopev2",
    "buffalo_s",
    "buffalo_m",
    "buffalo_l",
}
 | 
						|
 | 
						|
 | 
						|
# Model names that get_model_source() resolves to ModelSource.PADDLE.
_PADDLE_MODELS = {
    "PP-OCRv5_server",
    "PP-OCRv5_mobile",
}
 | 
						|
 | 
						|
# Execution-provider names this module declares as supported.
# NOTE(review): these look like ONNX Runtime provider identifiers, and the
# list order may encode preference (CPU is last) — confirm against the consumer.
SUPPORTED_PROVIDERS = [
    "CUDAExecutionProvider",
    "ROCMExecutionProvider",
    "OpenVINOExecutionProvider",
    "CoreMLExecutionProvider",
    "CPUExecutionProvider",
]
 | 
						|
 | 
						|
# SoC identifiers declared as supported for RKNN.
RKNN_SUPPORTED_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]
# Subset of the SoCs above for which a core mask is supported.
# NOTE(review): presumably this gates NPU core selection — confirm at the call site.
RKNN_COREMASK_SUPPORTED_SOCS = ["rk3576", "rk3588"]
 | 
						|
 | 
						|
 | 
						|
# Maps UI locale codes (as named by the constant: Weblate codes) to
# FLORES-200 style "<language>_<Script>" identifiers.
# Several locales intentionally share one target (e.g. "pt" and "pt-BR",
# "ms" and "mfa", "zh-CN" and "zh-Hans").
WEBLATE_TO_FLORES200 = {
    "af": "afr_Latn",
    "ar": "arb_Arab",
    "az": "azj_Latn",
    "be": "bel_Cyrl",
    "bg": "bul_Cyrl",
    "ca": "cat_Latn",
    "cs": "ces_Latn",
    "da": "dan_Latn",
    "de": "deu_Latn",
    "el": "ell_Grek",
    "en": "eng_Latn",
    "es": "spa_Latn",
    "et": "est_Latn",
    "fa": "pes_Arab",
    "fi": "fin_Latn",
    "fr": "fra_Latn",
    "he": "heb_Hebr",
    "hi": "hin_Deva",
    "hr": "hrv_Latn",
    "hu": "hun_Latn",
    "hy": "hye_Armn",
    "id": "ind_Latn",
    "it": "ita_Latn",
    # NOTE(review): FLORES-200 lists Japanese as "jpn_Jpan"; confirm that
    # "jpn_Hira" is what the consuming model/tokenizer actually expects.
    "ja": "jpn_Hira",
    "kmr": "kmr_Latn",
    "ko": "kor_Hang",
    "lb": "ltz_Latn",
    "lt": "lit_Latn",
    "lv": "lav_Latn",
    "mfa": "zsm_Latn",
    "mk": "mkd_Cyrl",
    "mn": "khk_Cyrl",
    "mr": "mar_Deva",
    "ms": "zsm_Latn",
    "nb-NO": "nob_Latn",
    "nn": "nno_Latn",
    "nl": "nld_Latn",
    "pl": "pol_Latn",
    "pt-BR": "por_Latn",
    "pt": "por_Latn",
    "ro": "ron_Latn",
    "ru": "rus_Cyrl",
    "sk": "slk_Latn",
    "sl": "slv_Latn",
    "sr-Cyrl": "srp_Cyrl",
    "sv": "swe_Latn",
    "ta": "tam_Taml",
    "te": "tel_Telu",
    "th": "tha_Thai",
    "tr": "tur_Latn",
    "uk": "ukr_Cyrl",
    "ur": "urd_Arab",
    "vi": "vie_Latn",
    "zh-CN": "zho_Hans",
    "zh-Hans": "zho_Hans",
    "zh-TW": "zho_Hant",
}
 | 
						|
 | 
						|
 | 
						|
def get_model_source(model_name: str) -> ModelSource | None:
    """Return the model source that a model name belongs to.

    The name is first passed through ``clean_name`` and the result is looked
    up in the module's known-model sets.

    Args:
        model_name: Model name as supplied by the caller.

    Returns:
        The matching ``ModelSource`` member, or ``None`` when the cleaned
        name is not in any of the known-model sets.
    """
    cleaned_name = clean_name(model_name)

    if cleaned_name in _INSIGHTFACE_MODELS:
        return ModelSource.INSIGHTFACE

    if cleaned_name in _MCLIP_MODELS:
        return ModelSource.MCLIP

    if cleaned_name in _OPENCLIP_MODELS:
        return ModelSource.OPENCLIP

    if cleaned_name in _PADDLE_MODELS:
        return ModelSource.PADDLE

    # Unknown model: callers must handle the absence of a source.
    return None
 |