feat(ml): coreml (#17718)
* coreml
* add test
* use arena by default in native installation
* fix tests
* add env to docs
* remove availability envs
@@ -171,6 +171,7 @@ Redis (Sentinel) URL example JSON before encoding:
 | `MACHINE_LEARNING_MAX_BATCH_SIZE__FACIAL_RECOGNITION`       | Set the maximum number of faces that will be processed at once by the facial recognition model      |  None (`1` if using OpenVINO)   | machine learning |
 | `MACHINE_LEARNING_RKNN`                                     | Enable RKNN hardware acceleration if supported                                                      |             `True`              | machine learning |
 | `MACHINE_LEARNING_RKNN_THREADS`                             | How many threads of RKNN runtime should be spinned up while inferencing.                            |               `1`               | machine learning |
+| `MACHINE_LEARNING_MODEL_ARENA`                              | Pre-allocates CPU memory to avoid memory fragmentation                                              |              true               | machine learning |

 \*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.

@@ -70,7 +70,8 @@ RUN if [ "$DEVICE" = "rocm" ]; then \

 FROM python:3.11-slim-bookworm@sha256:873f91540d53b36327ed4fb018c9669107a4e2a676719720edb4209c4b15d029 AS prod-cpu

-ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2
+ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
+    MACHINE_LEARNING_MODEL_ARENA=false

 FROM python:3.11-slim-bookworm@sha256:873f91540d53b36327ed4fb018c9669107a4e2a676719720edb4209c4b15d029 AS prod-openvino

@@ -88,7 +89,8 @@ RUN apt-get update && \

 FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04@sha256:94c1577b2cd9dd6c0312dc04dff9cb2fdce2b268018abc3d7c2dbcacf1155000 AS prod-cuda

-ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2
+ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
+    MACHINE_LEARNING_MODEL_ARENA=false

 RUN apt-get update && \
     apt-get install --no-install-recommends -yqq libcudnn9-cuda-12 && \
@@ -104,7 +106,8 @@ FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:1f7e92ca7e3a3785680473329ed1091
 FROM prod-cpu AS prod-armnn

 ENV LD_LIBRARY_PATH=/opt/armnn \
-    LD_PRELOAD=/usr/lib/libmimalloc.so.2
+    LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
+    MACHINE_LEARNING_MODEL_ARENA=false

 RUN apt-get update && apt-get install -y --no-install-recommends ocl-icd-libopencl1 mesa-opencl-icd libgomp1 && \
     rm -rf /var/lib/apt/lists/* && \
@@ -127,7 +130,8 @@ FROM prod-cpu AS prod-rknn
 # renovate: datasource=github-tags depName=airockchip/rknn-toolkit2
 ARG RKNN_TOOLKIT_VERSION="v2.3.0"

-ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2
+ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
+    MACHINE_LEARNING_MODEL_ARENA=false

 ADD --checksum=sha256:73993ed4b440460825f21611731564503cc1d5a0c123746477da6cd574f34885 "https://github.com/airockchip/rknn-toolkit2/raw/refs/tags/${RKNN_TOOLKIT_VERSION}/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so" /usr/lib/

@@ -61,6 +61,7 @@ class Settings(BaseSettings):
     request_threads: int = os.cpu_count() or 4
     model_inter_op_threads: int = 0
     model_intra_op_threads: int = 0
+    model_arena: bool = True
     ann: bool = True
     ann_fp16_turbo: bool = False
     ann_tuning_level: int = 2

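For orientation, here is a minimal standalone sketch (not code from this commit) of how a boolean field like `model_arena` picks up its environment variable through pydantic-settings; the `MACHINE_LEARNING_` prefix and the example class are assumptions inferred from the documentation table above.

# Hypothetical, self-contained example; the env prefix and class are illustrative only.
import os

from pydantic_settings import BaseSettings, SettingsConfigDict

os.environ["MACHINE_LEARNING_MODEL_ARENA"] = "false"


class ExampleSettings(BaseSettings):
    # protected_namespaces=() avoids pydantic's warning about fields starting with "model_"
    model_config = SettingsConfigDict(env_prefix="MACHINE_LEARNING_", protected_namespaces=())

    model_arena: bool = True  # arena stays enabled unless the env var overrides it


print(ExampleSettings().model_arena)  # False: the env var set above wins over the default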
@@ -79,6 +79,7 @@ SUPPORTED_PROVIDERS = [
     "CUDAExecutionProvider",
     "ROCMExecutionProvider",
     "OpenVINOExecutionProvider",
+    "CoreMLExecutionProvider",
     "CPUExecutionProvider",
 ]

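As a rough illustration of why the allowlist matters (a sketch, not the project's actual selection logic): onnxruntime reports which execution providers the installed build supports, and a preference-ordered allowlist like the one above can be intersected with that report.

# Illustrative only: intersect a preference-ordered allowlist with what this build provides.
import onnxruntime as ort

SUPPORTED_PROVIDERS = [
    "CUDAExecutionProvider",
    "ROCMExecutionProvider",
    "OpenVINOExecutionProvider",
    "CoreMLExecutionProvider",
    "CPUExecutionProvider",
]

available = set(ort.get_available_providers())
providers = [p for p in SUPPORTED_PROVIDERS if p in available]
print(providers)  # e.g. ['CoreMLExecutionProvider', 'CPUExecutionProvider'] on a macOS build with the CoreML EP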
@@ -96,6 +96,14 @@ class OrtSession:
                         "precision": "FP32",
                         "cache_dir": (self.model_path.parent / "openvino").as_posix(),
                     }
+                case "CoreMLExecutionProvider":
+                    options = {
+                        "ModelFormat": "MLProgram",
+                        "MLComputeUnits": "ALL",
+                        "SpecializationStrategy": "FastPrediction",
+                        "AllowLowPrecisionAccumulationOnGPU": "1",
+                        "ModelCacheDirectory": (self.model_path.parent / "coreml").as_posix(),
+                    }
                 case _:
                     options = {}
             provider_options.append(options)
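To see where these options end up, here is a hedged standalone sketch of creating an onnxruntime session with the CoreML execution provider; it needs a macOS onnxruntime build with the CoreML EP, and the model and cache paths are placeholders, not values from the commit.

# Placeholder paths; requires an onnxruntime build that ships the CoreML EP (macOS).
import onnxruntime as ort

coreml_options = {
    "ModelFormat": "MLProgram",
    "MLComputeUnits": "ALL",
    "SpecializationStrategy": "FastPrediction",
    "AllowLowPrecisionAccumulationOnGPU": "1",
    "ModelCacheDirectory": "/tmp/model-cache/coreml",  # placeholder cache directory
}

session = ort.InferenceSession(
    "/tmp/model-cache/model.onnx",  # placeholder model path
    providers=["CoreMLExecutionProvider", "CPUExecutionProvider"],
    provider_options=[coreml_options, {}],  # one options dict per provider, in the same order
)
print(session.get_providers())

Passing one options dict per provider keeps the CPU fallback untouched while the CoreML provider gets the MLProgram format, compute-unit, and cache settings shown in the hunk above.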
@@ -115,7 +123,7 @@ class OrtSession:
     @property
     def _sess_options_default(self) -> ort.SessionOptions:
         sess_options = ort.SessionOptions()
-        sess_options.enable_cpu_mem_arena = False
+        sess_options.enable_cpu_mem_arena = settings.model_arena

         # avoid thread contention between models
         if settings.model_inter_op_threads > 0:

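The net effect of the hunk above, sketched in isolation with simplified env parsing (not the project's code): the CPU memory arena now follows a setting instead of being hard-coded off, so a native install keeps it enabled while the Docker images export MACHINE_LEARNING_MODEL_ARENA=false.

# Simplified sketch: read the env var directly instead of going through the Settings class.
import os

import onnxruntime as ort

model_arena = os.environ.get("MACHINE_LEARNING_MODEL_ARENA", "true").lower() not in ("false", "0")

sess_options = ort.SessionOptions()
sess_options.enable_cpu_mem_arena = model_arena  # True by default, False inside the Docker images
print(sess_options.enable_cpu_mem_arena)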
@@ -180,6 +180,7 @@ class TestOrtSession:
     CUDA_EP_OUT_OF_ORDER = ["CPUExecutionProvider", "CUDAExecutionProvider"]
     TRT_EP = ["TensorrtExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"]
     ROCM_EP = ["ROCMExecutionProvider", "CPUExecutionProvider"]
+    COREML_EP = ["CoreMLExecutionProvider", "CPUExecutionProvider"]

     @pytest.mark.providers(CPU_EP)
     def test_sets_cpu_provider(self, providers: list[str]) -> None:
@@ -225,6 +226,12 @@ class TestOrtSession:

         assert session.providers == self.ROCM_EP

+    @pytest.mark.providers(COREML_EP)
+    def test_uses_coreml(self, providers: list[str]) -> None:
+        session = OrtSession("ViT-B-32__openai")
+
+        assert session.providers == self.COREML_EP
+
     def test_sets_provider_kwarg(self) -> None:
         providers = ["CUDAExecutionProvider"]
         session = OrtSession("ViT-B-32__openai", providers=providers)
@@ -284,7 +291,6 @@ class TestOrtSession:
         assert session.sess_options.execution_mode == ort.ExecutionMode.ORT_SEQUENTIAL
         assert session.sess_options.inter_op_num_threads == 1
         assert session.sess_options.intra_op_num_threads == 2
-        assert session.sess_options.enable_cpu_mem_arena is False

     def test_sets_default_sess_options_does_not_set_threads_if_non_cpu_and_default_threads(self) -> None:
         session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
@@ -302,6 +308,26 @@ class TestOrtSession:
         assert session.sess_options.inter_op_num_threads == 2
         assert session.sess_options.intra_op_num_threads == 4

+    def test_uses_arena_if_enabled(self, mocker: MockerFixture) -> None:
+        mock_settings = mocker.patch("immich_ml.sessions.ort.settings", autospec=True)
+        mock_settings.model_inter_op_threads = 0
+        mock_settings.model_intra_op_threads = 0
+        mock_settings.model_arena = True
+
+        session = OrtSession("ViT-B-32__openai", providers=["CPUExecutionProvider"])
+
+        assert session.sess_options.enable_cpu_mem_arena
+
+    def test_does_not_use_arena_if_disabled(self, mocker: MockerFixture) -> None:
+        mock_settings = mocker.patch("immich_ml.sessions.ort.settings", autospec=True)
+        mock_settings.model_inter_op_threads = 0
+        mock_settings.model_intra_op_threads = 0
+        mock_settings.model_arena = False
+
+        session = OrtSession("ViT-B-32__openai", providers=["CPUExecutionProvider"])
+
+        assert not session.sess_options.enable_cpu_mem_arena
+
     def test_sets_sess_options_kwarg(self) -> None:
         sess_options = ort.SessionOptions()
         session = OrtSession(