chore(deps): update machine-learning (#6302)
* chore(deps): update machine-learning
* fix typing, use new lifespan syntax
* wrap in try / finally
* move log

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>
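Note: the headline change swaps FastAPI's deprecated @app.on_event("startup") / @app.on_event("shutdown") hooks for the lifespan context-manager API. A minimal sketch of the pattern, with illustrative names rather than Immich's actual code:

    # Setup runs before `yield`, teardown after it; try/finally guarantees
    # teardown even if setup raises partway through.
    from contextlib import asynccontextmanager
    from typing import AsyncGenerator

    from fastapi import FastAPI

    @asynccontextmanager
    async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]:
        resources: dict[str, str] = {}
        try:
            resources["pool"] = "acquired"  # e.g. create a thread pool
            yield  # the application serves requests while suspended here
        finally:
            resources.clear()  # e.g. shut down the pool, clear log handlers

    app = FastAPI(lifespan=lifespan)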
@@ -1,4 +1,4 @@
-FROM python:3.11-bookworm@sha256:291405e32318285d8913b7b03293777c255fb1e89305c82aa495ac747b0049fe as builder
+FROM python:3.11-bookworm@sha256:497c00ec2cff14316a6859c4e30fc88e7ab1f11dd254fb43b8f4b201ca657596 as builder
 
 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
@@ -27,7 +27,7 @@ RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
     fi
 
 
-FROM python:3.11-slim-bookworm@sha256:8f64a67710f3d981cf3008d6f9f1dbe61accd7927f165f4e37ea3f8b883ccc3f
+FROM python:3.11-slim-bookworm@sha256:637774748f62b832dc11e7b286e48cd716727ed04b45a0322776c01bc526afc3
 ARG TARGETPLATFORM
 RUN apt-get update && apt-get install -y --no-install-recommends tini libmimalloc2.0 && rm -rf /var/lib/apt/lists/*
 
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 from ctypes import CDLL, Array, c_bool, c_char_p, c_int, c_ulong, c_void_p
 from os.path import exists
-from typing import Any, Generic, Protocol, Type, TypeVar
+from typing import Any, Protocol, TypeVar
 
 import numpy as np
 from numpy.typing import NDArray
@@ -5,10 +5,10 @@ from unittest import mock
 import numpy as np
 import pytest
 from fastapi.testclient import TestClient
+from numpy.typing import NDArray
 from PIL import Image
 
 from .main import app
-from .schemas import ndarray_f32
 
 
 @pytest.fixture
@@ -17,7 +17,7 @@ def pil_image() -> Image.Image:
 
 
 @pytest.fixture
-def cv_image(pil_image: Image.Image) -> ndarray_f32:
+def cv_image(pil_image: Image.Image) -> NDArray[np.float32]:
     return np.asarray(pil_image)[:, :, ::-1]  # PIL uses RGB while cv2 uses BGR
 
 
@@ -2,11 +2,11 @@ import asyncio
 import gc
 import os
 import signal
-import sys
 import threading
 import time
 from concurrent.futures import ThreadPoolExecutor
-from typing import Any, Iterator
+from contextlib import asynccontextmanager
+from typing import Any, AsyncGenerator, Iterator
 from zipfile import BadZipFile
 
 import orjson
@@ -26,7 +26,6 @@ from .schemas import (
 )
 
 MultiPartParser.max_file_size = 2**26  # spools to disk if payload is 64 MiB or larger
-app = FastAPI()
 
 model_cache = ModelCache(ttl=settings.model_ttl, revalidate=settings.model_ttl > 0)
 thread_pool: ThreadPoolExecutor | None = None
@@ -35,8 +34,8 @@ active_requests = 0
 last_called: float | None = None
 
 
-@app.on_event("startup")
-def startup() -> None:
+@asynccontextmanager
+async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]:
     global thread_pool
     log.info(
         (
@@ -44,15 +43,16 @@ def startup() -> None:
             f"{f'after {settings.model_ttl}s of inactivity' if settings.model_ttl > 0 else 'disabled'}."
         )
     )
 
+    try:
+        if settings.request_threads > 0:
             # asyncio is a huge bottleneck for performance, so we use a thread pool to run blocking code
             thread_pool = ThreadPoolExecutor(settings.request_threads) if settings.request_threads > 0 else None
+            log.info(f"Initialized request thread pool with {settings.request_threads} threads.")
         if settings.model_ttl > 0 and settings.model_ttl_poll_s > 0:
             asyncio.ensure_future(idle_shutdown_task())
-    log.info(f"Initialized request thread pool with {settings.request_threads} threads.")
-
-@app.on_event("shutdown")
-def shutdown() -> None:
+        yield
+    finally:
         log.handlers.clear()
         for model in model_cache.cache._cache.values():
             del model
@@ -71,6 +71,9 @@ def update_state() -> Iterator[None]:
     active_requests -= 1
 
 
+app = FastAPI(lifespan=lifespan)
+
+
 @app.get("/", response_model=MessageResponse)
 async def root() -> dict[str, str]:
     return {"message": "Immich ML"}
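One practical consequence, not visible in the diff itself: a lifespan only runs when the app actually starts, so tests have to use TestClient as a context manager to exercise the startup/shutdown path. A sketch, assuming the module is importable as app.main:

    from fastapi.testclient import TestClient

    from app.main import app  # assumed import path

    # Entering the context runs the lifespan up to `yield`; exiting runs the
    # finally block. A bare TestClient(app).get(...) would skip both halves.
    with TestClient(app) as client:
        assert client.get("/").json() == {"message": "Immich ML"}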
@@ -3,10 +3,10 @@ from __future__ import annotations
 from pathlib import Path
 from typing import Any, NamedTuple
 
-from numpy import ascontiguousarray
+import numpy as np
+from numpy.typing import NDArray
 
 from ann.ann import Ann
-from app.schemas import ndarray_f32, ndarray_i32
 
 from ..config import log, settings
 
@@ -56,10 +56,10 @@ class AnnSession:
     def run(
         self,
         output_names: list[str] | None,
-        input_feed: dict[str, ndarray_f32] | dict[str, ndarray_i32],
+        input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
         run_options: Any = None,
-    ) -> list[ndarray_f32]:
-        inputs: list[ndarray_f32] = [ascontiguousarray(v) for v in input_feed.values()]
+    ) -> list[NDArray[np.float32]]:
+        inputs: list[NDArray[np.float32]] = [np.ascontiguousarray(v) for v in input_feed.values()]
         return self.ann.execute(self.model, inputs)
 
 
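The run() change is a typing and namespace cleanup only: np.ascontiguousarray behaves exactly as the bare import did. For context, a small illustration of why contiguity matters before handing buffers to a native backend:

    import numpy as np

    # Slicing with a step yields a strided view, not contiguous memory.
    x = np.zeros((4, 4), dtype=np.float32)[:, ::2]
    assert not x.flags["C_CONTIGUOUS"]

    # ascontiguousarray copies only when needed; native code reading the
    # raw buffer (like the ANN backend here) requires contiguous input.
    y = np.ascontiguousarray(x)
    assert y.flags["C_CONTIGUOUS"]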
@@ -6,12 +6,13 @@ from pathlib import Path
 from typing import Any, Literal
 
 import numpy as np
+from numpy.typing import NDArray
 from PIL import Image
 from tokenizers import Encoding, Tokenizer
 
 from app.config import clean_name, log
 from app.models.transforms import crop, get_pil_resampling, normalize, resize, to_numpy
-from app.schemas import ModelType, ndarray_f32, ndarray_i32
+from app.schemas import ModelType
 
 from .base import InferenceModel
 
@@ -40,7 +41,7 @@ class BaseCLIPEncoder(InferenceModel):
         self.vision_model = self._make_session(self.visual_path)
         log.debug(f"Loaded clip vision model '{self.model_name}'")
 
-    def _predict(self, image_or_text: Image.Image | str) -> ndarray_f32:
+    def _predict(self, image_or_text: Image.Image | str) -> NDArray[np.float32]:
         if isinstance(image_or_text, bytes):
             image_or_text = Image.open(BytesIO(image_or_text))
 
@@ -48,7 +49,7 @@ class BaseCLIPEncoder(InferenceModel):
             case Image.Image():
                 if self.mode == "text":
                     raise TypeError("Cannot encode image as text-only model")
-                outputs: ndarray_f32 = self.vision_model.run(None, self.transform(image_or_text))[0][0]
+                outputs: NDArray[np.float32] = self.vision_model.run(None, self.transform(image_or_text))[0][0]
             case str():
                 if self.mode == "vision":
                     raise TypeError("Cannot encode text as vision-only model")
@@ -59,11 +60,11 @@ class BaseCLIPEncoder(InferenceModel):
         return outputs
 
     @abstractmethod
-    def tokenize(self, text: str) -> dict[str, ndarray_i32]:
+    def tokenize(self, text: str) -> dict[str, NDArray[np.int32]]:
         pass
 
     @abstractmethod
-    def transform(self, image: Image.Image) -> dict[str, ndarray_f32]:
+    def transform(self, image: Image.Image) -> dict[str, NDArray[np.float32]]:
         pass
 
     @property
@@ -161,11 +162,11 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
         self.tokenizer.enable_truncation(max_length=context_length)
         log.debug(f"Loaded tokenizer for CLIP model '{self.model_name}'")
 
-    def tokenize(self, text: str) -> dict[str, ndarray_i32]:
+    def tokenize(self, text: str) -> dict[str, NDArray[np.int32]]:
         tokens: Encoding = self.tokenizer.encode(text)
         return {"text": np.array([tokens.ids], dtype=np.int32)}
 
-    def transform(self, image: Image.Image) -> dict[str, ndarray_f32]:
+    def transform(self, image: Image.Image) -> dict[str, NDArray[np.float32]]:
         image = resize(image, self.size)
         image = crop(image, self.size)
         image_np = to_numpy(image)
@@ -174,7 +175,7 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
 
 
 class MCLIPEncoder(OpenCLIPEncoder):
-    def tokenize(self, text: str) -> dict[str, ndarray_i32]:
+    def tokenize(self, text: str) -> dict[str, NDArray[np.int32]]:
         tokens: Encoding = self.tokenizer.encode(text)
         return {
             "input_ids": np.array([tokens.ids], dtype=np.int32),
@@ -5,9 +5,10 @@ import cv2
 import numpy as np
 from insightface.model_zoo import ArcFaceONNX, RetinaFace
 from insightface.utils.face_align import norm_crop
+from numpy.typing import NDArray
 
 from app.config import clean_name
-from app.schemas import BoundingBox, Face, ModelType, ndarray_f32
+from app.schemas import Face, ModelType, is_ndarray
 
 from .base import InferenceModel
 
@@ -36,22 +37,25 @@ class FaceRecognizer(InferenceModel):
         )
         self.rec_model.prepare(ctx_id=0)
 
-    def _predict(self, image: ndarray_f32 | bytes) -> list[Face]:
+    def _predict(self, image: NDArray[np.uint8] | bytes) -> list[Face]:
         if isinstance(image, bytes):
-            image = cv2.imdecode(np.frombuffer(image, np.uint8), cv2.IMREAD_COLOR)
-        bboxes, kpss = self.det_model.detect(image)
+            decoded_image = cv2.imdecode(np.frombuffer(image, np.uint8), cv2.IMREAD_COLOR)
+        else:
+            decoded_image = image
+        assert is_ndarray(decoded_image, np.uint8)
+        bboxes, kpss = self.det_model.detect(decoded_image)
         if bboxes.size == 0:
             return []
-        assert isinstance(image, np.ndarray) and isinstance(kpss, np.ndarray)
+        assert is_ndarray(kpss, np.float32)
 
         scores = bboxes[:, 4].tolist()
         bboxes = bboxes[:, :4].round().tolist()
 
         results = []
-        height, width, _ = image.shape
+        height, width, _ = decoded_image.shape
         for (x1, y1, x2, y2), score, kps in zip(bboxes, scores, kpss):
-            cropped_img = norm_crop(image, kps)
-            embedding: ndarray_f32 = self.rec_model.get_feat(cropped_img)[0]
+            cropped_img = norm_crop(decoded_image, kps)
+            embedding: NDArray[np.float32] = self.rec_model.get_feat(cropped_img)[0]
             face: Face = {
                 "imageWidth": width,
                 "imageHeight": height,
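Binding the decoded frame to a new name instead of reusing the image parameter keeps the NDArray | bytes union on the parameter intact, so each branch narrows cleanly for the type checker. As a side benefit, the is_ndarray assert also trips if cv2.imdecode returns None on undecodable input, rather than letting a None flow into det_model.detect.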
@@ -1,8 +1,7 @@
 import numpy as np
+from numpy.typing import NDArray
 from PIL import Image
 
-from app.schemas import ndarray_f32
-
 _PIL_RESAMPLING_METHODS = {resampling.name.lower(): resampling for resampling in Image.Resampling}
 
 
@@ -23,11 +22,13 @@ def crop(img: Image.Image, size: int) -> Image.Image:
     return img.crop((left, upper, right, lower))
 
 
-def to_numpy(img: Image.Image) -> ndarray_f32:
+def to_numpy(img: Image.Image) -> NDArray[np.float32]:
     return np.asarray(img.convert("RGB")).astype(np.float32) / 255.0
 
 
-def normalize(img: ndarray_f32, mean: float | ndarray_f32, std: float | ndarray_f32) -> ndarray_f32:
+def normalize(
+    img: NDArray[np.float32], mean: float | NDArray[np.float32], std: float | NDArray[np.float32]
+) -> NDArray[np.float32]:
     return (img - mean) / std
 
 
@@ -1,13 +1,10 @@
 from enum import StrEnum
-from typing import Any, Protocol, TypeAlias, TypedDict, TypeGuard
+from typing import Any, Protocol, TypedDict, TypeGuard
 
 import numpy as np
+import numpy.typing as npt
 from pydantic import BaseModel
 
-ndarray_f32: TypeAlias = np.ndarray[int, np.dtype[np.float32]]
-ndarray_i64: TypeAlias = np.ndarray[int, np.dtype[np.int64]]
-ndarray_i32: TypeAlias = np.ndarray[int, np.dtype[np.int32]]
-
 
 class TextResponse(BaseModel):
     __root__: str
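The removed aliases are subsumed by numpy.typing: npt.NDArray[np.float32] expands to np.ndarray[Any, np.dtype[np.float32]], so the switch only drops the unused int shape parameter. Side by side, illustratively:

    import numpy as np
    import numpy.typing as npt

    OldStyle = np.ndarray[int, np.dtype[np.float32]]  # removed alias spelling
    NewStyle = npt.NDArray[np.float32]                # numpy.typing spelling

    x: NewStyle = np.zeros(3, dtype=np.float32)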
@@ -35,7 +32,7 @@ class HasProfiling(Protocol):
 
 class Face(TypedDict):
     boundingBox: BoundingBox
-    embedding: ndarray_f32
+    embedding: npt.NDArray[np.float32]
     imageWidth: int
     imageHeight: int
     score: float
@@ -43,3 +40,7 @@ class Face(TypedDict):
 
 def has_profiling(obj: Any) -> TypeGuard[HasProfiling]:
     return hasattr(obj, "profiling") and isinstance(obj.profiling, dict)
+
+
+def is_ndarray(obj: Any, dtype: "type[np._DTypeScalar_co]") -> "TypeGuard[npt.NDArray[np._DTypeScalar_co]]":
+    return isinstance(obj, np.ndarray) and obj.dtype == dtype
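A usage sketch for the new TypeGuard (import path assumed): after the assert, a checker treats the value as an ndarray of the given dtype rather than Any, which is what the facial-recognition changes above rely on.

    import numpy as np

    from app.schemas import is_ndarray  # assumed import path

    def head(buf: np.ndarray | bytes) -> float:
        assert is_ndarray(buf, np.float32)  # narrows the type, checks the dtype
        return float(buf[0])

    print(head(np.arange(3, dtype=np.float32)))  # 0.0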
@@ -1,4 +1,4 @@
-FROM mambaorg/micromamba:bookworm-slim@sha256:a7033851ca71454ed8cfbc958fdd62a72e6fa90f71ccb3aa9b15e67a98f66a1e as builder
+FROM mambaorg/micromamba:bookworm-slim@sha256:e2586bd4097636856cbf725304ce240c968bea4123bece888d9e02ca2572ea2d as builder
 
 ENV NODE_ENV=production \
     TRANSFORMERS_CACHE=/cache \
machine-learning/poetry.lock (generated): 325 lines changed
File diff suppressed because it is too large
@@ -60,7 +60,6 @@ warn_untyped_fields = true
 line-length = 120
 target-version = "py311"
 select = ["E", "F", "I"]
-ignore = ["F401"]
 
 [tool.ruff.per-file-ignores]
 "test_main.py" = ["F403"]
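F401 is ruff's "imported but unused" rule; with the blanket ignore gone, ruff will now flag dead imports, which keeps cleanups like the sys, Generic, and Type removals elsewhere in this commit enforced going forward.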