chore(deps): update machine-learning (#6302)

* chore(deps): update machine-learning

* fix typing, use new lifespan syntax

* wrap in try / finally

* move log

---------

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>
Author: renovate[bot]
Date: 2024-01-13 05:00:09 +00:00 (committed by GitHub)
Commit: 20be42cec0 (parent: bd5ae9f31e)
12 changed files with 238 additions and 212 deletions


@@ -1,4 +1,4 @@
FROM python:3.11-bookworm@sha256:291405e32318285d8913b7b03293777c255fb1e89305c82aa495ac747b0049fe as builder
FROM python:3.11-bookworm@sha256:497c00ec2cff14316a6859c4e30fc88e7ab1f11dd254fb43b8f4b201ca657596 as builder
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
@@ -27,7 +27,7 @@ RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
fi
FROM python:3.11-slim-bookworm@sha256:8f64a67710f3d981cf3008d6f9f1dbe61accd7927f165f4e37ea3f8b883ccc3f
FROM python:3.11-slim-bookworm@sha256:637774748f62b832dc11e7b286e48cd716727ed04b45a0322776c01bc526afc3
ARG TARGETPLATFORM
RUN apt-get update && apt-get install -y --no-install-recommends tini libmimalloc2.0 && rm -rf /var/lib/apt/lists/*


@@ -2,7 +2,7 @@ from __future__ import annotations
from ctypes import CDLL, Array, c_bool, c_char_p, c_int, c_ulong, c_void_p
from os.path import exists
from typing import Any, Generic, Protocol, Type, TypeVar
from typing import Any, Protocol, TypeVar
import numpy as np
from numpy.typing import NDArray
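
Note: a recurring change in this commit is swapping the hand-rolled ndarray_f32 / ndarray_i32 aliases for numpy's own numpy.typing.NDArray. A minimal sketch of the equivalence (standalone illustration, not repo code; scale is a made-up function):

import numpy as np
from numpy.typing import NDArray

# Before: ndarray_f32 was a custom alias, np.ndarray[int, np.dtype[np.float32]].
# After: NDArray[np.float32] states the same dtype constraint with numpy's canonical alias.
def scale(x: NDArray[np.float32], factor: float) -> NDArray[np.float32]:
    # float32 arrays stay float32 under Python-float scalar math; astype makes it explicit
    return (x * factor).astype(np.float32)

print(scale(np.ones(3, dtype=np.float32), 2.0))  # [2. 2. 2.]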


@@ -5,10 +5,10 @@ from unittest import mock
import numpy as np
import pytest
from fastapi.testclient import TestClient
from numpy.typing import NDArray
from PIL import Image
from .main import app
from .schemas import ndarray_f32
@pytest.fixture
@@ -17,7 +17,7 @@ def pil_image() -> Image.Image:
@pytest.fixture
def cv_image(pil_image: Image.Image) -> ndarray_f32:
def cv_image(pil_image: Image.Image) -> NDArray[np.float32]:
return np.asarray(pil_image)[:, :, ::-1] # PIL uses RGB while cv2 uses BGR
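
Note: the [:, :, ::-1] slice above reverses the channel axis, converting PIL's RGB layout to the BGR layout cv2 expects. A small sketch of that flip (illustrative values):

import numpy as np

rgb = np.zeros((2, 2, 3), dtype=np.float32)
rgb[..., 0] = 1.0           # set the red channel (index 0 in RGB order)
bgr = rgb[:, :, ::-1]       # reverse the last axis: RGB -> BGR (a view, no copy)
assert bgr[0, 0, 2] == 1.0  # red now sits at index 2, where cv2 expects it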


@@ -2,11 +2,11 @@ import asyncio
import gc
import os
import signal
import sys
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Iterator
from contextlib import asynccontextmanager
from typing import Any, AsyncGenerator, Iterator
from zipfile import BadZipFile
import orjson
@@ -26,7 +26,6 @@ from .schemas import (
)
MultiPartParser.max_file_size = 2**26 # spools to disk if payload is 64 MiB or larger
app = FastAPI()
model_cache = ModelCache(ttl=settings.model_ttl, revalidate=settings.model_ttl > 0)
thread_pool: ThreadPoolExecutor | None = None
@@ -35,8 +34,8 @@ active_requests = 0
last_called: float | None = None
@app.on_event("startup")
def startup() -> None:
@asynccontextmanager
async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]:
global thread_pool
log.info(
(
@@ -44,15 +43,16 @@ def startup() -> None:
f"{f'after {settings.model_ttl}s of inactivity' if settings.model_ttl > 0 else 'disabled'}."
)
)
try:
if settings.request_threads > 0:
# asyncio is a huge bottleneck for performance, so we use a thread pool to run blocking code
thread_pool = ThreadPoolExecutor(settings.request_threads) if settings.request_threads > 0 else None
log.info(f"Initialized request thread pool with {settings.request_threads} threads.")
if settings.model_ttl > 0 and settings.model_ttl_poll_s > 0:
asyncio.ensure_future(idle_shutdown_task())
log.info(f"Initialized request thread pool with {settings.request_threads} threads.")
@app.on_event("shutdown")
def shutdown() -> None:
yield
finally:
log.handlers.clear()
for model in model_cache.cache._cache.values():
del model
@@ -71,6 +71,9 @@ def update_state() -> Iterator[None]:
active_requests -= 1
app = FastAPI(lifespan=lifespan)
@app.get("/", response_model=MessageResponse)
async def root() -> dict[str, str]:
return {"message": "Immich ML"}


@@ -3,10 +3,10 @@ from __future__ import annotations
from pathlib import Path
from typing import Any, NamedTuple
from numpy import ascontiguousarray
import numpy as np
from numpy.typing import NDArray
from ann.ann import Ann
from app.schemas import ndarray_f32, ndarray_i32
from ..config import log, settings
@@ -56,10 +56,10 @@ class AnnSession:
def run(
self,
output_names: list[str] | None,
input_feed: dict[str, ndarray_f32] | dict[str, ndarray_i32],
input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
run_options: Any = None,
) -> list[ndarray_f32]:
inputs: list[ndarray_f32] = [ascontiguousarray(v) for v in input_feed.values()]
) -> list[NDArray[np.float32]]:
inputs: list[NDArray[np.float32]] = [np.ascontiguousarray(v) for v in input_feed.values()]
return self.ann.execute(self.model, inputs)
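
Note: AnnSession.run funnels every input through np.ascontiguousarray, presumably because the native ANN library reads raw buffers via ctypes; the call is a no-op for arrays that are already C-contiguous and copies otherwise. A quick sketch:

import numpy as np

a = np.arange(12, dtype=np.float32).reshape(3, 4)
view = a[:, ::2]                   # strided view, not C-contiguous
print(view.flags["C_CONTIGUOUS"])  # False
buf = np.ascontiguousarray(view)   # compacts into a fresh C-contiguous buffer
print(buf.flags["C_CONTIGUOUS"])   # True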


@@ -6,12 +6,13 @@ from pathlib import Path
from typing import Any, Literal
import numpy as np
from numpy.typing import NDArray
from PIL import Image
from tokenizers import Encoding, Tokenizer
from app.config import clean_name, log
from app.models.transforms import crop, get_pil_resampling, normalize, resize, to_numpy
from app.schemas import ModelType, ndarray_f32, ndarray_i32
from app.schemas import ModelType
from .base import InferenceModel
@@ -40,7 +41,7 @@ class BaseCLIPEncoder(InferenceModel):
self.vision_model = self._make_session(self.visual_path)
log.debug(f"Loaded clip vision model '{self.model_name}'")
def _predict(self, image_or_text: Image.Image | str) -> ndarray_f32:
def _predict(self, image_or_text: Image.Image | str) -> NDArray[np.float32]:
if isinstance(image_or_text, bytes):
image_or_text = Image.open(BytesIO(image_or_text))
@@ -48,7 +49,7 @@ class BaseCLIPEncoder(InferenceModel):
case Image.Image():
if self.mode == "text":
raise TypeError("Cannot encode image as text-only model")
outputs: ndarray_f32 = self.vision_model.run(None, self.transform(image_or_text))[0][0]
outputs: NDArray[np.float32] = self.vision_model.run(None, self.transform(image_or_text))[0][0]
case str():
if self.mode == "vision":
raise TypeError("Cannot encode text as vision-only model")
@@ -59,11 +60,11 @@
return outputs
@abstractmethod
def tokenize(self, text: str) -> dict[str, ndarray_i32]:
def tokenize(self, text: str) -> dict[str, NDArray[np.int32]]:
pass
@abstractmethod
def transform(self, image: Image.Image) -> dict[str, ndarray_f32]:
def transform(self, image: Image.Image) -> dict[str, NDArray[np.float32]]:
pass
@property
@@ -161,11 +162,11 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
self.tokenizer.enable_truncation(max_length=context_length)
log.debug(f"Loaded tokenizer for CLIP model '{self.model_name}'")
def tokenize(self, text: str) -> dict[str, ndarray_i32]:
def tokenize(self, text: str) -> dict[str, NDArray[np.int32]]:
tokens: Encoding = self.tokenizer.encode(text)
return {"text": np.array([tokens.ids], dtype=np.int32)}
def transform(self, image: Image.Image) -> dict[str, ndarray_f32]:
def transform(self, image: Image.Image) -> dict[str, NDArray[np.float32]]:
image = resize(image, self.size)
image = crop(image, self.size)
image_np = to_numpy(image)
@@ -174,7 +175,7 @@
class MCLIPEncoder(OpenCLIPEncoder):
def tokenize(self, text: str) -> dict[str, ndarray_i32]:
def tokenize(self, text: str) -> dict[str, NDArray[np.int32]]:
tokens: Encoding = self.tokenizer.encode(text)
return {
"input_ids": np.array([tokens.ids], dtype=np.int32),


@@ -5,9 +5,10 @@ import cv2
import numpy as np
from insightface.model_zoo import ArcFaceONNX, RetinaFace
from insightface.utils.face_align import norm_crop
from numpy.typing import NDArray
from app.config import clean_name
from app.schemas import BoundingBox, Face, ModelType, ndarray_f32
from app.schemas import Face, ModelType, is_ndarray
from .base import InferenceModel
@@ -36,22 +37,25 @@ class FaceRecognizer(InferenceModel):
)
self.rec_model.prepare(ctx_id=0)
def _predict(self, image: ndarray_f32 | bytes) -> list[Face]:
def _predict(self, image: NDArray[np.uint8] | bytes) -> list[Face]:
if isinstance(image, bytes):
image = cv2.imdecode(np.frombuffer(image, np.uint8), cv2.IMREAD_COLOR)
bboxes, kpss = self.det_model.detect(image)
decoded_image = cv2.imdecode(np.frombuffer(image, np.uint8), cv2.IMREAD_COLOR)
else:
decoded_image = image
assert is_ndarray(decoded_image, np.uint8)
bboxes, kpss = self.det_model.detect(decoded_image)
if bboxes.size == 0:
return []
assert isinstance(image, np.ndarray) and isinstance(kpss, np.ndarray)
assert is_ndarray(kpss, np.float32)
scores = bboxes[:, 4].tolist()
bboxes = bboxes[:, :4].round().tolist()
results = []
height, width, _ = image.shape
height, width, _ = decoded_image.shape
for (x1, y1, x2, y2), score, kps in zip(bboxes, scores, kpss):
cropped_img = norm_crop(image, kps)
embedding: ndarray_f32 = self.rec_model.get_feat(cropped_img)[0]
cropped_img = norm_crop(decoded_image, kps)
embedding: NDArray[np.float32] = self.rec_model.get_feat(cropped_img)[0]
face: Face = {
"imageWidth": width,
"imageHeight": height,


@@ -1,8 +1,7 @@
import numpy as np
from numpy.typing import NDArray
from PIL import Image
from app.schemas import ndarray_f32
_PIL_RESAMPLING_METHODS = {resampling.name.lower(): resampling for resampling in Image.Resampling}
@@ -23,11 +22,13 @@ def crop(img: Image.Image, size: int) -> Image.Image:
return img.crop((left, upper, right, lower))
def to_numpy(img: Image.Image) -> ndarray_f32:
def to_numpy(img: Image.Image) -> NDArray[np.float32]:
return np.asarray(img.convert("RGB")).astype(np.float32) / 255.0
def normalize(img: ndarray_f32, mean: float | ndarray_f32, std: float | ndarray_f32) -> ndarray_f32:
def normalize(
img: NDArray[np.float32], mean: float | NDArray[np.float32], std: float | NDArray[np.float32]
) -> NDArray[np.float32]:
return (img - mean) / std
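
Note: normalize() relies on numpy broadcasting, so a plain float or a per-channel array of shape (3,) both apply cleanly to an (H, W, 3) image. A quick sketch with illustrative constants:

import numpy as np

img = np.full((2, 2, 3), 0.5, dtype=np.float32)
mean = np.array([0.48, 0.46, 0.41], dtype=np.float32)  # broadcasts over H and W
std = np.array([0.27, 0.26, 0.28], dtype=np.float32)
out = (img - mean) / std  # same arithmetic as normalize()
print(out.shape, out.dtype)  # (2, 2, 3) float32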


@@ -1,13 +1,10 @@
from enum import StrEnum
from typing import Any, Protocol, TypeAlias, TypedDict, TypeGuard
from typing import Any, Protocol, TypedDict, TypeGuard
import numpy as np
import numpy.typing as npt
from pydantic import BaseModel
ndarray_f32: TypeAlias = np.ndarray[int, np.dtype[np.float32]]
ndarray_i64: TypeAlias = np.ndarray[int, np.dtype[np.int64]]
ndarray_i32: TypeAlias = np.ndarray[int, np.dtype[np.int32]]
class TextResponse(BaseModel):
__root__: str
@@ -35,7 +32,7 @@ class HasProfiling(Protocol):
class Face(TypedDict):
boundingBox: BoundingBox
embedding: ndarray_f32
embedding: npt.NDArray[np.float32]
imageWidth: int
imageHeight: int
score: float
@@ -43,3 +40,7 @@ class Face(TypedDict):
def has_profiling(obj: Any) -> TypeGuard[HasProfiling]:
return hasattr(obj, "profiling") and isinstance(obj.profiling, dict)
def is_ndarray(obj: Any, dtype: "type[np._DTypeScalar_co]") -> "TypeGuard[npt.NDArray[np._DTypeScalar_co]]":
return isinstance(obj, np.ndarray) and obj.dtype == dtype
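
Note: is_ndarray is a runtime check that doubles as a TypeGuard: when it returns True, a type checker narrows the argument to an ndarray in that branch, which is what lets facial_recognition.py assert its way from "array or bytes" to a concrete array type. A small sketch reusing is_ndarray as defined above (mean_or_zero is an illustrative name):

from typing import Any

import numpy as np


def mean_or_zero(x: Any) -> float:
    if is_ndarray(x, np.float32):
        return float(x.mean())  # x is treated as an ndarray inside this branch
    return 0.0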


@@ -1,4 +1,4 @@
FROM mambaorg/micromamba:bookworm-slim@sha256:a7033851ca71454ed8cfbc958fdd62a72e6fa90f71ccb3aa9b15e67a98f66a1e as builder
FROM mambaorg/micromamba:bookworm-slim@sha256:e2586bd4097636856cbf725304ce240c968bea4123bece888d9e02ca2572ea2d as builder
ENV NODE_ENV=production \
TRANSFORMERS_CACHE=/cache \

File diff suppressed because it is too large


@@ -60,7 +60,6 @@ warn_untyped_fields = true
line-length = 120
target-version = "py311"
select = ["E", "F", "I"]
ignore = ["F401"]
[tool.ruff.per-file-ignores]
"test_main.py" = ["F403"]