forked from dark_thunder/immich
refactor(ml): modularization and styling (#2835)
* basic refactor and styling * removed batching * module entrypoint * removed unused imports * model superclass, model cache now in app state * fixed cache dir and enforced abstract method --------- Co-authored-by: Alex Tran <alex.tran1502@gmail.com>
This commit is contained in:
37
machine-learning/app/models/clip.py
Normal file
37
machine-learning/app/models/clip.py
Normal file
@ -0,0 +1,37 @@
|
||||
from pathlib import Path
|
||||
|
||||
from PIL.Image import Image
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
from ..schemas import ModelType
|
||||
from .base import InferenceModel
|
||||
|
||||
|
||||
class CLIPSTEncoder(InferenceModel):
    """CLIP encoder that delegates to a ``SentenceTransformer`` model.

    The wrapped model is created eagerly in ``__init__`` and stored on
    ``self.model``; ``predict`` forwards its input to that model's
    ``encode`` method.
    """

    _model_type = ModelType.CLIP

    def __init__(
        self,
        model_name: str,
        cache_dir: Path | None = None,
        **model_kwargs,
    ):
        """Initialise the base model, then build the SentenceTransformer.

        Args:
            model_name: Forwarded to the ``InferenceModel`` initialiser.
            cache_dir: Forwarded to the ``InferenceModel`` initialiser.
                NOTE(review): ``self.cache_dir`` is dereferenced below, so
                the base class presumably substitutes a default when this
                is ``None`` -- confirm against ``InferenceModel``.
            **model_kwargs: Extra keyword arguments passed through
                unchanged to ``SentenceTransformer``.
        """
        super().__init__(model_name, cache_dir)

        # Read both values back from the instance (as populated by the base
        # initialiser) rather than using the raw constructor arguments.
        resolved_name = self.model_name
        cache_folder = self.cache_dir.as_posix()
        self.model = SentenceTransformer(
            resolved_name,
            cache_folder=cache_folder,
            **model_kwargs,
        )

    def predict(self, image_or_text: Image | str) -> list[float]:
        """Encode an image or a text string and return the result as a list.

        Args:
            image_or_text: A PIL image or a string, handed directly to the
                wrapped model's ``encode`` method.

        Returns:
            The output of ``encode`` converted with ``.tolist()``.
        """
        embedding = self.model.encode(image_or_text)
        return embedding.tolist()
|
||||
|
||||
|
||||
# stubs to allow different behavior between the two in the future
|
||||
# and handle loading different image and text clip models
|
||||
class CLIPSTVisionEncoder(CLIPSTEncoder):
    """``CLIPSTEncoder`` variant tagged with ``ModelType.CLIP_VISION``.

    Only ``_model_type`` is overridden; construction and ``predict`` are
    inherited unchanged from ``CLIPSTEncoder``.
    """

    _model_type = ModelType.CLIP_VISION
|
||||
|
||||
|
||||
class CLIPSTTextEncoder(CLIPSTEncoder):
    """``CLIPSTEncoder`` variant tagged with ``ModelType.CLIP_TEXT``.

    Only ``_model_type`` is overridden; construction and ``predict`` are
    inherited unchanged from ``CLIPSTEncoder``.
    """

    _model_type = ModelType.CLIP_TEXT
|
Reference in New Issue
Block a user