feat(ml)!: customizable ML settings (#3891)

* consolidated endpoints, added live configuration

* added ml settings to server

* added settings dashboard

* updated deps, fixed typos

* simplified modelconfig

updated tests

* added ml settings accordion for admin page

updated tests

* merge `clipText` and `clipVision`

* added face distance setting

clarified setting
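
For context, a face distance setting of this kind typically bounds how far apart two face embeddings may be and still count as the same person. A minimal sketch, assuming cosine distance; the name `max_distance` and the default value are illustrative, not this PR's actual code:

    import numpy as np

    def is_same_person(emb_a: np.ndarray, emb_b: np.ndarray, max_distance: float = 0.6) -> bool:
        # Cosine distance between two face embeddings; lower means more similar.
        similarity = float(np.dot(emb_a, emb_b) / (np.linalg.norm(emb_a) * np.linalg.norm(emb_b)))
        # Match only when the distance falls below the configured threshold.
        return (1.0 - similarity) <= max_distance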

* add clip mode in request, dropdown for face models
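
The clip mode surfaces in the diff further down: a text-only encoder refuses image input. A rough sketch of the idea, with the class internals simplified to assumptions:

    from typing import Literal

    class ClipEncoderSketch:
        def __init__(self, mode: Literal["text", "vision"] | None = None) -> None:
            # None means the model can encode both images and text.
            self.mode = mode

        def encode_image(self, image_bytes: bytes) -> list[float]:
            if self.mode == "text":
                raise TypeError("Cannot encode image as text-only model")
            ...  # run the visual ONNX model here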

* polished ml settings

updated descriptions

* update clip field on error

* removed unused import

* add description for image classification threshold
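
A classification threshold of this kind usually drops low-confidence labels before they are stored. A hedged sketch; `predictions` and `min_score` are illustrative names only:

    def filter_labels(predictions: list[tuple[str, float]], min_score: float = 0.9) -> list[str]:
        # Keep only labels whose confidence meets the configured minimum.
        return [label for label, score in predictions if score >= min_score]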

* pin safetensors for arm wheel

updated poetry lock

* moved dto

* set model type only in ml repository

* revert form-data package install

use fetch instead of axios

* added slotted description with link

updated facial recognition description

clarified effect of disabling tasks

* validation before model load
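
The file in the diff below already imports `_MODELS` from clip_server, so pre-load validation plausibly amounts to a membership check. A sketch under that assumption; the function name and error message are invented for illustration:

    from clip_server.model.clip_onnx import _MODELS

    def validate_model_name(model_name: str) -> None:
        # Fail fast before any weights are downloaded or loaded.
        if model_name not in _MODELS:
            raise ValueError(f"Unknown CLIP model: {model_name}")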

* removed unnecessary getConfig call

* added migration

* updated api

---------

Co-authored-by: Alex Tran <alex.tran1502@gmail.com>
commit bcc36d14a1 (parent 22f5e05060)
Author: Mert
Committed by: GitHub
Date: 2023-08-29 09:58:00 -04:00

56 changed files with 2324 additions and 655 deletions

@@ -1,5 +1,6 @@
 import os
 import zipfile
+from io import BytesIO
 from typing import Any, Literal
 
 import onnxruntime as ort
@@ -8,7 +9,7 @@ from clip_server.model.clip import BICUBIC, _convert_image_to_rgb
 from clip_server.model.clip_onnx import _MODELS, _S3_BUCKET_V2, CLIPOnnxModel, download_model
 from clip_server.model.pretrained_models import _VISUAL_MODEL_IMAGE_SIZE
 from clip_server.model.tokenization import Tokenizer
-from PIL.Image import Image
+from PIL import Image
 from torchvision.transforms import CenterCrop, Compose, Normalize, Resize, ToTensor
 
 from ..schemas import ModelType
@@ -74,9 +75,12 @@ class CLIPEncoder(InferenceModel):
         image_size = _VISUAL_MODEL_IMAGE_SIZE[CLIPOnnxModel.get_model_name(self.model_name)]
         self.transform = _transform_pil_image(image_size)
 
-    def _predict(self, image_or_text: Image | str) -> list[float]:
+    def _predict(self, image_or_text: Image.Image | str) -> list[float]:
+        if isinstance(image_or_text, bytes):
+            image_or_text = Image.open(BytesIO(image_or_text))
+
         match image_or_text:
-            case Image():
+            case Image.Image():
                 if self.mode == "text":
                     raise TypeError("Cannot encode image as text-only model")
                 pixel_values = self.transform(image_or_text)
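
With this change, `_predict` first decodes raw bytes via `Image.open(BytesIO(...))`, then dispatches on the decoded type with structural pattern matching (`case Image.Image()` matches any PIL image instance). A usage sketch; `encoder` is a hypothetical `CLIPEncoder` instance, and calling `_predict` directly is for illustration only:

    with open("photo.jpg", "rb") as f:
        image_embedding = encoder._predict(f.read())  # bytes are decoded to a PIL image

    text_embedding = encoder._predict("a photo of a beach")  # str takes the text branch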