fix(ml): load models in separate threads (#4034)

* load models in thread

* set clip mode logs to debug level

* updated tests

* made fixtures slightly less ugly

* moved responses to json file

* formatting
This commit is contained in:
Mert
2023-09-09 05:02:44 -04:00
committed by GitHub
parent f1db257628
commit 258b98c262
9 changed files with 1683 additions and 114 deletions

View File

@ -1,11 +1,11 @@
import json
import pickle
from io import BytesIO
from typing import TypeAlias
from typing import Any, TypeAlias
from unittest import mock
import cv2
import numpy as np
import onnxruntime as ort
import pytest
from fastapi.testclient import TestClient
from PIL import Image
@ -31,23 +31,6 @@ class TestImageClassifier:
{"label": "probably a virus", "score": 0.01},
]
def test_eager_init(self, mocker: MockerFixture) -> None:
mocker.patch.object(ImageClassifier, "download")
mock_load = mocker.patch.object(ImageClassifier, "load")
classifier = ImageClassifier("test_model_name", cache_dir="test_cache", eager=True, test_arg="test_arg")
assert classifier.model_name == "test_model_name"
mock_load.assert_called_once_with(test_arg="test_arg")
def test_lazy_init(self, mocker: MockerFixture) -> None:
mock_download = mocker.patch.object(ImageClassifier, "download")
mock_load = mocker.patch.object(ImageClassifier, "load")
face_model = ImageClassifier("test_model_name", cache_dir="test_cache", eager=False, test_arg="test_arg")
assert face_model.model_name == "test_model_name"
mock_download.assert_called_once_with(test_arg="test_arg")
mock_load.assert_not_called()
def test_min_score(self, pil_image: Image.Image, mocker: MockerFixture) -> None:
mocker.patch.object(ImageClassifier, "load")
classifier = ImageClassifier("test_model_name", min_score=0.0)
@ -74,23 +57,6 @@ class TestImageClassifier:
class TestCLIP:
embedding = np.random.rand(512).astype(np.float32)
def test_eager_init(self, mocker: MockerFixture) -> None:
mocker.patch.object(CLIPEncoder, "download")
mock_load = mocker.patch.object(CLIPEncoder, "load")
clip_model = CLIPEncoder("ViT-B-32::openai", cache_dir="test_cache", eager=True, test_arg="test_arg")
assert clip_model.model_name == "ViT-B-32::openai"
mock_load.assert_called_once_with(test_arg="test_arg")
def test_lazy_init(self, mocker: MockerFixture) -> None:
mock_download = mocker.patch.object(CLIPEncoder, "download")
mock_load = mocker.patch.object(CLIPEncoder, "load")
clip_model = CLIPEncoder("ViT-B-32::openai", cache_dir="test_cache", eager=False, test_arg="test_arg")
assert clip_model.model_name == "ViT-B-32::openai"
mock_download.assert_called_once_with(test_arg="test_arg")
mock_load.assert_not_called()
def test_basic_image(self, pil_image: Image.Image, mocker: MockerFixture) -> None:
mocker.patch.object(CLIPEncoder, "download")
mocked = mocker.patch("app.models.clip.ort.InferenceSession", autospec=True)
@ -119,23 +85,6 @@ class TestCLIP:
class TestFaceRecognition:
def test_eager_init(self, mocker: MockerFixture) -> None:
mocker.patch.object(FaceRecognizer, "download")
mock_load = mocker.patch.object(FaceRecognizer, "load")
face_model = FaceRecognizer("test_model_name", cache_dir="test_cache", eager=True, test_arg="test_arg")
assert face_model.model_name == "test_model_name"
mock_load.assert_called_once_with(test_arg="test_arg")
def test_lazy_init(self, mocker: MockerFixture) -> None:
mock_download = mocker.patch.object(FaceRecognizer, "download")
mock_load = mocker.patch.object(FaceRecognizer, "load")
face_model = FaceRecognizer("test_model_name", cache_dir="test_cache", eager=False, test_arg="test_arg")
assert face_model.model_name == "test_model_name"
mock_download.assert_called_once_with(test_arg="test_arg")
mock_load.assert_not_called()
def test_set_min_score(self, mocker: MockerFixture) -> None:
mocker.patch.object(FaceRecognizer, "load")
face_recognizer = FaceRecognizer("test_model_name", cache_dir="test_cache", min_score=0.5)
@ -220,45 +169,64 @@ class TestCache:
reason="More time-consuming since it deploys the app and loads models.",
)
class TestEndpoints:
def test_tagging_endpoint(self, pil_image: Image.Image, deployed_app: TestClient) -> None:
def test_tagging_endpoint(
self, pil_image: Image.Image, responses: dict[str, Any], deployed_app: TestClient
) -> None:
byte_image = BytesIO()
pil_image.save(byte_image, format="jpeg")
headers = {"Content-Type": "image/jpg"}
response = deployed_app.post(
"http://localhost:3003/image-classifier/tag-image",
content=byte_image.getvalue(),
headers=headers,
"http://localhost:3003/predict",
data={
"modelName": "microsoft/resnet-50",
"modelType": "image-classification",
"options": json.dumps({"minScore": 0.0}),
},
files={"image": byte_image.getvalue()},
)
assert response.status_code == 200
assert response.json() == responses["image-classification"]
def test_clip_image_endpoint(self, pil_image: Image.Image, deployed_app: TestClient) -> None:
def test_clip_image_endpoint(
self, pil_image: Image.Image, responses: dict[str, Any], deployed_app: TestClient
) -> None:
byte_image = BytesIO()
pil_image.save(byte_image, format="jpeg")
headers = {"Content-Type": "image/jpg"}
response = deployed_app.post(
"http://localhost:3003/sentence-transformer/encode-image",
content=byte_image.getvalue(),
headers=headers,
"http://localhost:3003/predict",
data={"modelName": "ViT-B-32::openai", "modelType": "clip", "options": json.dumps({"mode": "vision"})},
files={"image": byte_image.getvalue()},
)
assert response.status_code == 200
assert response.json() == responses["clip"]["image"]
def test_clip_text_endpoint(self, deployed_app: TestClient) -> None:
def test_clip_text_endpoint(self, responses: dict[str, Any], deployed_app: TestClient) -> None:
response = deployed_app.post(
"http://localhost:3003/sentence-transformer/encode-text",
json={"text": "test search query"},
"http://localhost:3003/predict",
data={
"modelName": "ViT-B-32::openai",
"modelType": "clip",
"text": "test search query",
"options": json.dumps({"mode": "text"}),
},
)
assert response.status_code == 200
assert response.json() == responses["clip"]["text"]
def test_face_endpoint(self, pil_image: Image.Image, deployed_app: TestClient) -> None:
def test_face_endpoint(self, pil_image: Image.Image, responses: dict[str, Any], deployed_app: TestClient) -> None:
byte_image = BytesIO()
pil_image.save(byte_image, format="jpeg")
headers = {"Content-Type": "image/jpg"}
response = deployed_app.post(
"http://localhost:3003/facial-recognition/detect-faces",
content=byte_image.getvalue(),
headers=headers,
"http://localhost:3003/predict",
data={
"modelName": "buffalo_l",
"modelType": "facial-recognition",
"options": json.dumps({"minScore": 0.034}),
},
files={"image": byte_image.getvalue()},
)
assert response.status_code == 200
assert response.json() == responses["facial-recognition"]
def test_sess_options() -> None: