diff --git a/nbs/serving.ipynb b/nbs/serving.ipynb
index 830d5cc..433b736 100644
--- a/nbs/serving.ipynb
+++ b/nbs/serving.ipynb
@@ -2,333 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2024-11-01 11:26:11.236 | INFO | xinfer.core:list_models:55 - Showing interactive table in Jupyter Notebook. Type in the search bar to filter the models.\n"
-     ]
-    },
-    ... [ITables v2.2.2 init_notebook_mode HTML/JS and interactive model table output elided: 1296 rows x 3 columns (Implementation, Model ID, Input --> Output)] ...
-   ],
+   "outputs": [],
    "source": [
     "import xinfer\n",
     "\n",
@@ -337,7 +13,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -346,21 +22,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO 2024-11-01 11:28:21,402 serve 9335 api.py:277 - Started Serve in namespace \"serve\".\n",
-      "INFO 2024-11-01 11:28:21,408 serve 9335 api.py:259 - Connecting to existing Serve app in namespace \"serve\". New http options will not be applied.\n",
-      "WARNING 2024-11-01 11:28:21,409 serve 9335 api.py:85 - The new client HTTP config differs from the existing one in the following fields: ['location']. The new HTTP config is ignored.\n",
-      "INFO 2024-11-01 11:28:33,499 serve 9335 client.py:492 - Deployment 'XInferModel:ovj6xoa4' is ready at `http://127.0.0.1:8000/`. component=serve deployment=XInferModel\n",
-      "INFO 2024-11-01 11:28:33,501 serve 9335 api.py:549 - Deployed app 'default' successfully.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "xinfer.serve_model(\n",
     "    \"vikhyatk/moondream2\",\n",
diff --git a/xinfer/serve.py b/xinfer/serve.py
index 1be11bc..499c0ec 100644
--- a/xinfer/serve.py
+++ b/xinfer/serve.py
@@ -1,3 +1,5 @@
+import time
+
 from fastapi import FastAPI
 from loguru import logger
 from pydantic import BaseModel
@@ -25,7 +27,10 @@ def __init__(
         model_id,
         **kwargs,
     ):
-        self.model = create_model(model_id, **kwargs)
+        try:
+            self.model = create_model(model_id, **kwargs)
+        except Exception as e:
+            raise RuntimeError(f"Failed to load model {model_id}: {str(e)}")
 
     @app.post("/infer")
     async def infer(self, request: InferRequest) -> dict:
@@ -45,6 +50,16 @@ async def infer_batch(self, request: InferBatchRequest) -> list[dict]:
         except Exception as e:
             return [{"error": f"An error occurred: {str(e)}"}]
 
+    @app.get("/health")
+    async def health(self):
+        return {
+            "status": "healthy",
+            "timestamp": time.time(),
+            "model_id": self.model.model_id,
+            "device": self.model.device,
+            "dtype": str(self.model.dtype),
+        }
+
 
 def serve_model(
     model_id: str,
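For reference, the new `/health` route gives a lightweight readiness probe once the Serve deployment reports it is live. A minimal client-side sketch, assuming the default Serve address shown in the notebook logs (`http://127.0.0.1:8000`) and the `requests` package:

```python
import requests

# Query the /health endpoint added in this diff; the JSON keys mirror the
# dict returned by the deployment (status, timestamp, model_id, device, dtype).
resp = requests.get("http://127.0.0.1:8000/health", timeout=5)
resp.raise_for_status()
info = resp.json()
print(info["status"], info["model_id"], info["device"], info["dtype"])
```

The same request works as a container HEALTHCHECK or load-balancer probe, since a replica whose model failed to load never comes up healthy thanks to the wrapped `create_model` call.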