add gradio demo

dnth committed Oct 25, 2024
1 parent 52a2291 commit 17ec6e4
Showing 3 changed files with 212 additions and 8 deletions.
118 changes: 118 additions & 0 deletions nbs/gradio.ipynb
@@ -0,0 +1,118 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* Running on local URL: http://127.0.0.1:7860\n",
"\n",
"To create a public link, set `share=True` in `launch()`.\n"
]
},
{
"data": {
"text/html": [
"<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"1000\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-10-25 17:19:58.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxinfer.models\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m63\u001b[0m - \u001b[1mModel: Salesforce/blip2-opt-2.7b\u001b[0m\n",
"\u001b[32m2024-10-25 17:19:58.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxinfer.models\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mDevice: cuda\u001b[0m\n",
"\u001b[32m2024-10-25 17:19:58.512\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxinfer.models\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m65\u001b[0m - \u001b[1mDtype: float16\u001b[0m\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "73edff5854de4815b7bbfeb87b59a2ab",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Expanding inputs for image tokens in BLIP-2 should be done in processing. Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your BLIP-2 model. Using processors without these attributes in the config is deprecated and will throw an error in v4.47.\n",
"Expanding inputs for image tokens in BLIP-2 should be done in processing. Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your BLIP-2 model. Using processors without these attributes in the config is deprecated and will throw an error in v4.47.\n"
]
}
],
"source": [
"import xinfer\n",
"\n",
"xinfer.launch_gradio_demo()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import xinfer\n",
"\n",
"model = xinfer.create_model(\"resnet18.a1_in1k\")\n",
"model.launch_gradio()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import xinfer\n",
"xinfer.list_models()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "xinfer",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
4 changes: 4 additions & 0 deletions xinfer/__init__.py
@@ -23,10 +23,14 @@
 if vllm_available:
     from .vllm import *
 
+
+from .viz import launch_gradio_demo
+
 __all__ = [
     "create_model",
     "list_models",
     "register_model",
     "BaseModel",
     "ModelInputOutput",
+    "launch_gradio_demo",
 ]
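
With `launch_gradio_demo` exported here, the demo can be started straight from the package namespace, exactly as the notebook above does:

    import xinfer

    # Starts the model-picker demo defined in xinfer/viz.py; per the
    # notebook output it serves on http://127.0.0.1:7860 by default.
    xinfer.launch_gradio_demo()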
98 changes: 90 additions & 8 deletions xinfer/viz.py
@@ -1,5 +1,8 @@
+import json
+
 import gradio as gr
 
+from .core import create_model
 from .model_registry import ModelInputOutput, model_registry
 from .models import BaseModel
 
@@ -8,18 +11,25 @@ def launch_gradio(model: BaseModel, **gradio_launch_kwargs):
     model_info = model_registry.get_model_info(model.model_id)
 
     def infer(image, prompt=None):
-        if prompt is not None:
-            result = model.infer(image, prompt)
-        else:
-            result = model.infer(image)
+        try:
+            if prompt is not None:
+                result = model.infer(image, prompt)
+            else:
+                result = model.infer(image)
 
-        return result
+            # Convert result to string if it's not already
+            if isinstance(result, str):
+                return result
+            else:
+                return json.dumps(result, indent=2)
+        except Exception as e:
+            return f"Error during inference: {str(e)}"
 
     if model_info.input_output == ModelInputOutput.IMAGE_TEXT_TO_TEXT:
         iface = gr.Interface(
             fn=infer,
             inputs=[gr.Image(type="filepath"), gr.Textbox(label="Prompt")],
-            outputs=gr.Textbox(label="Generated Text"),
+            outputs=gr.Textbox(label="Result", lines=10),
             title=f"Inference with {model.model_id}",
             description="Upload an image and provide a prompt to generate a description.",
         )
@@ -28,7 +38,7 @@ def infer(image, prompt=None):
         iface = gr.Interface(
             fn=infer,
             inputs=gr.Image(type="filepath"),
-            outputs=gr.JSON(label="Detection Results"),
+            outputs=gr.Textbox(label="Result", lines=10),
             title=f"Object Detection with {model.model_id}",
             description="Upload an image to detect objects.",
         )
@@ -37,7 +47,7 @@ def infer(image, prompt=None):
         iface = gr.Interface(
             fn=infer,
             inputs=gr.Image(type="filepath"),
-            outputs=gr.JSON(label="Classification Result"),
+            outputs=gr.Textbox(label="Result", lines=10),
             title=f"Image Classification with {model.model_id}",
             description="Upload an image to classify.",
         )
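
For a single model, `launch_gradio` builds one of the three `gr.Interface` variants above. A minimal sketch of driving it through the model object, as the notebook's second cell does; the `share=True` flag is an assumption here, based on the module-level function forwarding `**gradio_launch_kwargs` to `iface.launch()`:

    import xinfer

    # resnet18.a1_in1k is the model id used in the notebook above.
    model = xinfer.create_model("resnet18.a1_in1k")

    # Extra keyword arguments reach iface.launch(); e.g. share=True asks
    # Gradio for a public link (assumes the method forwards kwargs).
    model.launch_gradio(share=True)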
@@ -47,3 +57,75 @@
         gradio_launch_kwargs["height"] = 1000
 
     iface.launch(**gradio_launch_kwargs)
+
+
+def launch_gradio_demo():
+    """
+    Launch an interactive demo with a dropdown to select a model from all supported models,
+    and a button to run inference.
+    """
+    available_models = [model.id for model in model_registry.list_models()]
+
+    def load_model_and_infer(model_id, image, prompt, device, dtype):
+        model = create_model(model_id, device=device, dtype=dtype)
+        model_info = model_registry.get_model_info(model_id)
+
+        try:
+            if model_info.input_output == ModelInputOutput.IMAGE_TEXT_TO_TEXT:
+                result = model.infer(image, prompt)
+            elif model_info.input_output in [
+                ModelInputOutput.IMAGE_TO_BOXES,
+                ModelInputOutput.IMAGE_TO_CATEGORIES,
+            ]:
+                result = model.infer(image)
+            else:
+                return "Unsupported model type"
+
+            # Convert result to string if it's not already
+            if isinstance(result, str):
+                return result
+            else:
+                return json.dumps(result, indent=2)
+        except Exception as e:
+            return f"Error during inference: {str(e)}"
+
+    with gr.Blocks() as demo:
+        gr.Markdown("# x.infer Gradio Demo")
+
+        model_dropdown = gr.Dropdown(choices=available_models, label="Select a model")
+        image_input = gr.Image(type="filepath", label="Input Image")
+        prompt_input = gr.Textbox(
+            label="Prompt (for image-text to text models)", visible=False
+        )
+        device_dropdown = gr.Dropdown(
+            choices=["cuda", "cpu"], label="Device", value="cuda"
+        )
+        dtype_dropdown = gr.Dropdown(
+            choices=["float32", "float16", "bfloat16"], label="Dtype", value="float16"
+        )
+        run_button = gr.Button("Run Inference")
+        output = gr.Textbox(label="Result", lines=10)
+
+        def update_prompt_visibility(model_id):
+            model_info = model_registry.get_model_info(model_id)
+            return gr.update(
+                visible=model_info.input_output == ModelInputOutput.IMAGE_TEXT_TO_TEXT
+            )
+
+        model_dropdown.change(
+            update_prompt_visibility, inputs=[model_dropdown], outputs=[prompt_input]
+        )
+
+        run_button.click(
+            load_model_and_infer,
+            inputs=[
+                model_dropdown,
+                image_input,
+                prompt_input,
+                device_dropdown,
+                dtype_dropdown,
+            ],
+            outputs=[output],
+        )
+
+    demo.launch(height=1000)
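
Note that `load_model_and_infer` calls `create_model` on every press of Run Inference, so the model is re-created each click (unless `create_model` caches internally); the notebook's "Loading checkpoint shards" bar shows that load happening. A hypothetical memoization helper, not part of this commit, that would reuse the last loaded model:

    from functools import lru_cache

    from xinfer import create_model

    # Hypothetical helper (not in this commit): reuse recently loaded
    # models across clicks with identical model_id/device/dtype.
    @lru_cache(maxsize=2)
    def get_cached_model(model_id: str, device: str, dtype: str):
        return create_model(model_id, device=device, dtype=dtype)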
