Skip to content

Commit

Permalink
Add the ability to pass Gradio launch keyword arguments through to launch_gradio
Browse files Browse the repository at this point in the history
  • Loading branch information
dnth committed Oct 22, 2024
1 parent d749356 commit 8d50ed1
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 32 deletions.
100 changes: 72 additions & 28 deletions nbs/quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {
"id": "5_DEOCy61Mlg"
},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'2.2.0+cu121'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"\n",
Expand All @@ -52,15 +63,26 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1MCW7-AN16Rq",
"outputId": "9520b079-79a0-45f1-c7a8-7f0deb3cfe68"
},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.cuda.is_available()"
]
Expand Down Expand Up @@ -124,7 +146,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -219,7 +241,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand All @@ -233,9 +255,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-10-22 19:19:04.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxinfer.models\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mModel: vikhyatk/moondream2\u001b[0m\n",
"\u001b[32m2024-10-22 19:19:04.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxinfer.models\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m23\u001b[0m - \u001b[1mDevice: cuda\u001b[0m\n",
"\u001b[32m2024-10-22 19:19:04.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxinfer.models\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1mDtype: torch.float16\u001b[0m\n",
"\u001b[32m2024-10-22 19:40:56.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxinfer.models\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1mModel: vikhyatk/moondream2\u001b[0m\n",
"\u001b[32m2024-10-22 19:40:56.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxinfer.models\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m21\u001b[0m - \u001b[1mDevice: cuda\u001b[0m\n",
"\u001b[32m2024-10-22 19:40:56.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mxinfer.models\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m22\u001b[0m - \u001b[1mDtype: torch.float16\u001b[0m\n",
"PhiForCausalLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.\n",
" - If you're using `trust_remote_code=True`, you can get rid of this warning by loading the model with an auto class. See https://huggingface.co/docs/transformers/en/model_doc/auto#auto-classes\n",
" - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).\n",
Expand All @@ -256,7 +278,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand All @@ -267,7 +289,7 @@
"<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=773x767>"
]
},
"execution_count": 3,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -289,7 +311,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand All @@ -298,7 +320,7 @@
"'An anime-style illustration depicts a young girl with white hair and green eyes, wearing a white jacket, holding a large burger in her hands and smiling.'"
]
},
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -319,7 +341,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand All @@ -328,7 +350,7 @@
"'The image depicts a young girl with long, white hair and blue eyes sitting at a table, holding a large burger in her hands. The background shows a cozy indoor setting with a window and a chair visible.'"
]
},
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -349,7 +371,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand All @@ -362,30 +384,30 @@
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-style: italic\"> Model Stats </span>\n",
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-style: italic\"> Model Info </span>\n",
"╭───────────────────────────┬─────────────────────╮\n",
"│<span style=\"font-weight: bold\"> Attribute </span>│<span style=\"font-weight: bold\"> Value </span>│\n",
"├───────────────────────────┼─────────────────────┤\n",
"│<span style=\"color: #008080; text-decoration-color: #008080\"> Model ID </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> vikhyatk/moondream2 </span>│\n",
"│<span style=\"color: #008080; text-decoration-color: #008080\"> Device </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> cuda </span>│\n",
"│<span style=\"color: #008080; text-decoration-color: #008080\"> Dtype </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> torch.float16 </span>│\n",
"│<span style=\"color: #008080; text-decoration-color: #008080\"> Number of Inferences </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 5 </span>│\n",
"│<span style=\"color: #008080; text-decoration-color: #008080\"> Total Inference Time (ms) </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 4262.5508 </span>│\n",
"│<span style=\"color: #008080; text-decoration-color: #008080\"> Average Latency (ms) </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 852.5102 </span>│\n",
"│<span style=\"color: #008080; text-decoration-color: #008080\"> Number of Inferences </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 2 </span>│\n",
"│<span style=\"color: #008080; text-decoration-color: #008080\"> Total Inference Time (ms) </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 2030.7102 </span>│\n",
"│<span style=\"color: #008080; text-decoration-color: #008080\"> Average Latency (ms) </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 1015.3551 </span>│\n",
"╰───────────────────────────┴─────────────────────╯\n",
"</pre>\n"
],
"text/plain": [
"\u001b[3m Model Stats \u001b[0m\n",
"\u001b[3m Model Info \u001b[0m\n",
"╭───────────────────────────┬─────────────────────╮\n",
"│\u001b[1m \u001b[0m\u001b[1mAttribute \u001b[0m\u001b[1m \u001b[0m│\u001b[1m \u001b[0m\u001b[1mValue \u001b[0m\u001b[1m \u001b[0m│\n",
"├───────────────────────────┼─────────────────────┤\n",
"│\u001b[36m \u001b[0m\u001b[36mModel ID \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mvikhyatk/moondream2\u001b[0m\u001b[35m \u001b[0m│\n",
"│\u001b[36m \u001b[0m\u001b[36mDevice \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mcuda \u001b[0m\u001b[35m \u001b[0m│\n",
"│\u001b[36m \u001b[0m\u001b[36mDtype \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35mtorch.float16 \u001b[0m\u001b[35m \u001b[0m│\n",
"│\u001b[36m \u001b[0m\u001b[36mNumber of Inferences \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m5 \u001b[0m\u001b[35m \u001b[0m│\n",
"│\u001b[36m \u001b[0m\u001b[36mTotal Inference Time (ms)\u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m4262.5508 \u001b[0m\u001b[35m \u001b[0m│\n",
"│\u001b[36m \u001b[0m\u001b[36mAverage Latency (ms) \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m852.5102 \u001b[0m\u001b[35m \u001b[0m│\n",
"│\u001b[36m \u001b[0m\u001b[36mNumber of Inferences \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m│\n",
"│\u001b[36m \u001b[0m\u001b[36mTotal Inference Time (ms)\u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m2030.7102 \u001b[0m\u001b[35m \u001b[0m│\n",
"│\u001b[36m \u001b[0m\u001b[36mAverage Latency (ms) \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m1015.3551 \u001b[0m\u001b[35m \u001b[0m│\n",
"╰───────────────────────────┴─────────────────────╯\n"
]
},
Expand All @@ -408,7 +430,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"outputs": [
{
Expand All @@ -418,7 +440,7 @@
" 'The image depicts a young girl with long, white hair and blue eyes sitting at a table, holding a large burger in her hands. The background shows a cozy indoor setting with a window and a chair visible.']"
]
},
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -436,9 +458,31 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* Running on local URL: http://127.0.0.1:7860\n",
"\n",
"To create a public link, set `share=True` in `launch()`.\n"
]
},
{
"data": {
"text/html": [
"<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"model.launch_gradio()"
]
Expand Down
4 changes: 2 additions & 2 deletions xinfer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ def infer(self, image: str, prompt: str):
def infer_batch(self, images: list[str], prompts: list[str]):
pass

def launch_gradio(self):
def launch_gradio(self, **gradio_launch_kwargs):
# Importing here to avoid circular import
from .viz import launch_gradio

launch_gradio(self)
launch_gradio(self, **gradio_launch_kwargs)

@contextmanager
def track_inference_time(self):
Expand Down
4 changes: 2 additions & 2 deletions xinfer/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from .models import BaseModel


def launch_gradio(model: BaseModel):
def launch_gradio(model: BaseModel, **gradio_launch_kwargs):
model_info = model_registry.get_model_info(model.model_id)

def infer(image, prompt=None):
Expand Down Expand Up @@ -42,4 +42,4 @@ def infer(image, prompt=None):
description="Upload an image to classify.",
)

iface.launch()
iface.launch(**gradio_launch_kwargs)

0 comments on commit 8d50ed1

Please sign in to comment.