Merge pull request #1 from vaibhavjainwiz/add-caikit-tgis-serving
Add build files required to create the Caikit-TGIS build
Jooho authored Jul 4, 2023
2 parents 28a15b7 + 9de0014 commit 2f1668f
Showing 7 changed files with 246 additions and 1 deletion.
27 changes: 27 additions & 0 deletions Dockerfile
@@ -0,0 +1,27 @@
FROM quay.io/opendatahub/text-generation-inference

ARG CAIKIT_NLP_REPO=https://github.com/caikit/caikit-nlp

# caikit-nlp has caikit and caikit-tgis-backend as dependencies
# In future this will be replaced with just standard pip installs
RUN yum -y install git && \
git clone ${CAIKIT_NLP_REPO} && \
pip install --no-cache-dir ./caikit-nlp && \
mkdir -p /opt/models && \
mkdir -p /caikit/config && \
adduser caikit

# Copy the config file template into place; this config
# covers enabling TGIS
COPY caikit-tgis.template.yml /caikit/config
# Copy the startup script
COPY start-serving.sh /

RUN chown -R caikit:caikit /caikit

USER caikit

ENV RUNTIME_LIBRARY='caikit_nlp' \
RUNTIME_LOCAL_MODELS_DIR='/opt/models'

CMD [ "/start-serving.sh" ]
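
With the Dockerfile in place, the image can be built locally. A minimal sketch, assuming podman is available (docker works equally); the tag is hypothetical, substitute your own registry and repository:

```bash
# Hypothetical tag; replace <your-org> with your own registry namespace
podman build -t quay.io/<your-org>/caikit-tgis-serving:latest .
```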
122 changes: 121 additions & 1 deletion README.md
@@ -1 +1,121 @@
# caikit-tgis-serving
## Installation

Setting up an OpenShift cluster is outside the scope of this document.

1. Set up Istio: [Istio install doc](https://knative.dev/docs/install/installing-istio)
2. Set up KNative Serving: [Knative Serving install doc](https://knative.dev/docs/install/yaml-install/serving/install-serving-with-yaml/)
3. Install Cert Manager: [Cert Manager install doc](https://cert-manager.io/docs/installation/)
4. Install KServe:

```bash
kubectl apply -f https://github.com/kserve/kserve/releases/download/v0.10.0/kserve.yaml
```
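
Before continuing, you can confirm that the KServe controller is running; a quick check, assuming the default `kserve` namespace created by the release manifest:

```bash
kubectl get pods -n kserve
```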

## Setting up Caikit

1. Set up the ServingRuntime

The following ServingRuntime configures Caikit:

```bash
oc apply -f caikit-servingruntime.yaml
```

You can now create inference services for Caikit-format models.
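
To verify that the runtime was created, for example:

```bash
oc get servingruntime caikit-runtime
```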

## Converting a Model Using Caikit-NLP

Caikit cannot load generic models directly, but most existing HuggingFace models can be converted to Caikit format.

1. Ensure git and git-lfs are installed

```bash
yum -y install git git-lfs
git lfs install
```

2. Clone the given model (note that the git repo for flan-t5-xl requires roughly 64 GB of storage)

```bash
git clone https://huggingface.co/google/flan-t5-xl
```

- An alternative method that may involve a smaller download:

```python
import transformers

pipeline = transformers.pipeline(model="google/flan-t5-xl")

# Model files will be under ~/.cache/huggingface
```
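
With this method, the model files land in the HuggingFace hub cache rather than the working directory. To locate the snapshot to pass to the conversion step below (directory layout assumed from recent transformers versions; the snapshot hash will differ):

```bash
ls ~/.cache/huggingface/hub/models--google--flan-t5-xl/snapshots/
```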

3. Create a virtualenv

```bash
python3 -m virtualenv venv
source venv/bin/activate
```

4. Clone and install caikit-nlp

```bash
git clone https://github.com/Xaenalt/caikit-nlp
pip install ./caikit-nlp
```

5. Convert the model

```python
import caikit_nlp

base_model_path = "flan-t5-xl"
saved_model_path = "flan-t5-xl-caikit"

# This step imports the model into caikit_nlp and configures it in Caikit format
model = caikit_nlp.text_generation.TextGeneration.bootstrap(base_model_path)

# This saves the model to disk in Caikit format: a directory containing a config.yml and an artifacts directory
model.save(model_path=saved_model_path)
```
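
Equivalently, the `convert.py` script included in this repo wraps these two calls:

```bash
python3 convert.py --model-path flan-t5-xl --model-save-path flan-t5-xl-caikit
```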

## Inference with Caikit-Serving

1. Create the InferenceService

```bash
# Edit the yaml to include the storage path of the caikit-format model

oc apply -f caikit-isvc.yaml
```

2. Determine the endpoint

```bash
oc get isvc

# Take note of the URL; it will be of the form isvc-name.project.apps.cluster-name.openshiftapps.com
```
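
The URL can also be read directly from the InferenceService status, for example:

```bash
oc get isvc caikit-example-isvc -o jsonpath='{.status.url}'
```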

3. Use gRPC to run inference

```bash
# -insecure is used because the cert is self-signed in this demo environment
# The mm-model-id header is the name of the model loaded in Caikit, which matches the name of the directory the Caikit model resides in

grpcurl -insecure -d '{"text": "At what temperature does liquid Nitrogen boil?"}' -H "mm-model-id: flan-t5-xl-caikit" isvc-name.project.apps.cluster-name.openshiftapps.com:443 caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict
```

The output will be similar to the following (it may not be identical, and, as in this sample, the model's answer may be factually incorrect):

```json
{
"generated_token_count": "20",
"text": " The boiling point of Nitrogen is about -78.0°C, which is the boiling point of",
"stop_reason": "MAX_TOKENS",
"producer_id": {
"name": "Text Generation",
"version": "0.1.0"
}
}
```
15 changes: 15 additions & 0 deletions caikit-isvc.yaml
@@ -0,0 +1,15 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  annotations:
    serving.knative.openshift.io/enablePassthrough: "true"
    sidecar.istio.io/inject: "true"
    sidecar.istio.io/rewriteAppHTTPProbers: "true"
  name: caikit-example-isvc
spec:
  predictor:
    model:
      modelFormat:
        name: caikit
      runtime: caikit-runtime
      storageUri: <Path to location with config.yml and artifacts directory>
26 changes: 26 additions & 0 deletions caikit-servingruntime.yaml
@@ -0,0 +1,26 @@
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: caikit-runtime
spec:
  containers:
    - env:
        - name: RUNTIME_LOCAL_MODELS_DIR
          value: /mnt/models
      # TODO: This will eventually point to the official image
      image: quay.io/spryor/caikit-serving@sha256:6c5358edc460436880d87fa40f76e1fdeb5a0410d87824c0eb8965cfde641c0d
      name: kserve-container
      ports:
        # Note: KServe only allows a single port; this is the gRPC port. Subject to change in the future
        - containerPort: 8085
          name: h2c
          protocol: TCP
      resources:
        requests:
          cpu: 8
          memory: 16Gi
  multiModel: false
  supportedModelFormats:
    # Note: this currently *only* supports caikit format models
    - autoSelect: true
      name: caikit
28 changes: 28 additions & 0 deletions caikit-tgis.template.yml
@@ -0,0 +1,28 @@
# This template configures the TGIS server and Caikit
jvm_options: []

runtime:
  batching:
    standalone-model:
      size: 0 # Set to the desired batch size to enable batching
  server_thread_pool_size: 16

inference_plugin:
  model_mesh:
    max_loading_concurrency: 1
    latency_based_autoscaling_enabled: false

module_backends:
  load_priority:
    - type: TGIS
      config:
        local:
          load_timeout: 120
          grpc_port: null
          http_port: null
          health_poll_delay: 1.0
        connection:
          hostname: "TGIS_HOSTNAME"
          ca_cert_file: null
          client_cert_file: null
          client_key_file: null
14 changes: 14 additions & 0 deletions convert.py
@@ -0,0 +1,14 @@
#!/usr/bin/env python3

import caikit_nlp
import argparse

parser = argparse.ArgumentParser(prog="convert.py")
parser.add_argument("--model-path", help="Path of the base HuggingFace model", )
parser.add_argument("--model-save-path", help="Path to save the Caikit format model to")

args = parser.parse_args()

model = caikit_nlp.text_generation.TextGeneration.bootstrap(args.model_path)

model.save(model_path=args.model_save_path)
15 changes: 15 additions & 0 deletions start-serving.sh
@@ -0,0 +1,15 @@
#!/bin/sh

# This script templates the TGIS hostname into the Caikit config

# If the TGIS_HOSTNAME environment variable is unset, Caikit will
# attempt to start TGIS locally inside the container

TGIS_CONFIG_TEMPLATE='/caikit/config/caikit-tgis.template.yml'
TGIS_CONFIG_FILE='/caikit/config/caikit-tgis.yml'

sed "s/TGIS_HOSTNAME/${TGIS_HOSTNAME}/" "${TGIS_CONFIG_TEMPLATE}" > "${TGIS_CONFIG_FILE}"
export CONFIG_FILES="${TGIS_CONFIG_FILE}"

exec python3 -m caikit.runtime.grpc_server
