From 0629696333ef554e512489a47d66688e6f446c79 Mon Sep 17 00:00:00 2001 From: Lianhao Lu Date: Thu, 5 Sep 2024 18:05:11 +0800 Subject: [PATCH] K8S manifest: Update ChatQnA/CodeGen/CodeTrans/DocSum - Update ChatQnA/CodeGen/CodeTrans/DocSum k8s manifest to avoid requiring creating directory for cache model. - Add chatqna-guardrails manifest files. - Fix bug #752 introduced by PR #669 Signed-off-by: Lianhao Lu --- .github/workflows/_manifest-e2e.yml | 10 +- .github/workflows/scripts/k8s-utils.sh | 75 + .../manifests/gaudi/chatqna-guardrails.yaml | 1518 +++++++++++++++++ .../kubernetes/manifests/gaudi/chatqna.yaml | 160 +- .../manifests/xeon/chatqna-guardrails.yaml | 1513 ++++++++++++++++ .../kubernetes/manifests/xeon/chatqna.yaml | 170 +- ChatQnA/tests/test_manifest_on_gaudi.sh | 74 +- ChatQnA/tests/test_manifest_on_xeon.sh | 84 +- .../kubernetes/manifests/gaudi/codegen.yaml | 46 +- .../kubernetes/manifests/xeon/codegen.yaml | 46 +- .../kubernetes/manifests/gaudi/codetrans.yaml | 43 +- .../kubernetes/manifests/xeon/codetrans.yaml | 46 +- DocSum/kubernetes/manifests/gaudi/docsum.yaml | 46 +- DocSum/kubernetes/manifests/xeon/docsum.yaml | 46 +- 14 files changed, 3580 insertions(+), 297 deletions(-) create mode 100755 .github/workflows/scripts/k8s-utils.sh create mode 100644 ChatQnA/kubernetes/manifests/gaudi/chatqna-guardrails.yaml create mode 100644 ChatQnA/kubernetes/manifests/xeon/chatqna-guardrails.yaml diff --git a/.github/workflows/_manifest-e2e.yml b/.github/workflows/_manifest-e2e.yml index 3974e4ca6..b0c61b1d9 100644 --- a/.github/workflows/_manifest-e2e.yml +++ b/.github/workflows/_manifest-e2e.yml @@ -80,6 +80,7 @@ jobs: echo "skip_validate=false" >> $GITHUB_ENV else echo "Timeout waiting for pods in namespace $NAMESPACE to be ready!" 
+ .github/workflows/scripts/k8s-utils.sh dump_pods_status $NAMESPACE exit 1 fi sleep 60 @@ -91,7 +92,14 @@ jobs: if $skip_validate; then echo "Skip validate" else - ${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh validate_${{ inputs.example }} $NAMESPACE + if ${{ github.workspace }}/${{ inputs.example }}/tests/test_manifest_on_${{ inputs.hardware }}.sh validate_${{ inputs.example }} $NAMESPACE ; then + echo "Validate ${{ inputs.example }} successful!" + else + echo "Validate ${{ inputs.example }} failure!!!" + .github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE + # The if/else consumed the validate script's exit status; fail the step explicitly. + exit 1 + fi fi - name: Kubectl uninstall diff --git a/.github/workflows/scripts/k8s-utils.sh b/.github/workflows/scripts/k8s-utils.sh new file mode 100755 index 000000000..0c9367a6c --- /dev/null +++ b/.github/workflows/scripts/k8s-utils.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +#set -xe + +# Print "kubectl describe" output and "kubectl logs" output for one pod. Args: pod_name namespace +function dump_pod_log() { + pod_name=$1 + namespace=$2 + echo "-----------Pod: $pod_name---------" + echo "#kubectl describe pod $pod_name -n $namespace" + kubectl describe pod "$pod_name" -n "$namespace" + echo "-----------------------------------" + echo "#kubectl logs $pod_name -n $namespace" + kubectl logs "$pod_name" -n "$namespace" + echo "-----------------------------------" +} + +# Print a wide pod listing for a namespace, then call dump_pod_log for every pod that is not Running or not fully ready. Args: namespace +function dump_pods_status() { + namespace=$1 + echo "-----DUMP POD STATUS in NS $namespace------" + kubectl get pods -n "$namespace" -o wide + echo "-----------------------------------" + + # Get all pods in the namespace and their statuses + pods=$(kubectl get pods -n "$namespace" --no-headers) + + # Loop through each pod + echo "$pods" | while read -r line; do + pod_name=$(echo "$line" | awk '{print $1}') + ready=$(echo "$line" | awk '{print $2}') + status=$(echo "$line" | awk '{print $3}') + + # Extract the READY count + ready_count=$(echo "$ready" | cut -d'/' -f1) + required_count=$(echo "$ready" | cut -d'/' -f2) + + # Check if 
the pod is not in "Running" status or READY count is less than required + if [[ "$status" != "Running" || "$ready_count" -lt "$required_count" ]]; then + dump_pod_log "$pod_name" "$namespace" + fi + done +} + +# Call dump_pod_log for every pod in a namespace. Args: namespace +function dump_all_pod_logs() { + namespace=$1 + echo "-----DUMP POD STATUS AND LOG in NS $namespace------" + + pods=$(kubectl get pods -n "$namespace" -o jsonpath='{.items[*].metadata.name}') + for pod_name in $pods + do + dump_pod_log "$pod_name" "$namespace" + done +} + +if [ $# -eq 0 ]; then + echo "Usage: $0 {dump_pods_status|dump_all_pod_logs} NAMESPACE" + exit 1 +fi + +case "$1" in + dump_pods_status) + dump_pods_status "$2" + ;; + dump_all_pod_logs) + dump_all_pod_logs "$2" + ;; + *) + echo "Unknown function: $1" + exit 1 + ;; +esac diff --git a/ChatQnA/kubernetes/manifests/gaudi/chatqna-guardrails.yaml b/ChatQnA/kubernetes/manifests/gaudi/chatqna-guardrails.yaml new file mode 100644 index 000000000..b79877660 --- /dev/null +++ b/ChatQnA/kubernetes/manifests/gaudi/chatqna-guardrails.yaml @@ -0,0 +1,1518 @@ +--- +# Source: chatqna/charts/data-prep/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-data-prep-config + labels: + helm.sh/chart: data-prep-1.0.0 + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_ENDPOINT: "http://chatqna-tei" + EMBED_MODEL: "" + REDIS_URL: "redis://chatqna-redis-vector-db:6379" + INDEX_NAME: "rag-redis" + KEY_INDEX_NAME: "file-keys" + SEARCH_BATCH_SIZE: "10" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_HOME: "/tmp/.cache/huggingface" + http_proxy: "" + https_proxy: "" + no_proxy: "" + LOGFLAG: "" +--- +# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-embedding-usvc-config + labels: + 
helm.sh/chart: embedding-usvc-1.0.0 + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" + http_proxy: "" + https_proxy: "" + no_proxy: "" + LOGFLAG: "" +--- +# Source: chatqna/charts/guardrails-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-guardrails-usvc-config + labels: + helm.sh/chart: guardrails-usvc-1.0.0 + app.kubernetes.io/name: guardrails-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +data: + SAFETY_GUARD_ENDPOINT: "http://chatqna-tgi-guardrails" + SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_HOME: "/tmp/.cache/huggingface" + LOGFLAG: "" + http_proxy: "" + https_proxy: "" + no_proxy: "" +--- +# Source: chatqna/charts/llm-uservice/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-llm-uservice-config + labels: + helm.sh/chart: llm-uservice-1.0.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +data: + TGI_LLM_ENDPOINT: "http://chatqna-tgi" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_HOME: "/tmp/.cache/huggingface" + http_proxy: "" + https_proxy: "" + no_proxy: "" + LOGFLAG: "" +--- +# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-reranking-usvc-config + labels: + helm.sh/chart: reranking-usvc-1.0.0 + app.kubernetes.io/name: reranking-usvc + 
app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank" + http_proxy: "" + https_proxy: "" + no_proxy: "" + LOGFLAG: "" +--- +# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-retriever-usvc-config + labels: + helm.sh/chart: retriever-usvc-1.0.0 + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" + EMBED_MODEL: "" + REDIS_URL: "redis://chatqna-redis-vector-db:6379" + INDEX_NAME: "rag-redis" + EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" + http_proxy: "" + https_proxy: "" + no_proxy: "" + HF_HOME: "/tmp/.cache/huggingface" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + LOGFLAG: "" +--- +# Source: chatqna/charts/tei/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-tei-config + labels: + helm.sh/chart: tei-1.0.0 + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "cpu-1.5" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "BAAI/bge-base-en-v1.5" + PORT: "2081" + http_proxy: "" + https_proxy: "" + no_proxy: "" + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" + MAX_WARMUP_SEQUENCE_LENGTH: "512" +--- +# Source: chatqna/charts/teirerank/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-teirerank-config + labels: + helm.sh/chart: teirerank-1.0.0 + app.kubernetes.io/name: teirerank + 
app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "cpu-1.5" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "BAAI/bge-reranker-base" + PORT: "2082" + http_proxy: "" + https_proxy: "" + no_proxy: "" + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" +--- +# Source: chatqna/charts/tgi-guardrails/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-tgi-guardrails-config + labels: + helm.sh/chart: tgi-guardrails-1.0.0 + app.kubernetes.io/name: tgi-guardrails + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" + PORT: "2080" + HF_TOKEN: "insert-your-huggingface-token-here" + http_proxy: "" + https_proxy: "" + no_proxy: "" + HABANA_LOGS: "/tmp/habana_logs" + NUMBA_CACHE_DIR: "/tmp" + HF_HOME: "/tmp/.cache/huggingface" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" +--- +# Source: chatqna/charts/tgi/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-tgi-config + labels: + helm.sh/chart: tgi-1.0.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "Intel/neural-chat-7b-v3-3" + PORT: "2080" + HF_TOKEN: "insert-your-huggingface-token-here" + http_proxy: "" + https_proxy: "" + no_proxy: "" + HABANA_LOGS: "/tmp/habana_logs" + NUMBA_CACHE_DIR: "/tmp" + HF_HOME: "/tmp/.cache/huggingface" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" +--- +# Source: chatqna/charts/data-prep/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: 
chatqna-data-prep + labels: + helm.sh/chart: data-prep-1.0.0 + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6007 + targetPort: 6007 + protocol: TCP + name: data-prep + selector: + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/embedding-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-embedding-usvc + labels: + helm.sh/chart: embedding-usvc-1.0.0 + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6000 + targetPort: 6000 + protocol: TCP + name: embedding-usvc + selector: + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/guardrails-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-guardrails-usvc + labels: + helm.sh/chart: guardrails-usvc-1.0.0 + app.kubernetes.io/name: guardrails-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 9090 + targetPort: 9090 + protocol: TCP + name: guardrails-usvc + selector: + app.kubernetes.io/name: guardrails-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/llm-uservice/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-llm-uservice + labels: + helm.sh/chart: llm-uservice-1.0.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + 
app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 9000 + targetPort: 9000 + protocol: TCP + name: llm-uservice + selector: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/redis-vector-db/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-redis-vector-db + labels: + helm.sh/chart: redis-vector-db-1.0.0 + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "7.2.0-v9" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6379 + targetPort: 6379 + protocol: TCP + name: redis-service + - port: 8001 + targetPort: 8001 + protocol: TCP + name: redis-insight + selector: + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/reranking-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-reranking-usvc + labels: + helm.sh/chart: reranking-usvc-1.0.0 + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 8000 + targetPort: 8000 + protocol: TCP + name: reranking-usvc + selector: + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/retriever-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-retriever-usvc + labels: + helm.sh/chart: retriever-usvc-1.0.0 + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + 
app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 7000 + targetPort: 7000 + protocol: TCP + name: retriever-usvc + selector: + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/tei/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-tei + labels: + helm.sh/chart: tei-1.0.0 + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "cpu-1.5" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2081 + protocol: TCP + name: tei + selector: + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/teirerank/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-teirerank + labels: + helm.sh/chart: teirerank-1.0.0 + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "cpu-1.5" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2082 + protocol: TCP + name: teirerank + selector: + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/tgi-guardrails/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-tgi-guardrails + labels: + helm.sh/chart: tgi-guardrails-1.0.0 + app.kubernetes.io/name: tgi-guardrails + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2080 + protocol: TCP + name: tgi + selector: + app.kubernetes.io/name: tgi-guardrails + app.kubernetes.io/instance: chatqna +--- +# 
Source: chatqna/charts/tgi/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-tgi + labels: + helm.sh/chart: tgi-1.0.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2080 + protocol: TCP + name: tgi + selector: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna + labels: + helm.sh/chart: chatqna-1.0.0 + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 8888 + targetPort: 8888 + protocol: TCP + name: chatqna + selector: + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/data-prep/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-data-prep + labels: + helm.sh/chart: data-prep-1.0.0 + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-data-prep-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + 
runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/dataprep-redis:latest" + imagePullPolicy: IfNotPresent + ports: + - name: data-prep + containerPort: 6007 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: data-prep + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: data-prep + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: data-prep + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-embedding-usvc + labels: + helm.sh/chart: embedding-usvc-1.0.0 + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-embedding-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/embedding-tei:latest" + imagePullPolicy: IfNotPresent + ports: + - name: embedding-usvc + containerPort: 6000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: embedding-usvc + 
initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: embedding-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: embedding-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/guardrails-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-guardrails-usvc + labels: + helm.sh/chart: guardrails-usvc-1.0.0 + app.kubernetes.io/name: guardrails-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: guardrails-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: guardrails-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-guardrails-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/guardrails-tgi:latest" + imagePullPolicy: IfNotPresent + ports: + - name: guardrails-usvc + containerPort: 9090 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: guardrails-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: guardrails-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: guardrails-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + 
- name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/llm-uservice/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-llm-uservice + labels: + helm.sh/chart: llm-uservice-1.0.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-llm-uservice-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/llm-tgi:latest" + imagePullPolicy: IfNotPresent + ports: + - name: llm-uservice + containerPort: 9000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-redis-vector-db + labels: + helm.sh/chart: redis-vector-db-1.0.0 + app.kubernetes.io/name: redis-vector-db + 
app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "7.2.0-v9" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: redis-vector-db + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "redis/redis-stack:7.2.0-v9" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: data-volume + - mountPath: /redisinsight + name: redisinsight-volume + - mountPath: /tmp + name: tmp + ports: + - name: redis-service + containerPort: 6379 + protocol: TCP + - name: redis-insight + containerPort: 8001 + protocol: TCP + startupProbe: + tcpSocket: + port: 6379 # Probe the Redis port + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + resources: + {} + volumes: + - name: data-volume + emptyDir: {} + - name: redisinsight-volume + emptyDir: {} + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-reranking-usvc + labels: + helm.sh/chart: reranking-usvc-1.0.0 + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - 
configMapRef: + name: chatqna-reranking-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/reranking-tei:latest" + imagePullPolicy: IfNotPresent + ports: + - name: reranking-usvc + containerPort: 8000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: reranking-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: reranking-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: reranking-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-retriever-usvc + labels: + helm.sh/chart: retriever-usvc-1.0.0 + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-retriever-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/retriever-redis:latest" + imagePullPolicy: IfNotPresent + ports: + - name: 
retriever-usvc + containerPort: 7000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/tei/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-tei + labels: + helm.sh/chart: tei-1.0.0 + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "cpu-1.5" + app.kubernetes.io/managed-by: Helm +spec: + # use explicit replica counts only if HorizontalPodAutoscaler is disabled + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: tei + envFrom: + - configMapRef: + name: chatqna-tei-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "ghcr.io/huggingface/tei-gaudi:synapse_1.16" + imagePullPolicy: IfNotPresent + args: + - "--auto-truncate" + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2081 + protocol: TCP + livenessProbe: + failureThreshold: 24 + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + 
readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + limits: + habana.ai/gaudi: 1 + volumes: + - name: model-volume + emptyDir: {} + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/teirerank/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-teirerank + labels: + helm.sh/chart: teirerank-1.0.0 + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "cpu-1.5" + app.kubernetes.io/managed-by: Helm +spec: + # use explicit replica counts only if HorizontalPodAutoscaler is disabled + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: teirerank + envFrom: + - configMapRef: + name: chatqna-teirerank-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" + imagePullPolicy: IfNotPresent + args: + - "--auto-truncate" + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2082 + protocol: TCP + livenessProbe: + failureThreshold: 24 + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + 
startupProbe: + failureThreshold: 120 + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: model-volume + emptyDir: {} + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/tgi-guardrails/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-tgi-guardrails + labels: + helm.sh/chart: tgi-guardrails-1.0.0 + app.kubernetes.io/name: tgi-guardrails + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tgi-guardrails + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: tgi-guardrails + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: tgi-guardrails + envFrom: + - configMapRef: + name: chatqna-tgi-guardrails-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "ghcr.io/huggingface/tgi-gaudi:2.0.1" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2080 + protocol: TCP + livenessProbe: + failureThreshold: 24 + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + startupProbe: + failureThreshold: 120 + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + resources: + limits: + habana.ai/gaudi: 1 + volumes: + - name: model-volume + emptyDir: {} + - 
name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/tgi/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-tgi + labels: + helm.sh/chart: tgi-1.0.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: tgi + envFrom: + - configMapRef: + name: chatqna-tgi-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "ghcr.io/huggingface/tgi-gaudi:2.0.1" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2080 + protocol: TCP + livenessProbe: + failureThreshold: 24 + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + startupProbe: + failureThreshold: 120 + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + resources: + limits: + habana.ai/gaudi: 1 + volumes: + - name: model-volume + emptyDir: {} + - name: tmp + emptyDir: {} +--- +# Source: chatqna/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna + labels: + helm.sh/chart: chatqna-1.0.0 + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + 
app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + spec: + securityContext: + null + containers: + - name: chatqna + env: + - name: LLM_SERVICE_HOST_IP + value: chatqna-llm-uservice + - name: RERANK_SERVICE_HOST_IP + value: chatqna-reranking-usvc + - name: RETRIEVER_SERVICE_HOST_IP + value: chatqna-retriever-usvc + - name: EMBEDDING_SERVICE_HOST_IP + value: chatqna-embedding-usvc + - name: GUARDRAIL_SERVICE_HOST_IP + value: chatqna-guardrails-usvc + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/chatqna-guardrails:latest" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: chatqna + containerPort: 8888 + protocol: TCP + # startupProbe: + # httpGet: + # host: chatqna-llm-uservice + # port: 9000 + # path: / + # initialDelaySeconds: 5 + # periodSeconds: 5 + # failureThreshold: 120 + # livenessProbe: + # httpGet: + # path: / + # port: 8888 + # readinessProbe: + # httpGet: + # path: / + # port: 8888 + resources: + null + volumes: + - name: tmp + emptyDir: {} diff --git a/ChatQnA/kubernetes/manifests/gaudi/chatqna.yaml b/ChatQnA/kubernetes/manifests/gaudi/chatqna.yaml index 02ee4a8da..b45118c0b 100644 --- a/ChatQnA/kubernetes/manifests/gaudi/chatqna.yaml +++ b/ChatQnA/kubernetes/manifests/gaudi/chatqna.yaml @@ -8,24 +8,24 @@ kind: ConfigMap metadata: name: chatqna-data-prep-config labels: - helm.sh/chart: data-prep-0.8.0 + helm.sh/chart: data-prep-1.0.0 app.kubernetes.io/name: data-prep app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: 
Helm data: TEI_ENDPOINT: "http://chatqna-tei" EMBED_MODEL: "" REDIS_URL: "redis://chatqna-redis-vector-db:6379" INDEX_NAME: "rag-redis" + KEY_INDEX_NAME: "file-keys" + SEARCH_BATCH_SIZE: "10" HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" HF_HOME: "/tmp/.cache/huggingface" http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: "insert-your-langchain-key-here" - LANGCHAIN_PROJECT: "opea-dataprep-service" + LOGFLAG: "" --- # Source: chatqna/charts/embedding-usvc/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -36,19 +36,17 @@ kind: ConfigMap metadata: name: chatqna-embedding-usvc-config labels: - helm.sh/chart: embedding-usvc-0.8.0 + helm.sh/chart: embedding-usvc-1.0.0 app.kubernetes.io/name: embedding-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: insert-your-langchain-key-here - LANGCHAIN_PROJECT: "opea-embedding-service" + LOGFLAG: "" --- # Source: chatqna/charts/llm-uservice/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -59,10 +57,10 @@ kind: ConfigMap metadata: name: chatqna-llm-uservice-config labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TGI_LLM_ENDPOINT: "http://chatqna-tgi" @@ -71,9 +69,7 @@ data: http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: insert-your-langchain-key-here - LANGCHAIN_PROJECT: "opea-llm-uservice" + LOGFLAG: "" --- # Source: chatqna/charts/reranking-usvc/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -84,19 +80,17 @@ 
kind: ConfigMap metadata: name: chatqna-reranking-usvc-config labels: - helm.sh/chart: reranking-usvc-0.8.0 + helm.sh/chart: reranking-usvc-1.0.0 app.kubernetes.io/name: reranking-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank" http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: "insert-your-langchain-key-here" - LANGCHAIN_PROJECT: "opea-reranking-service" + LOGFLAG: "" --- # Source: chatqna/charts/retriever-usvc/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -107,10 +101,10 @@ kind: ConfigMap metadata: name: chatqna-retriever-usvc-config labels: - helm.sh/chart: retriever-usvc-0.8.0 + helm.sh/chart: retriever-usvc-1.0.0 app.kubernetes.io/name: retriever-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" @@ -121,11 +115,9 @@ data: http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: "insert-your-langchain-key-here" - LANGCHAIN_PROJECT: "opea-retriever-service" HF_HOME: "/tmp/.cache/huggingface" HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + LOGFLAG: "" --- # Source: chatqna/charts/tei/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -136,7 +128,7 @@ kind: ConfigMap metadata: name: chatqna-tei-config labels: - helm.sh/chart: tei-0.8.0 + helm.sh/chart: tei-1.0.0 app.kubernetes.io/name: tei app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "cpu-1.5" @@ -161,7 +153,7 @@ kind: ConfigMap metadata: name: chatqna-teirerank-config labels: - helm.sh/chart: teirerank-0.8.0 + helm.sh/chart: teirerank-1.0.0 app.kubernetes.io/name: teirerank app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "cpu-1.5" @@ -185,7 
+177,7 @@ kind: ConfigMap metadata: name: chatqna-tgi-config labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "2.1.0" @@ -199,7 +191,6 @@ data: no_proxy: "" HABANA_LOGS: "/tmp/habana_logs" NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" MAX_INPUT_LENGTH: "1024" MAX_TOTAL_TOKENS: "2048" @@ -213,10 +204,10 @@ kind: Service metadata: name: chatqna-data-prep labels: - helm.sh/chart: data-prep-0.8.0 + helm.sh/chart: data-prep-1.0.0 app.kubernetes.io/name: data-prep app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -238,10 +229,10 @@ kind: Service metadata: name: chatqna-embedding-usvc labels: - helm.sh/chart: embedding-usvc-0.8.0 + helm.sh/chart: embedding-usvc-1.0.0 app.kubernetes.io/name: embedding-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -263,10 +254,10 @@ kind: Service metadata: name: chatqna-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -288,7 +279,7 @@ kind: Service metadata: name: chatqna-redis-vector-db labels: - helm.sh/chart: redis-vector-db-0.8.0 + helm.sh/chart: redis-vector-db-1.0.0 app.kubernetes.io/name: redis-vector-db app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "7.2.0-v9" @@ -317,10 +308,10 @@ kind: Service metadata: name: chatqna-reranking-usvc labels: - helm.sh/chart: reranking-usvc-0.8.0 + helm.sh/chart: reranking-usvc-1.0.0 app.kubernetes.io/name: reranking-usvc 
app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -342,10 +333,10 @@ kind: Service metadata: name: chatqna-retriever-usvc labels: - helm.sh/chart: retriever-usvc-0.8.0 + helm.sh/chart: retriever-usvc-1.0.0 app.kubernetes.io/name: retriever-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -367,7 +358,7 @@ kind: Service metadata: name: chatqna-tei labels: - helm.sh/chart: tei-0.8.0 + helm.sh/chart: tei-1.0.0 app.kubernetes.io/name: tei app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "cpu-1.5" @@ -392,7 +383,7 @@ kind: Service metadata: name: chatqna-teirerank labels: - helm.sh/chart: teirerank-0.8.0 + helm.sh/chart: teirerank-1.0.0 app.kubernetes.io/name: teirerank app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "cpu-1.5" @@ -417,7 +408,7 @@ kind: Service metadata: name: chatqna-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "2.1.0" @@ -442,10 +433,10 @@ kind: Service metadata: name: chatqna labels: - helm.sh/chart: chatqna-0.8.0 + helm.sh/chart: chatqna-1.0.0 app.kubernetes.io/name: chatqna app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -467,10 +458,10 @@ kind: Deployment metadata: name: chatqna-data-prep labels: - helm.sh/chart: data-prep-0.8.0 + helm.sh/chart: data-prep-1.0.0 app.kubernetes.io/name: data-prep app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -545,10 +536,10 @@ kind: Deployment metadata: name: chatqna-embedding-usvc labels: - helm.sh/chart: 
embedding-usvc-0.8.0 + helm.sh/chart: embedding-usvc-1.0.0 app.kubernetes.io/name: embedding-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -623,10 +614,10 @@ kind: Deployment metadata: name: chatqna-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -701,7 +692,7 @@ kind: Deployment metadata: name: chatqna-redis-vector-db labels: - helm.sh/chart: redis-vector-db-0.8.0 + helm.sh/chart: redis-vector-db-1.0.0 app.kubernetes.io/name: redis-vector-db app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "7.2.0-v9" @@ -773,10 +764,10 @@ kind: Deployment metadata: name: chatqna-reranking-usvc labels: - helm.sh/chart: reranking-usvc-0.8.0 + helm.sh/chart: reranking-usvc-1.0.0 app.kubernetes.io/name: reranking-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -851,10 +842,10 @@ kind: Deployment metadata: name: chatqna-retriever-usvc labels: - helm.sh/chart: retriever-usvc-0.8.0 + helm.sh/chart: retriever-usvc-1.0.0 app.kubernetes.io/name: retriever-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -929,12 +920,13 @@ kind: Deployment metadata: name: chatqna-tei labels: - helm.sh/chart: tei-0.8.0 + helm.sh/chart: tei-1.0.0 app.kubernetes.io/name: tei app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "cpu-1.5" app.kubernetes.io/managed-by: Helm spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled replicas: 1 selector: matchLabels: @@ 
-954,7 +946,15 @@ spec: - configMapRef: name: chatqna-tei-config securityContext: - {} + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault image: "ghcr.io/huggingface/tei-gaudi:synapse_1.16" imagePullPolicy: IfNotPresent args: @@ -995,9 +995,7 @@ spec: habana.ai/gaudi: 1 volumes: - name: model-volume - hostPath: - path: /mnt/opea-models - type: Directory + emptyDir: {} - name: shm emptyDir: medium: Memory @@ -1014,12 +1012,13 @@ kind: Deployment metadata: name: chatqna-teirerank labels: - helm.sh/chart: teirerank-0.8.0 + helm.sh/chart: teirerank-1.0.0 app.kubernetes.io/name: teirerank app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "cpu-1.5" app.kubernetes.io/managed-by: Helm spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled replicas: 1 selector: matchLabels: @@ -1039,7 +1038,15 @@ spec: - configMapRef: name: chatqna-teirerank-config securityContext: - {} + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" imagePullPolicy: IfNotPresent args: @@ -1079,9 +1086,7 @@ spec: {} volumes: - name: model-volume - hostPath: - path: /mnt/opea-models - type: Directory + emptyDir: {} - name: shm emptyDir: medium: Memory @@ -1098,12 +1103,13 @@ kind: Deployment metadata: name: chatqna-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled replicas: 1 selector: matchLabels: @@ -1123,7 +1129,15 @@ spec: - configMapRef: name: chatqna-tgi-config securityContext: - {} + allowPrivilegeEscalation: false + 
capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault image: "ghcr.io/huggingface/tgi-gaudi:2.0.1" imagePullPolicy: IfNotPresent volumeMounts: @@ -1157,9 +1171,7 @@ spec: habana.ai/gaudi: 1 volumes: - name: model-volume - hostPath: - path: /mnt/opea-models - type: Directory + emptyDir: {} - name: tmp emptyDir: {} --- @@ -1172,10 +1184,10 @@ kind: Deployment metadata: name: chatqna labels: - helm.sh/chart: chatqna-0.8.0 + helm.sh/chart: chatqna-1.0.0 app.kubernetes.io/name: chatqna app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -1202,6 +1214,8 @@ spec: value: chatqna-retriever-usvc - name: EMBEDDING_SERVICE_HOST_IP value: chatqna-embedding-usvc + - name: GUARDRAIL_SERVICE_HOST_IP + value: chatqna-guardrails-usvc securityContext: allowPrivilegeEscalation: false capabilities: diff --git a/ChatQnA/kubernetes/manifests/xeon/chatqna-guardrails.yaml b/ChatQnA/kubernetes/manifests/xeon/chatqna-guardrails.yaml new file mode 100644 index 000000000..87353f4cd --- /dev/null +++ b/ChatQnA/kubernetes/manifests/xeon/chatqna-guardrails.yaml @@ -0,0 +1,1513 @@ +--- +# Source: chatqna/charts/data-prep/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-data-prep-config + labels: + helm.sh/chart: data-prep-1.0.0 + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_ENDPOINT: "http://chatqna-tei" + EMBED_MODEL: "" + REDIS_URL: "redis://chatqna-redis-vector-db:6379" + INDEX_NAME: "rag-redis" + KEY_INDEX_NAME: "file-keys" + SEARCH_BATCH_SIZE: "10" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_HOME: "/tmp/.cache/huggingface" + http_proxy: "" + 
https_proxy: "" + no_proxy: "" + LOGFLAG: "" +--- +# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-embedding-usvc-config + labels: + helm.sh/chart: embedding-usvc-1.0.0 + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" + http_proxy: "" + https_proxy: "" + no_proxy: "" + LOGFLAG: "" +--- +# Source: chatqna/charts/guardrails-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-guardrails-usvc-config + labels: + helm.sh/chart: guardrails-usvc-1.0.0 + app.kubernetes.io/name: guardrails-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +data: + SAFETY_GUARD_ENDPOINT: "http://chatqna-tgi-guardrails" + SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_HOME: "/tmp/.cache/huggingface" + LOGFLAG: "" + http_proxy: "" + https_proxy: "" + no_proxy: "" +--- +# Source: chatqna/charts/llm-uservice/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-llm-uservice-config + labels: + helm.sh/chart: llm-uservice-1.0.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +data: + TGI_LLM_ENDPOINT: "http://chatqna-tgi" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_HOME: "/tmp/.cache/huggingface" + http_proxy: "" + https_proxy: "" + no_proxy: "" + LOGFLAG: "" +--- +# Source: 
chatqna/charts/reranking-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-reranking-usvc-config + labels: + helm.sh/chart: reranking-usvc-1.0.0 + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank" + http_proxy: "" + https_proxy: "" + no_proxy: "" + LOGFLAG: "" +--- +# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-retriever-usvc-config + labels: + helm.sh/chart: retriever-usvc-1.0.0 + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +data: + TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" + EMBED_MODEL: "" + REDIS_URL: "redis://chatqna-redis-vector-db:6379" + INDEX_NAME: "rag-redis" + EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" + http_proxy: "" + https_proxy: "" + no_proxy: "" + HF_HOME: "/tmp/.cache/huggingface" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + LOGFLAG: "" +--- +# Source: chatqna/charts/tei/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-tei-config + labels: + helm.sh/chart: tei-1.0.0 + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "cpu-1.5" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "BAAI/bge-base-en-v1.5" + PORT: "2081" + http_proxy: "" + https_proxy: "" + no_proxy: "" + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" + MAX_WARMUP_SEQUENCE_LENGTH: "512" +--- +# Source: 
chatqna/charts/teirerank/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-teirerank-config + labels: + helm.sh/chart: teirerank-1.0.0 + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "cpu-1.5" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "BAAI/bge-reranker-base" + PORT: "2082" + http_proxy: "" + https_proxy: "" + no_proxy: "" + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" +--- +# Source: chatqna/charts/tgi-guardrails/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-tgi-guardrails-config + labels: + helm.sh/chart: tgi-guardrails-1.0.0 + app.kubernetes.io/name: tgi-guardrails + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" + PORT: "2080" + HF_TOKEN: "insert-your-huggingface-token-here" + http_proxy: "" + https_proxy: "" + no_proxy: "" + HABANA_LOGS: "/tmp/habana_logs" + NUMBA_CACHE_DIR: "/tmp" + HF_HOME: "/tmp/.cache/huggingface" + CUDA_GRAPHS: "0" +--- +# Source: chatqna/charts/tgi/templates/configmap.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: chatqna-tgi-config + labels: + helm.sh/chart: tgi-1.0.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +data: + MODEL_ID: "Intel/neural-chat-7b-v3-3" + PORT: "2080" + HF_TOKEN: "insert-your-huggingface-token-here" + http_proxy: "" + https_proxy: "" + no_proxy: "" + HABANA_LOGS: "/tmp/habana_logs" + NUMBA_CACHE_DIR: "/tmp" + HF_HOME: "/tmp/.cache/huggingface" + 
CUDA_GRAPHS: "0" +--- +# Source: chatqna/charts/data-prep/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-data-prep + labels: + helm.sh/chart: data-prep-1.0.0 + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6007 + targetPort: 6007 + protocol: TCP + name: data-prep + selector: + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/embedding-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-embedding-usvc + labels: + helm.sh/chart: embedding-usvc-1.0.0 + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6000 + targetPort: 6000 + protocol: TCP + name: embedding-usvc + selector: + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/guardrails-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-guardrails-usvc + labels: + helm.sh/chart: guardrails-usvc-1.0.0 + app.kubernetes.io/name: guardrails-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 9090 + targetPort: 9090 + protocol: TCP + name: guardrails-usvc + selector: + app.kubernetes.io/name: guardrails-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/llm-uservice/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: 
Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-llm-uservice + labels: + helm.sh/chart: llm-uservice-1.0.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 9000 + targetPort: 9000 + protocol: TCP + name: llm-uservice + selector: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/redis-vector-db/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-redis-vector-db + labels: + helm.sh/chart: redis-vector-db-1.0.0 + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "7.2.0-v9" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 6379 + targetPort: 6379 + protocol: TCP + name: redis-service + - port: 8001 + targetPort: 8001 + protocol: TCP + name: redis-insight + selector: + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/reranking-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-reranking-usvc + labels: + helm.sh/chart: reranking-usvc-1.0.0 + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 8000 + targetPort: 8000 + protocol: TCP + name: reranking-usvc + selector: + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/retriever-usvc/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: 
chatqna-retriever-usvc + labels: + helm.sh/chart: retriever-usvc-1.0.0 + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 7000 + targetPort: 7000 + protocol: TCP + name: retriever-usvc + selector: + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/tei/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-tei + labels: + helm.sh/chart: tei-1.0.0 + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "cpu-1.5" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2081 + protocol: TCP + name: tei + selector: + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/teirerank/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-teirerank + labels: + helm.sh/chart: teirerank-1.0.0 + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "cpu-1.5" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2082 + protocol: TCP + name: teirerank + selector: + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/tgi-guardrails/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-tgi-guardrails + labels: + helm.sh/chart: tgi-guardrails-1.0.0 + app.kubernetes.io/name: tgi-guardrails + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: 
+ type: ClusterIP + ports: + - port: 80 + targetPort: 2080 + protocol: TCP + name: tgi + selector: + app.kubernetes.io/name: tgi-guardrails + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/tgi/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna-tgi + labels: + helm.sh/chart: tgi-1.0.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 2080 + protocol: TCP + name: tgi + selector: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/templates/service.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: chatqna + labels: + helm.sh/chart: chatqna-1.0.0 + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + type: ClusterIP + ports: + - port: 8888 + targetPort: 8888 + protocol: TCP + name: chatqna + selector: + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna +--- +# Source: chatqna/charts/data-prep/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-data-prep + labels: + helm.sh/chart: data-prep-1.0.0 + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: data-prep + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + 
envFrom: + - configMapRef: + name: chatqna-data-prep-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/dataprep-redis:latest" + imagePullPolicy: IfNotPresent + ports: + - name: data-prep + containerPort: 6007 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: data-prep + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: data-prep + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: data-prep + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-embedding-usvc + labels: + helm.sh/chart: embedding-usvc-1.0.0 + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: embedding-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-embedding-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/embedding-tei:latest" + imagePullPolicy: IfNotPresent + ports: + - name: embedding-usvc + 
containerPort: 6000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: embedding-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: embedding-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: embedding-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/guardrails-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-guardrails-usvc + labels: + helm.sh/chart: guardrails-usvc-1.0.0 + app.kubernetes.io/name: guardrails-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: guardrails-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: guardrails-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-guardrails-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/guardrails-tgi:latest" + imagePullPolicy: IfNotPresent + ports: + - name: guardrails-usvc + containerPort: 9090 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: guardrails-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: guardrails-usvc + initialDelaySeconds: 5 + 
periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: guardrails-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/llm-uservice/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-llm-uservice + labels: + helm.sh/chart: llm-uservice-1.0.0 + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: llm-uservice + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-llm-uservice-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/llm-tgi:latest" + imagePullPolicy: IfNotPresent + ports: + - name: llm-uservice + containerPort: 9000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-redis-vector-db + labels: + helm.sh/chart: redis-vector-db-1.0.0 + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "7.2.0-v9" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: redis-vector-db + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: redis-vector-db + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "redis/redis-stack:7.2.0-v9" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: data-volume + - mountPath: /redisinsight + name: redisinsight-volume + - mountPath: /tmp + name: tmp + ports: + - name: redis-service + containerPort: 6379 + protocol: TCP + - name: redis-insight + containerPort: 8001 + protocol: TCP + startupProbe: + tcpSocket: + port: 6379 # Probe the Redis port + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + resources: + {} + volumes: + - name: data-volume + emptyDir: {} + - name: redisinsight-volume + emptyDir: {} + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-reranking-usvc + labels: + helm.sh/chart: reranking-usvc-1.0.0 + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + template: + 
metadata: + labels: + app.kubernetes.io/name: reranking-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-reranking-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/reranking-tei:latest" + imagePullPolicy: IfNotPresent + ports: + - name: reranking-usvc + containerPort: 8000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: reranking-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: reranking-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: reranking-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-retriever-usvc + labels: + helm.sh/chart: retriever-usvc-1.0.0 + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: retriever-usvc + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: chatqna + envFrom: + - configMapRef: + name: chatqna-retriever-usvc-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + 
runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/retriever-redis:latest" + imagePullPolicy: IfNotPresent + ports: + - name: retriever-usvc + containerPort: 7000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/tei/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-tei + labels: + helm.sh/chart: tei-1.0.0 + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "cpu-1.5" + app.kubernetes.io/managed-by: Helm +spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: tei + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: tei + envFrom: + - configMapRef: + name: chatqna-tei-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" + imagePullPolicy: IfNotPresent + args: + - "--auto-truncate" + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + - mountPath: /tmp + name: tmp + ports: + - 
name: http + containerPort: 2081 + protocol: TCP + livenessProbe: + failureThreshold: 24 + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: model-volume + emptyDir: {} + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/teirerank/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-teirerank + labels: + helm.sh/chart: teirerank-1.0.0 + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "cpu-1.5" + app.kubernetes.io/managed-by: Helm +spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: teirerank + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: teirerank + envFrom: + - configMapRef: + name: chatqna-teirerank-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" + imagePullPolicy: IfNotPresent + args: + - "--auto-truncate" + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2082 + protocol: TCP + livenessProbe: + failureThreshold: 24 + httpGet: + path: /health + port: http + 
initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + {} + volumes: + - name: model-volume + emptyDir: {} + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/tgi-guardrails/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-tgi-guardrails + labels: + helm.sh/chart: tgi-guardrails-1.0.0 + app.kubernetes.io/name: tgi-guardrails + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tgi-guardrails + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: tgi-guardrails + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: tgi-guardrails + envFrom: + - configMapRef: + name: chatqna-tgi-guardrails-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "ghcr.io/huggingface/text-generation-inference:2.2.0" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2080 + protocol: TCP + livenessProbe: + failureThreshold: 24 + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + startupProbe: + failureThreshold: 120 + 
initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + resources: + {} + volumes: + - name: model-volume + emptyDir: {} + - name: tmp + emptyDir: {} +--- +# Source: chatqna/charts/tgi/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-tgi + labels: + helm.sh/chart: tgi-1.0.0 + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "2.1.0" + app.kubernetes.io/managed-by: Helm +spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: tgi + app.kubernetes.io/instance: chatqna + spec: + securityContext: + {} + containers: + - name: tgi + envFrom: + - configMapRef: + name: chatqna-tgi-config + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "ghcr.io/huggingface/text-generation-inference:2.2.0" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: 2080 + protocol: TCP + livenessProbe: + failureThreshold: 24 + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + startupProbe: + failureThreshold: 120 + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + resources: + {} + volumes: + - name: model-volume + emptyDir: {} + - name: tmp + emptyDir: {} +--- +# Source: chatqna/templates/deployment.yaml +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + 
name: chatqna + labels: + helm.sh/chart: chatqna-1.0.0 + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + app.kubernetes.io/version: "v1.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + template: + metadata: + labels: + app.kubernetes.io/name: chatqna + app.kubernetes.io/instance: chatqna + spec: + securityContext: + null + containers: + - name: chatqna + env: + - name: LLM_SERVICE_HOST_IP + value: chatqna-llm-uservice + - name: RERANK_SERVICE_HOST_IP + value: chatqna-reranking-usvc + - name: RETRIEVER_SERVICE_HOST_IP + value: chatqna-retriever-usvc + - name: EMBEDDING_SERVICE_HOST_IP + value: chatqna-embedding-usvc + - name: GUARDRAIL_SERVICE_HOST_IP + value: chatqna-guardrails-usvc + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + image: "opea/chatqna-guardrails:latest" + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: chatqna + containerPort: 8888 + protocol: TCP + # startupProbe: + # httpGet: + # host: chatqna-llm-uservice + # port: 9000 + # path: / + # initialDelaySeconds: 5 + # periodSeconds: 5 + # failureThreshold: 120 + # livenessProbe: + # httpGet: + # path: / + # port: 8888 + # readinessProbe: + # httpGet: + # path: / + # port: 8888 + resources: + null + volumes: + - name: tmp + emptyDir: {} diff --git a/ChatQnA/kubernetes/manifests/xeon/chatqna.yaml b/ChatQnA/kubernetes/manifests/xeon/chatqna.yaml index a0e9706cc..95fb52a97 100644 --- a/ChatQnA/kubernetes/manifests/xeon/chatqna.yaml +++ b/ChatQnA/kubernetes/manifests/xeon/chatqna.yaml @@ -25,7 +25,7 @@ data: } location / { - proxy_pass http://chatqna-ui.default.svc.cluster.local:5173; + proxy_pass http://chatqna-ui:5173; proxy_set_header Host $host; proxy_set_header X-Real-IP 
$remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; @@ -33,7 +33,7 @@ data: } location /v1/chatqna { - proxy_pass http://chatqna.default.svc.cluster.local:8888; + proxy_pass http://chatqna:8888; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; @@ -41,7 +41,7 @@ data: } location /v1/dataprep { - proxy_pass http://chatqna-data-prep.default.svc.cluster.local:6007; + proxy_pass http://chatqna-data-prep:6007; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; @@ -49,7 +49,7 @@ data: } location /v1/dataprep/get_file { - proxy_pass http://chatqna-data-prep.default.svc.cluster.local:6007; + proxy_pass http://chatqna-data-prep:6007; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; @@ -57,7 +57,7 @@ data: } location /v1/dataprep/delete_file { - proxy_pass http://chatqna-data-prep.default.svc.cluster.local:6007; + proxy_pass http://chatqna-data-prep:6007; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; @@ -74,24 +74,24 @@ kind: ConfigMap metadata: name: chatqna-data-prep-config labels: - helm.sh/chart: data-prep-0.8.0 + helm.sh/chart: data-prep-1.0.0 app.kubernetes.io/name: data-prep app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TEI_ENDPOINT: "http://chatqna-tei" EMBED_MODEL: "" REDIS_URL: "redis://chatqna-redis-vector-db:6379" INDEX_NAME: "rag-redis" + KEY_INDEX_NAME: "file-keys" + SEARCH_BATCH_SIZE: "10" HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" HF_HOME: "/tmp/.cache/huggingface" http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: "insert-your-langchain-key-here" - 
LANGCHAIN_PROJECT: "opea-dataprep-service" + LOGFLAG: "" --- # Source: chatqna/charts/embedding-usvc/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -102,19 +102,17 @@ kind: ConfigMap metadata: name: chatqna-embedding-usvc-config labels: - helm.sh/chart: embedding-usvc-0.8.0 + helm.sh/chart: embedding-usvc-1.0.0 app.kubernetes.io/name: embedding-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: insert-your-langchain-key-here - LANGCHAIN_PROJECT: "opea-embedding-service" + LOGFLAG: "" --- # Source: chatqna/charts/llm-uservice/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -125,10 +123,10 @@ kind: ConfigMap metadata: name: chatqna-llm-uservice-config labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TGI_LLM_ENDPOINT: "http://chatqna-tgi" @@ -137,9 +135,7 @@ data: http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: insert-your-langchain-key-here - LANGCHAIN_PROJECT: "opea-llm-uservice" + LOGFLAG: "" --- # Source: chatqna/charts/reranking-usvc/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -150,19 +146,17 @@ kind: ConfigMap metadata: name: chatqna-reranking-usvc-config labels: - helm.sh/chart: reranking-usvc-0.8.0 + helm.sh/chart: reranking-usvc-1.0.0 app.kubernetes.io/name: reranking-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank" http_proxy: "" https_proxy: 
"" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: "insert-your-langchain-key-here" - LANGCHAIN_PROJECT: "opea-reranking-service" + LOGFLAG: "" --- # Source: chatqna/charts/retriever-usvc/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -173,10 +167,10 @@ kind: ConfigMap metadata: name: chatqna-retriever-usvc-config labels: - helm.sh/chart: retriever-usvc-0.8.0 + helm.sh/chart: retriever-usvc-1.0.0 app.kubernetes.io/name: retriever-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" @@ -187,11 +181,9 @@ data: http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: "insert-your-langchain-key-here" - LANGCHAIN_PROJECT: "opea-retriever-service" HF_HOME: "/tmp/.cache/huggingface" HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + LOGFLAG: "" --- # Source: chatqna/charts/tei/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -202,7 +194,7 @@ kind: ConfigMap metadata: name: chatqna-tei-config labels: - helm.sh/chart: tei-0.8.0 + helm.sh/chart: tei-1.0.0 app.kubernetes.io/name: tei app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "cpu-1.5" @@ -227,7 +219,7 @@ kind: ConfigMap metadata: name: chatqna-teirerank-config labels: - helm.sh/chart: teirerank-0.8.0 + helm.sh/chart: teirerank-1.0.0 app.kubernetes.io/name: teirerank app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "cpu-1.5" @@ -251,7 +243,7 @@ kind: ConfigMap metadata: name: chatqna-tgi-config labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "2.1.0" @@ -265,7 +257,6 @@ data: no_proxy: "" HABANA_LOGS: "/tmp/habana_logs" NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" CUDA_GRAPHS: 
"0" --- @@ -278,10 +269,10 @@ kind: Service metadata: name: chatqna-data-prep labels: - helm.sh/chart: data-prep-0.8.0 + helm.sh/chart: data-prep-1.0.0 app.kubernetes.io/name: data-prep app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -303,10 +294,10 @@ kind: Service metadata: name: chatqna-embedding-usvc labels: - helm.sh/chart: embedding-usvc-0.8.0 + helm.sh/chart: embedding-usvc-1.0.0 app.kubernetes.io/name: embedding-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -328,10 +319,10 @@ kind: Service metadata: name: chatqna-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -353,7 +344,7 @@ kind: Service metadata: name: chatqna-redis-vector-db labels: - helm.sh/chart: redis-vector-db-0.8.0 + helm.sh/chart: redis-vector-db-1.0.0 app.kubernetes.io/name: redis-vector-db app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "7.2.0-v9" @@ -382,10 +373,10 @@ kind: Service metadata: name: chatqna-reranking-usvc labels: - helm.sh/chart: reranking-usvc-0.8.0 + helm.sh/chart: reranking-usvc-1.0.0 app.kubernetes.io/name: reranking-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -407,10 +398,10 @@ kind: Service metadata: name: chatqna-retriever-usvc labels: - helm.sh/chart: retriever-usvc-0.8.0 + helm.sh/chart: retriever-usvc-1.0.0 app.kubernetes.io/name: retriever-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + 
app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -432,7 +423,7 @@ kind: Service metadata: name: chatqna-tei labels: - helm.sh/chart: tei-0.8.0 + helm.sh/chart: tei-1.0.0 app.kubernetes.io/name: tei app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "cpu-1.5" @@ -457,7 +448,7 @@ kind: Service metadata: name: chatqna-teirerank labels: - helm.sh/chart: teirerank-0.8.0 + helm.sh/chart: teirerank-1.0.0 app.kubernetes.io/name: teirerank app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "cpu-1.5" @@ -482,7 +473,7 @@ kind: Service metadata: name: chatqna-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "2.1.0" @@ -507,10 +498,10 @@ kind: Service metadata: name: chatqna labels: - helm.sh/chart: chatqna-0.8.0 + helm.sh/chart: chatqna-1.0.0 app.kubernetes.io/name: chatqna app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -557,10 +548,10 @@ kind: Deployment metadata: name: chatqna-data-prep labels: - helm.sh/chart: data-prep-0.8.0 + helm.sh/chart: data-prep-1.0.0 app.kubernetes.io/name: data-prep app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -635,10 +626,10 @@ kind: Deployment metadata: name: chatqna-embedding-usvc labels: - helm.sh/chart: embedding-usvc-0.8.0 + helm.sh/chart: embedding-usvc-1.0.0 app.kubernetes.io/name: embedding-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -713,10 +704,10 @@ kind: Deployment metadata: name: chatqna-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: 
llm-uservice app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -791,7 +782,7 @@ kind: Deployment metadata: name: chatqna-redis-vector-db labels: - helm.sh/chart: redis-vector-db-0.8.0 + helm.sh/chart: redis-vector-db-1.0.0 app.kubernetes.io/name: redis-vector-db app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "7.2.0-v9" @@ -863,10 +854,10 @@ kind: Deployment metadata: name: chatqna-reranking-usvc labels: - helm.sh/chart: reranking-usvc-0.8.0 + helm.sh/chart: reranking-usvc-1.0.0 app.kubernetes.io/name: reranking-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -941,10 +932,10 @@ kind: Deployment metadata: name: chatqna-retriever-usvc labels: - helm.sh/chart: retriever-usvc-0.8.0 + helm.sh/chart: retriever-usvc-1.0.0 app.kubernetes.io/name: retriever-usvc app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -1062,12 +1053,13 @@ kind: Deployment metadata: name: chatqna-tei labels: - helm.sh/chart: tei-0.8.0 + helm.sh/chart: tei-1.0.0 app.kubernetes.io/name: tei app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "cpu-1.5" app.kubernetes.io/managed-by: Helm spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled replicas: 1 selector: matchLabels: @@ -1087,7 +1079,15 @@ spec: - configMapRef: name: chatqna-tei-config securityContext: - {} + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" imagePullPolicy: IfNotPresent args: @@ -1127,9 +1127,7 @@ spec: {} volumes: - name: model-volume - hostPath: 
- path: /mnt/opea-models - type: Directory + emptyDir: {} - name: shm emptyDir: medium: Memory @@ -1146,12 +1144,13 @@ kind: Deployment metadata: name: chatqna-teirerank labels: - helm.sh/chart: teirerank-0.8.0 + helm.sh/chart: teirerank-1.0.0 app.kubernetes.io/name: teirerank app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "cpu-1.5" app.kubernetes.io/managed-by: Helm spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled replicas: 1 selector: matchLabels: @@ -1171,7 +1170,15 @@ spec: - configMapRef: name: chatqna-teirerank-config securityContext: - {} + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" imagePullPolicy: IfNotPresent args: @@ -1211,9 +1218,7 @@ spec: {} volumes: - name: model-volume - hostPath: - path: /mnt/opea-models - type: Directory + emptyDir: {} - name: shm emptyDir: medium: Memory @@ -1230,12 +1235,13 @@ kind: Deployment metadata: name: chatqna-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: chatqna app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled replicas: 1 selector: matchLabels: @@ -1255,7 +1261,15 @@ spec: - configMapRef: name: chatqna-tgi-config securityContext: - {} + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: @@ -1288,9 +1302,7 @@ spec: {} volumes: - name: model-volume - hostPath: - path: /mnt/opea-models - type: Directory + emptyDir: {} - name: tmp emptyDir: {} --- @@ -1303,10 +1315,10 
@@ kind: Deployment metadata: name: chatqna labels: - helm.sh/chart: chatqna-0.8.0 + helm.sh/chart: chatqna-1.0.0 app.kubernetes.io/name: chatqna app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -1333,6 +1345,8 @@ spec: value: chatqna-retriever-usvc - name: EMBEDDING_SERVICE_HOST_IP value: chatqna-embedding-usvc + - name: GUARDRAIL_SERVICE_HOST_IP + value: chatqna-guardrails-usvc securityContext: allowPrivilegeEscalation: false capabilities: diff --git a/ChatQnA/tests/test_manifest_on_gaudi.sh b/ChatQnA/tests/test_manifest_on_gaudi.sh index f96bd81dd..ae30465b9 100755 --- a/ChatQnA/tests/test_manifest_on_gaudi.sh +++ b/ChatQnA/tests/test_manifest_on_gaudi.sh @@ -9,6 +9,9 @@ MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub IMAGE_REPO=${IMAGE_REPO:-} IMAGE_TAG=${IMAGE_TAG:-latest} +ROLLOUT_TIMEOUT_SECONDS="1800s" +KUBECTL_TIMEOUT_SECONDS="60s" + function init_chatqna() { # replace the mount dir "path: /mnt/opea-models" with "path: $CHART_MOUNT" find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \; @@ -27,7 +30,7 @@ function init_chatqna() { function install_chatqna { echo "namespace is $NAMESPACE" - kubectl apply -f . 
-n $NAMESPACE + kubectl apply -f chatqna.yaml -n $NAMESPACE # Sleep enough time for retreiver-usvc to be ready sleep 60 } @@ -40,13 +43,15 @@ function get_end_point() { } function validate_chatqna() { + local ns=$1 + local log=$2 max_retry=20 # make sure microservice retriever-usvc is ready # try to curl retriever-svc for max_retry times test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") for ((i=1; i<=max_retry; i++)) do - endpoint_url=$(get_end_point "chatqna-retriever-usvc" $NAMESPACE) + endpoint_url=$(get_end_point "chatqna-retriever-usvc" $ns) curl http://$endpoint_url/v1/retrieval -X POST \ -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \ -H 'Content-Type: application/json' && break @@ -55,32 +60,32 @@ function validate_chatqna() { # if i is bigger than max_retry, then exit with error if [ $i -gt $max_retry ]; then echo "Microservice retriever failed, exit with error." - exit 1 + return 1 fi # make sure microservice tgi-svc is ready for ((i=1; i<=max_retry; i++)) do - endpoint_url=$(get_end_point "chatqna-tgi" $NAMESPACE) + endpoint_url=$(get_end_point "chatqna-tgi" $ns) curl http://$endpoint_url/generate -X POST \ -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' && break - sleep 10 + sleep 30 done # if i is bigger than max_retry, then exit with error if [ $i -gt $max_retry ]; then echo "Microservice tgi failed, exit with error." 
- exit 1 + return 1 fi # check megaservice works # generate a random logfile name to avoid conflict among multiple runners - LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log - endpoint_url=$(get_end_point "chatqna" $NAMESPACE) + LOGFILE=$LOG_PATH/curlmega_$log.log + endpoint_url=$(get_end_point "chatqna" $ns) curl http://$endpoint_url/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE exit_code=$? if [ $exit_code -ne 0 ]; then echo "Megaservice failed, please check the logs in $LOGFILE!" - exit 1 + return ${exit_code} fi echo "Checking response results, make sure the output is reasonable. " @@ -91,10 +96,49 @@ function validate_chatqna() { fi if [ $status == false ]; then echo "Response check failed, please check the logs in artifacts!" - exit 1 + return 1 else echo "Response check succeed!" fi + return 0 +} + + +function _cleanup_ns() { + local ns=$1 + if kubectl get ns $ns; then + if ! kubectl delete ns $ns --timeout=$KUBECTL_TIMEOUT_SECONDS; then + kubectl delete pods --namespace $ns --force --grace-period=0 --all + kubectl delete ns $ns --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS + fi + fi +} + +function install_and_validate_chatqna_guardrail() { + echo "Testing manifests chatqna_guardrils" + local ns=${NAMESPACE}-gaurdrails + _cleanup_ns $ns + kubectl create namespace $ns + # install guardrail + kubectl apply -f chatqna-guardrails.yaml -n $ns + # Sleep enough time for chatqna_guardrail to be ready + sleep 60 + if kubectl rollout status deployment -n "$ns" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then + echo "Waiting for cahtqna_guardrail pod ready done!" + else + echo "Timeout waiting for chatqna_guardrail pod ready!" + _cleanup_ns $ns + exit 1 + fi + + # validate guardrail + validate_chatqna $ns chatqna-guardrails + local ret=$? 
+ if [ $ret -ne 0 ]; then + _cleanup_ns $ns + exit 1 + fi + _cleanup_ns $ns } if [ $# -eq 0 ]; then @@ -117,7 +161,15 @@ case "$1" in validate_ChatQnA) NAMESPACE=$2 SERVICE_NAME=chatqna - validate_chatqna + validate_chatqna $NAMESPACE chatqna + ret=$? + if [ $ret -ne 0 ]; then + exit $ret + fi + pushd ChatQnA/kubernetes/manifests/gaudi + set +e + install_and_validate_chatqna_guardrail + popd ;; *) echo "Unknown function: $1" diff --git a/ChatQnA/tests/test_manifest_on_xeon.sh b/ChatQnA/tests/test_manifest_on_xeon.sh index a75bfb945..6b82019d0 100755 --- a/ChatQnA/tests/test_manifest_on_xeon.sh +++ b/ChatQnA/tests/test_manifest_on_xeon.sh @@ -9,6 +9,9 @@ MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub IMAGE_REPO=${IMAGE_REPO:-} IMAGE_TAG=${IMAGE_TAG:-latest} +ROLLOUT_TIMEOUT_SECONDS="1800s" +KUBECTL_TIMEOUT_SECONDS="60s" + function init_chatqna() { # replace the mount dir "path: /mnt/opea-models" with "path: $CHART_MOUNT" find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \; @@ -27,19 +30,29 @@ function init_chatqna() { function install_chatqna { echo "namespace is $NAMESPACE" - kubectl apply -f . 
-n $NAMESPACE + kubectl apply -f chatqna.yaml -n $NAMESPACE # Sleep enough time for retreiver-usvc to be ready sleep 60 } +function get_end_point() { + # $1 is service name, $2 is namespace + ip_address=$(kubectl get svc $1 -n $2 -o jsonpath='{.spec.clusterIP}') + port=$(kubectl get svc $1 -n $2 -o jsonpath='{.spec.ports[0].port}') + echo "$ip_address:$port" +} + function validate_chatqna() { + local ns=$1 + local log=$2 max_retry=20 # make sure microservice retriever-usvc is ready # try to curl retriever-svc for max_retry times test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") for ((i=1; i<=max_retry; i++)) do - curl http://chatqna-retriever-usvc.$NAMESPACE:7000/v1/retrieval -X POST \ + endpoint_url=$(get_end_point "chatqna-retriever-usvc" $ns) + curl http://$endpoint_url/v1/retrieval -X POST \ -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \ -H 'Content-Type: application/json' && break sleep 30 @@ -47,30 +60,32 @@ function validate_chatqna() { # if i is bigger than max_retry, then exit with error if [ $i -gt $max_retry ]; then echo "Microservice retriever failed, exit with error." - exit 1 + return 1 fi # make sure microservice tgi-svc is ready for ((i=1; i<=max_retry; i++)) do - curl http://chatqna-tgi.$NAMESPACE:80/generate -X POST \ + endpoint_url=$(get_end_point "chatqna-tgi" $ns) + curl http://$endpoint_url/generate -X POST \ -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' && break - sleep 10 + sleep 30 done # if i is bigger than max_retry, then exit with error if [ $i -gt $max_retry ]; then echo "Microservice tgi failed, exit with error." 
- exit 1 + return 1 fi # check megaservice works # generate a random logfile name to avoid conflict among multiple runners - LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log - curl http://chatqna.$NAMESPACE:8888/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE + LOGFILE=$LOG_PATH/curlmega_$log.log + endpoint_url=$(get_end_point "chatqna" $ns) + curl http://$endpoint_url/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE exit_code=$? if [ $exit_code -ne 0 ]; then echo "Megaservice failed, please check the logs in $LOGFILE!" - exit 1 + return ${exit_code} fi echo "Checking response results, make sure the output is reasonable. " @@ -81,10 +96,49 @@ function validate_chatqna() { fi if [ $status == false ]; then echo "Response check failed, please check the logs in artifacts!" - exit 1 + return 1 else echo "Response check succeed!" fi + return 0 +} + + +function _cleanup_ns() { + local ns=$1 + if kubectl get ns $ns; then + if ! kubectl delete ns $ns --timeout=$KUBECTL_TIMEOUT_SECONDS; then + kubectl delete pods --namespace $ns --force --grace-period=0 --all + kubectl delete ns $ns --force --grace-period=0 --timeout=$KUBECTL_TIMEOUT_SECONDS + fi + fi +} + +function install_and_validate_chatqna_guardrail() { + echo "Testing manifests chatqna_guardrils" + local ns=${NAMESPACE}-gaurdrails + _cleanup_ns $ns + kubectl create namespace $ns + # install guardrail + kubectl apply -f chatqna-guardrails.yaml -n $ns + # Sleep enough time for chatqna_guardrail to be ready + sleep 60 + if kubectl rollout status deployment -n "$ns" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then + echo "Waiting for cahtqna_guardrail pod ready done!" + else + echo "Timeout waiting for chatqna_guardrail pod ready!" + _cleanup_ns $ns + exit 1 + fi + + # validate guardrail + validate_chatqna $ns chatqna-guardrails + local ret=$? 
+ if [ $ret -ne 0 ]; then + _cleanup_ns $ns + exit 1 + fi + _cleanup_ns $ns } if [ $# -eq 0 ]; then @@ -107,7 +161,15 @@ case "$1" in validate_ChatQnA) NAMESPACE=$2 SERVICE_NAME=chatqna - validate_chatqna + validate_chatqna $NAMESPACE chatqna + ret=$? + if [ $ret -ne 0 ]; then + exit $ret + fi + pushd ChatQnA/kubernetes/manifests/xeon + set +e + install_and_validate_chatqna_guardrail + popd ;; *) echo "Unknown function: $1" diff --git a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml index b671594ca..c1270f418 100644 --- a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml +++ b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: codegen-llm-uservice-config labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: codegen - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TGI_LLM_ENDPOINT: "http://codegen-tgi" @@ -20,9 +20,7 @@ data: http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: insert-your-langchain-key-here - LANGCHAIN_PROJECT: "opea-llm-uservice" + LOGFLAG: "" --- # Source: codegen/charts/tgi/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -33,7 +31,7 @@ kind: ConfigMap metadata: name: codegen-tgi-config labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: codegen app.kubernetes.io/version: "2.1.0" @@ -47,7 +45,6 @@ data: no_proxy: "" HABANA_LOGS: "/tmp/habana_logs" NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" MAX_INPUT_LENGTH: "1024" MAX_TOTAL_TOKENS: "2048" @@ -61,10 +58,10 @@ kind: Service metadata: name: codegen-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: 
llm-uservice app.kubernetes.io/instance: codegen - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -86,7 +83,7 @@ kind: Service metadata: name: codegen-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: codegen app.kubernetes.io/version: "2.1.0" @@ -111,10 +108,10 @@ kind: Service metadata: name: codegen labels: - helm.sh/chart: codegen-0.8.0 + helm.sh/chart: codegen-1.0.0 app.kubernetes.io/name: codegen app.kubernetes.io/instance: codegen - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -136,10 +133,10 @@ kind: Deployment metadata: name: codegen-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: codegen - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -214,12 +211,13 @@ kind: Deployment metadata: name: codegen-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: codegen app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled replicas: 1 selector: matchLabels: @@ -239,7 +237,15 @@ spec: - configMapRef: name: codegen-tgi-config securityContext: - {} + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault image: "ghcr.io/huggingface/tgi-gaudi:2.0.1" imagePullPolicy: IfNotPresent volumeMounts: @@ -275,9 +281,7 @@ spec: hugepages-2Mi: 500Mi volumes: - name: model-volume - hostPath: - path: /mnt/opea-models - type: Directory + emptyDir: {} - name: tmp emptyDir: {} --- @@ 
-290,10 +294,10 @@ kind: Deployment metadata: name: codegen labels: - helm.sh/chart: codegen-0.8.0 + helm.sh/chart: codegen-1.0.0 app.kubernetes.io/name: codegen app.kubernetes.io/instance: codegen - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/CodeGen/kubernetes/manifests/xeon/codegen.yaml b/CodeGen/kubernetes/manifests/xeon/codegen.yaml index 55fbc6ffb..9bfd0ed41 100644 --- a/CodeGen/kubernetes/manifests/xeon/codegen.yaml +++ b/CodeGen/kubernetes/manifests/xeon/codegen.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: codegen-llm-uservice-config labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: codegen - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TGI_LLM_ENDPOINT: "http://codegen-tgi" @@ -20,9 +20,7 @@ data: http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: insert-your-langchain-key-here - LANGCHAIN_PROJECT: "opea-llm-uservice" + LOGFLAG: "" --- # Source: codegen/charts/tgi/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -33,7 +31,7 @@ kind: ConfigMap metadata: name: codegen-tgi-config labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: codegen app.kubernetes.io/version: "2.1.0" @@ -47,7 +45,6 @@ data: no_proxy: "" HABANA_LOGS: "/tmp/habana_logs" NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" CUDA_GRAPHS: "0" --- @@ -60,10 +57,10 @@ kind: Service metadata: name: codegen-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: codegen - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" 
app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -85,7 +82,7 @@ kind: Service metadata: name: codegen-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: codegen app.kubernetes.io/version: "2.1.0" @@ -110,10 +107,10 @@ kind: Service metadata: name: codegen labels: - helm.sh/chart: codegen-0.8.0 + helm.sh/chart: codegen-1.0.0 app.kubernetes.io/name: codegen app.kubernetes.io/instance: codegen - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -135,10 +132,10 @@ kind: Deployment metadata: name: codegen-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: codegen - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -213,12 +210,13 @@ kind: Deployment metadata: name: codegen-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: codegen app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled replicas: 1 selector: matchLabels: @@ -238,7 +236,15 @@ spec: - configMapRef: name: codegen-tgi-config securityContext: - {} + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: @@ -271,9 +277,7 @@ spec: {} volumes: - name: model-volume - hostPath: - path: /mnt/opea-models - type: Directory + emptyDir: {} - name: tmp emptyDir: {} --- @@ -286,10 +290,10 @@ kind: Deployment metadata: name: codegen labels: - helm.sh/chart: codegen-0.8.0 + 
helm.sh/chart: codegen-1.0.0 app.kubernetes.io/name: codegen app.kubernetes.io/instance: codegen - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/CodeTrans/kubernetes/manifests/gaudi/codetrans.yaml b/CodeTrans/kubernetes/manifests/gaudi/codetrans.yaml index 1d55f6072..5f03c40a5 100644 --- a/CodeTrans/kubernetes/manifests/gaudi/codetrans.yaml +++ b/CodeTrans/kubernetes/manifests/gaudi/codetrans.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: codetrans-llm-uservice-config labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: codetrans - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TGI_LLM_ENDPOINT: "http://codetrans-tgi" @@ -20,6 +20,7 @@ data: http_proxy: "" https_proxy: "" no_proxy: "" + LOGFLAG: "" --- # Source: codetrans/charts/tgi/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -30,7 +31,7 @@ kind: ConfigMap metadata: name: codetrans-tgi-config labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: codetrans app.kubernetes.io/version: "2.1.0" @@ -44,7 +45,6 @@ data: no_proxy: "" HABANA_LOGS: "/tmp/habana_logs" NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" MAX_INPUT_LENGTH: "1024" MAX_TOTAL_TOKENS: "2048" @@ -58,10 +58,10 @@ kind: Service metadata: name: codetrans-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: codetrans - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -83,7 +83,7 @@ kind: Service metadata: name: codetrans-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: 
tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: codetrans app.kubernetes.io/version: "2.1.0" @@ -108,10 +108,10 @@ kind: Service metadata: name: codetrans labels: - helm.sh/chart: codetrans-0.8.0 + helm.sh/chart: codetrans-1.0.0 app.kubernetes.io/name: codetrans app.kubernetes.io/instance: codetrans - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -133,10 +133,10 @@ kind: Deployment metadata: name: codetrans-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: codetrans - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -211,12 +211,13 @@ kind: Deployment metadata: name: codetrans-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: codetrans app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled replicas: 1 selector: matchLabels: @@ -236,7 +237,15 @@ spec: - configMapRef: name: codetrans-tgi-config securityContext: - {} + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault image: "ghcr.io/huggingface/tgi-gaudi:2.0.1" imagePullPolicy: IfNotPresent volumeMounts: @@ -270,9 +279,7 @@ spec: habana.ai/gaudi: 1 volumes: - name: model-volume - hostPath: - path: /mnt/opea-models - type: Directory + emptyDir: {} - name: tmp emptyDir: {} --- @@ -285,10 +292,10 @@ kind: Deployment metadata: name: codetrans labels: - helm.sh/chart: codetrans-0.8.0 + helm.sh/chart: codetrans-1.0.0 app.kubernetes.io/name: codetrans app.kubernetes.io/instance: codetrans - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: 
"v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/CodeTrans/kubernetes/manifests/xeon/codetrans.yaml b/CodeTrans/kubernetes/manifests/xeon/codetrans.yaml index 21c44b785..7cc99aaa8 100644 --- a/CodeTrans/kubernetes/manifests/xeon/codetrans.yaml +++ b/CodeTrans/kubernetes/manifests/xeon/codetrans.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: codetrans-llm-uservice-config labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: codetrans - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TGI_LLM_ENDPOINT: "http://codetrans-tgi" @@ -20,9 +20,7 @@ data: http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: insert-your-langchain-key-here - LANGCHAIN_PROJECT: "opea-llm-uservice" + LOGFLAG: "" --- # Source: codetrans/charts/tgi/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -33,7 +31,7 @@ kind: ConfigMap metadata: name: codetrans-tgi-config labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: codetrans app.kubernetes.io/version: "2.1.0" @@ -47,7 +45,6 @@ data: no_proxy: "" HABANA_LOGS: "/tmp/habana_logs" NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" CUDA_GRAPHS: "0" --- @@ -60,10 +57,10 @@ kind: Service metadata: name: codetrans-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: codetrans - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -85,7 +82,7 @@ kind: Service metadata: name: codetrans-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: codetrans 
app.kubernetes.io/version: "2.1.0" @@ -110,10 +107,10 @@ kind: Service metadata: name: codetrans labels: - helm.sh/chart: codetrans-0.8.0 + helm.sh/chart: codetrans-1.0.0 app.kubernetes.io/name: codetrans app.kubernetes.io/instance: codetrans - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -135,10 +132,10 @@ kind: Deployment metadata: name: codetrans-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: codetrans - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -213,12 +210,13 @@ kind: Deployment metadata: name: codetrans-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: codetrans app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled replicas: 1 selector: matchLabels: @@ -238,7 +236,15 @@ spec: - configMapRef: name: codetrans-tgi-config securityContext: - {} + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: @@ -271,9 +277,7 @@ spec: {} volumes: - name: model-volume - hostPath: - path: /mnt/opea-models - type: Directory + emptyDir: {} - name: tmp emptyDir: {} --- @@ -286,10 +290,10 @@ kind: Deployment metadata: name: codetrans labels: - helm.sh/chart: codetrans-0.8.0 + helm.sh/chart: codetrans-1.0.0 app.kubernetes.io/name: codetrans app.kubernetes.io/instance: codetrans - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 
diff --git a/DocSum/kubernetes/manifests/gaudi/docsum.yaml b/DocSum/kubernetes/manifests/gaudi/docsum.yaml index 9b74549f5..c4d3d01da 100644 --- a/DocSum/kubernetes/manifests/gaudi/docsum.yaml +++ b/DocSum/kubernetes/manifests/gaudi/docsum.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: docsum-llm-uservice-config labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TGI_LLM_ENDPOINT: "http://docsum-tgi" @@ -20,9 +20,7 @@ data: http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: insert-your-langchain-key-here - LANGCHAIN_PROJECT: "opea-llm-uservice" + LOGFLAG: "" --- # Source: docsum/charts/tgi/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -33,7 +31,7 @@ kind: ConfigMap metadata: name: docsum-tgi-config labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: docsum app.kubernetes.io/version: "2.1.0" @@ -47,7 +45,6 @@ data: no_proxy: "" HABANA_LOGS: "/tmp/habana_logs" NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" MAX_INPUT_LENGTH: "1024" MAX_TOTAL_TOKENS: "2048" @@ -61,10 +58,10 @@ kind: Service metadata: name: docsum-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -86,7 +83,7 @@ kind: Service metadata: name: docsum-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: docsum app.kubernetes.io/version: "2.1.0" @@ -111,10 +108,10 @@ kind: Service metadata: 
name: docsum labels: - helm.sh/chart: docsum-0.8.0 + helm.sh/chart: docsum-1.0.0 app.kubernetes.io/name: docsum app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -136,10 +133,10 @@ kind: Deployment metadata: name: docsum-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -214,12 +211,13 @@ kind: Deployment metadata: name: docsum-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: docsum app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled replicas: 1 selector: matchLabels: @@ -239,7 +237,15 @@ spec: - configMapRef: name: docsum-tgi-config securityContext: - {} + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault image: "ghcr.io/huggingface/tgi-gaudi:2.0.1" imagePullPolicy: IfNotPresent volumeMounts: @@ -273,9 +279,7 @@ spec: habana.ai/gaudi: 1 volumes: - name: model-volume - hostPath: - path: /mnt/opea-models - type: Directory + emptyDir: {} - name: tmp emptyDir: {} --- @@ -288,10 +292,10 @@ kind: Deployment metadata: name: docsum labels: - helm.sh/chart: docsum-0.8.0 + helm.sh/chart: docsum-1.0.0 app.kubernetes.io/name: docsum app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 diff --git a/DocSum/kubernetes/manifests/xeon/docsum.yaml b/DocSum/kubernetes/manifests/xeon/docsum.yaml index 77f38b6ab..f1a40250e 100644 --- 
a/DocSum/kubernetes/manifests/xeon/docsum.yaml +++ b/DocSum/kubernetes/manifests/xeon/docsum.yaml @@ -8,10 +8,10 @@ kind: ConfigMap metadata: name: docsum-llm-uservice-config labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm data: TGI_LLM_ENDPOINT: "http://docsum-tgi" @@ -20,9 +20,7 @@ data: http_proxy: "" https_proxy: "" no_proxy: "" - LANGCHAIN_TRACING_V2: "false" - LANGCHAIN_API_KEY: insert-your-langchain-key-here - LANGCHAIN_PROJECT: "opea-llm-uservice" + LOGFLAG: "" --- # Source: docsum/charts/tgi/templates/configmap.yaml # Copyright (C) 2024 Intel Corporation @@ -33,7 +31,7 @@ kind: ConfigMap metadata: name: docsum-tgi-config labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: docsum app.kubernetes.io/version: "2.1.0" @@ -47,7 +45,6 @@ data: no_proxy: "" HABANA_LOGS: "/tmp/habana_logs" NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" CUDA_GRAPHS: "0" --- @@ -60,10 +57,10 @@ kind: Service metadata: name: docsum-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -85,7 +82,7 @@ kind: Service metadata: name: docsum-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: docsum app.kubernetes.io/version: "2.1.0" @@ -110,10 +107,10 @@ kind: Service metadata: name: docsum labels: - helm.sh/chart: docsum-0.8.0 + helm.sh/chart: docsum-1.0.0 app.kubernetes.io/name: docsum app.kubernetes.io/instance: docsum - app.kubernetes.io/version: 
"v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: type: ClusterIP @@ -135,10 +132,10 @@ kind: Deployment metadata: name: docsum-llm-uservice labels: - helm.sh/chart: llm-uservice-0.8.0 + helm.sh/chart: llm-uservice-1.0.0 app.kubernetes.io/name: llm-uservice app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1 @@ -213,12 +210,13 @@ kind: Deployment metadata: name: docsum-tgi labels: - helm.sh/chart: tgi-0.8.0 + helm.sh/chart: tgi-1.0.0 app.kubernetes.io/name: tgi app.kubernetes.io/instance: docsum app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm spec: + # use explicit replica counts only of HorizontalPodAutoscaler is disabled replicas: 1 selector: matchLabels: @@ -238,7 +236,15 @@ spec: - configMapRef: name: docsum-tgi-config securityContext: - {} + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: @@ -271,9 +277,7 @@ spec: {} volumes: - name: model-volume - hostPath: - path: /mnt/opea-models - type: Directory + emptyDir: {} - name: tmp emptyDir: {} --- @@ -286,10 +290,10 @@ kind: Deployment metadata: name: docsum labels: - helm.sh/chart: docsum-0.8.0 + helm.sh/chart: docsum-1.0.0 app.kubernetes.io/name: docsum app.kubernetes.io/instance: docsum - app.kubernetes.io/version: "v0.8" + app.kubernetes.io/version: "v1.0" app.kubernetes.io/managed-by: Helm spec: replicas: 1