Merge branch 'main' into static_batching
Spycsh authored Oct 18, 2024
2 parents 0316fa5 + 37c74b2 commit 785908a
Showing 33 changed files with 144 additions and 175 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/_example-workflow.yml
@@ -12,6 +12,10 @@ on:
      example:
        required: true
        type: string
+      services:
+        default: ""
+        required: false
+        type: string
      tag:
        default: "latest"
        required: false
@@ -77,6 +81,7 @@ jobs:
        with:
          work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
          docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
+          service_list: ${{ inputs.services }}
          registry: ${OPEA_IMAGE_REPO}opea
          tag: ${{ inputs.tag }}

@@ -105,7 +110,6 @@ jobs:
      example: ${{ inputs.example }}
      hardware: ${{ inputs.node }}
      tag: ${{ inputs.tag }}
-      context: "CD"
    secrets: inherit

####################################################################################################
8 changes: 1 addition & 7 deletions .github/workflows/_manifest-e2e.yml
@@ -20,11 +20,6 @@ on:
        description: "Tag to apply to images, default is latest"
        required: false
        type: string
-      context:
-        default: "CI"
-        description: "CI or CD"
-        required: false
-        type: string

jobs:
  manifest-test:
@@ -51,7 +46,7 @@ jobs:

      - name: Set variables
        run: |
-          echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
+          echo "IMAGE_REPO=${OPEA_IMAGE_REPO}opea" >> $GITHUB_ENV
          echo "IMAGE_TAG=${{ inputs.tag }}" >> $GITHUB_ENV
          lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
          echo "NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
@@ -60,7 +55,6 @@
          echo "continue_test=true" >> $GITHUB_ENV
          echo "should_cleanup=false" >> $GITHUB_ENV
          echo "skip_validate=true" >> $GITHUB_ENV
-          echo "CONTEXT=${{ inputs.context }}" >> $GITHUB_ENV
          echo "NAMESPACE=$NAMESPACE"
      - name: Kubectl install
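
Note on the IMAGE_REPO change above: the registry prefix and the opea namespace are now concatenated in one variable. A minimal sketch of the expansion, using a hypothetical registry value (the real one comes from the runner's OPEA_IMAGE_REPO environment):

    OPEA_IMAGE_REPO="registry.example.com/"
    # old: IMAGE_REPO=registry.example.com/       (namespace missing)
    echo "IMAGE_REPO=$OPEA_IMAGE_REPO"
    # new: IMAGE_REPO=registry.example.com/opea   (namespace included)
    echo "IMAGE_REPO=${OPEA_IMAGE_REPO}opea"
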
1 change: 1 addition & 0 deletions .github/workflows/_run-docker-compose.yml
@@ -118,6 +118,7 @@ jobs:
      GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
      GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
      PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
+      PINECONE_KEY_LANGCHAIN_TEST: ${{ secrets.PINECONE_KEY_LANGCHAIN_TEST }}
      IMAGE_REPO: ${{ inputs.registry }}
      IMAGE_TAG: ${{ inputs.tag }}
      example: ${{ inputs.example }}
59 changes: 59 additions & 0 deletions .github/workflows/manual-image-build.yml
@@ -0,0 +1,59 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Build specific images on manual event
on:
  workflow_dispatch:
    inputs:
      nodes:
        default: "gaudi,xeon"
        description: "Hardware to run test"
        required: true
        type: string
      example:
        default: "ChatQnA"
        description: 'Build images belong to which example?'
        required: true
        type: string
      services:
        default: "chatqna,chatqna-without-rerank"
        description: 'Service list to build'
        required: true
        type: string
      tag:
        default: "latest"
        description: "Tag to apply to images"
        required: true
        type: string
      opea_branch:
        default: "main"
        description: 'OPEA branch for image build'
        required: false
        type: string

jobs:
  get-test-matrix:
    runs-on: ubuntu-latest
    outputs:
      nodes: ${{ steps.get-matrix.outputs.nodes }}
    steps:
      - name: Create Matrix
        id: get-matrix
        run: |
          nodes=($(echo ${{ inputs.nodes }} | tr ',' ' '))
          nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
          echo "nodes=$nodes_json" >> $GITHUB_OUTPUT

  image-build:
    needs: get-test-matrix
    strategy:
      matrix:
        node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
      fail-fast: false
    uses: ./.github/workflows/_example-workflow.yml
    with:
      node: ${{ matrix.node }}
      example: ${{ inputs.example }}
      services: ${{ inputs.services }}
      tag: ${{ inputs.tag }}
      opea_branch: ${{ inputs.opea_branch }}
    secrets: inherit
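
Because the workflow triggers on workflow_dispatch, it can be started from the GitHub CLI. A sketch, assuming the file sits on the default branch (the -f values shown are just the declared defaults):

    # Kick off a manual image build for ChatQnA on both node types.
    gh workflow run manual-image-build.yml \
      -f nodes="gaudi,xeon" \
      -f example="ChatQnA" \
      -f services="chatqna,chatqna-without-rerank" \
      -f tag="latest" \
      -f opea_branch="main"

For nodes="gaudi,xeon", the Create Matrix step emits nodes=["gaudi","xeon"], which fromJson() fans out into one image-build job per node.
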
50 changes: 0 additions & 50 deletions .github/workflows/pr-bum_list_check.yml

This file was deleted.

6 changes: 2 additions & 4 deletions .github/workflows/push-image-build.yml
@@ -23,12 +23,10 @@ jobs:
  image-build:
    needs: job1
    strategy:
-      matrix:
-        example: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
-        node: ["gaudi","xeon"]
+      matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
      fail-fast: false
    uses: ./.github/workflows/_example-workflow.yml
    with:
-      node: ${{ matrix.node }}
+      node: ${{ matrix.hardware }}
      example: ${{ matrix.example }}
    secrets: inherit
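
The old matrix crossed every example with a fixed ["gaudi","xeon"] node list; the new one consumes run_matrix wholesale, so only the pairs job1 actually emits get built, and the hardware key replaces node. A hypothetical run_matrix payload (shape inferred from the matrix.example/matrix.hardware references, not taken from real job output):

    {
      "include": [
        { "example": "ChatQnA", "hardware": "gaudi" },
        { "example": "ChatQnA", "hardware": "xeon" }
      ]
    }
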
10 changes: 5 additions & 5 deletions ChatQnA/chatqna.py
@@ -67,8 +67,8 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
        next_inputs["top_p"] = llm_parameters_dict["top_p"]
        next_inputs["stream"] = inputs["streaming"]
        next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
-        next_inputs["presence_penalty"] = inputs["presence_penalty"]
-        next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
+        # next_inputs["presence_penalty"] = inputs["presence_penalty"]
+        # next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
        next_inputs["temperature"] = inputs["temperature"]
        inputs = next_inputs
    return inputs
@@ -136,14 +136,14 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
            prompt_template = PromptTemplate.from_template(chat_template)
            input_variables = prompt_template.input_variables
            if sorted(input_variables) == ["context", "question"]:
-                prompt = prompt_template.format(question=prompt, context="\n".join(docs))
+                prompt = prompt_template.format(question=prompt, context="\n".join(reranked_docs))
            elif input_variables == ["question"]:
                prompt = prompt_template.format(question=prompt)
            else:
                print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
-                prompt = ChatTemplate.generate_rag_prompt(prompt, docs)
+                prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
        else:
-            prompt = ChatTemplate.generate_rag_prompt(prompt, docs)
+            prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)

        next_data["inputs"] = prompt

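
With presence_penalty and repetition_penalty commented out in align_inputs, those request fields no longer reach the LLM microservice, and align_outputs now builds the RAG prompt from reranked_docs instead of the pre-rerank docs. A request sketch against a local deployment (host, port, and path assume the usual ChatQnA compose defaults and may differ in your setup):

    curl -s http://localhost:8888/v1/chatqna \
      -H "Content-Type: application/json" \
      -d '{
            "messages": "What is OPEA?",
            "temperature": 0.2,
            "presence_penalty": 1.03
          }'
    # "temperature" is still forwarded; "presence_penalty" is now dropped.
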
1 change: 1 addition & 0 deletions ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml
@@ -53,6 +53,7 @@ services:
      QDRANT_HOST: ${QDRANT_HOST}
      QDRANT_PORT: 6333
      INDEX_NAME: ${INDEX_NAME}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
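
This and the following compose files pass TEI_EMBEDDING_ENDPOINT into the dataprep container, so it must be exported before docker compose up. A host-side sketch (port 6006 is a mapping these ChatQnA compose files commonly use for the TEI embedding service; verify it against the file you deploy):

    # Use the machine's routable address, not localhost, since the
    # value is resolved from inside the dataprep container.
    export host_ip=$(hostname -I | awk '{print $1}')
    export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
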
1 change: 1 addition & 0 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -59,6 +59,7 @@ services:
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
(file name truncated)
@@ -96,6 +96,7 @@ services:
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
1 change: 1 addition & 0 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml
@@ -57,6 +57,7 @@ services:
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
(file name truncated)
@@ -57,6 +57,7 @@ services:
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
(file name truncated)
@@ -59,6 +59,7 @@ services:
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
    restart: unless-stopped
  tgi-service:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
1 change: 1 addition & 0 deletions ChatQnA/docker_compose/nvidia/gpu/compose.yaml
@@ -60,6 +60,7 @@ services:
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
(file name truncated)
@@ -557,7 +557,7 @@ spec:
          imagePullPolicy: IfNotPresent
          ports:
            - name: ui
-              containerPort: 80
+              containerPort: 5173
              protocol: TCP
          resources:
            {}
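
This and the following manifests fix the UI containerPort: the UI process listens on 5173 (the Svelte/Vite default), so the declared port now matches what is actually bound. A quick post-rollout check, assuming a chatqna-ui service name (hypothetical; use the name from your manifest):

    # Forward the UI locally and confirm it answers on 5173.
    kubectl port-forward svc/chatqna-ui 5173:5173 &
    curl -s -o /dev/null -w "%{http_code}\n" http://localhost:5173
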
2 changes: 1 addition & 1 deletion ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
@@ -457,7 +457,7 @@ spec:
          imagePullPolicy: IfNotPresent
          ports:
            - name: ui
-              containerPort: 80
+              containerPort: 5173
              protocol: TCP
          resources:
            {}
(file name truncated)
@@ -458,7 +458,7 @@ spec:
          imagePullPolicy: IfNotPresent
          ports:
            - name: ui
-              containerPort: 80
+              containerPort: 5173
              protocol: TCP
          resources:
            {}
(file name truncated)
@@ -559,7 +559,7 @@ spec:
          imagePullPolicy: IfNotPresent
          ports:
            - name: ui
-              containerPort: 80
+              containerPort: 5173
              protocol: TCP
          resources:
            {}
(file name truncated)
@@ -595,7 +595,7 @@ spec:
          imagePullPolicy: IfNotPresent
          ports:
            - name: ui
-              containerPort: 80
+              containerPort: 5173
              protocol: TCP
          resources:
            {}
2 changes: 1 addition & 1 deletion ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml
@@ -458,7 +458,7 @@ spec:
          imagePullPolicy: IfNotPresent
          ports:
            - name: ui
-              containerPort: 80
+              containerPort: 5173
              protocol: TCP
          resources:
            {}
15 changes: 5 additions & 10 deletions ChatQnA/tests/test_manifest_on_gaudi.sh
@@ -6,7 +6,7 @@ set -xe
USER_ID=$(whoami)
LOG_PATH=/home/$(whoami)/logs
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
-IMAGE_REPO=${IMAGE_REPO:-}
+IMAGE_REPO=${IMAGE_REPO:-opea}
IMAGE_TAG=${IMAGE_TAG:-latest}

ROLLOUT_TIMEOUT_SECONDS="1800s"
@@ -15,15 +15,10 @@ KUBECTL_TIMEOUT_SECONDS="60s"

function init_chatqna() {
    # replace the mount dir "path: /mnt/opea-models" with "path: $CHART_MOUNT"
    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
-    if [ $CONTEXT == "CI" ]; then
-        # replace megaservice image tag
-        find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/chatqna:latest#image: \"opea/chatqna:${IMAGE_TAG}#g" {} \;
-    else
-        # replace microservice image tag
-        find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
-    fi
-    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/opea/"
-    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" {} \;
+    # replace microservice image tag
+    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
+    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/"
+    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g" {} \;
    # set huggingface token
    find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
}
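
The unconditional rewrite replaces the old CI/CD branching on $CONTEXT. A dry-run sketch of what the two sed expressions do to one sample manifest line (values are illustrative):

    line='image: "opea/chatqna:latest"'
    IMAGE_REPO="registry.example.com"
    IMAGE_TAG="v1.1"
    # step 1: retag every opea/* image
    line=$(echo "$line" | sed "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g")
    # step 2: prefix the repository (opea/ becomes ${IMAGE_REPO}/)
    line=$(echo "$line" | sed "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g")
    echo "$line"    # image: "registry.example.com/chatqna:v1.1"

With the new default IMAGE_REPO=opea, the second step leaves the opea/ namespace in place.
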
15 changes: 5 additions & 10 deletions ChatQnA/tests/test_manifest_on_xeon.sh
@@ -6,7 +6,7 @@ set -xe
USER_ID=$(whoami)
LOG_PATH=/home/$(whoami)/logs
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
-IMAGE_REPO=${IMAGE_REPO:-}
+IMAGE_REPO=${IMAGE_REPO:-opea}
IMAGE_TAG=${IMAGE_TAG:-latest}

ROLLOUT_TIMEOUT_SECONDS="1800s"
@@ -15,15 +15,10 @@ KUBECTL_TIMEOUT_SECONDS="60s"

function init_chatqna() {
    # replace the mount dir "path: /mnt/opea-models" with "path: $CHART_MOUNT"
    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
-    if [ $CONTEXT == "CI" ]; then
-        # replace megaservice image tag
-        find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/chatqna:latest#image: \"opea/chatqna:${IMAGE_TAG}#g" {} \;
-    else
-        # replace microservice image tag
-        find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
-    fi
-    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/opea/"
-    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" {} \;
+    # replace microservice image tag
+    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
+    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/"
+    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g" {} \;
    # set huggingface token
    find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
}