From a9536321a0c87dcc82aedf7b81f166c429c36aa2 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Tue, 22 Oct 2024 12:11:22 +0300 Subject: [PATCH] added the tuned tgi params --- .../benchmark/performance/helm_charts/deployment.py | 3 ++- .../performance/helm_charts/templates/deployment.yaml | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/ChatQnA/benchmark/performance/helm_charts/deployment.py b/ChatQnA/benchmark/performance/helm_charts/deployment.py index bdb84cdde..e5ae0a838 100644 --- a/ChatQnA/benchmark/performance/helm_charts/deployment.py +++ b/ChatQnA/benchmark/performance/helm_charts/deployment.py @@ -26,7 +26,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"): if num_nodes == 1: replicas = [ {"name": "chatqna-backend-server-deploy", "replicas": 2}, - {"name": "embedding-dependency-deploy", "replicas": 2}, + {"name": "embedding-dependency-deploy", "replicas": 1}, {"name": "reranking-dependency-deploy", "replicas": 1} if with_rerank else None, {"name": "llm-dependency-deploy", "replicas": 7 if with_rerank else 8}, {"name": "dataprep-deploy", "replicas": 1}, @@ -69,6 +69,7 @@ def generate_yaml(num_nodes, mode="oob", with_rerank="True"): { "name": "llm-dependency-deploy", "args": [ + {"name": "--model-id", "value": '$(LLM_MODEL_ID)'}, {"name": "--max-input-length", "value": 1280}, {"name": "--max-total-tokens", "value": 2048}, {"name": "--max-batch-total-tokens", "value": 35536}, diff --git a/ChatQnA/benchmark/performance/helm_charts/templates/deployment.yaml b/ChatQnA/benchmark/performance/helm_charts/templates/deployment.yaml index 4fe4fc581..c41f9840d 100644 --- a/ChatQnA/benchmark/performance/helm_charts/templates/deployment.yaml +++ b/ChatQnA/benchmark/performance/helm_charts/templates/deployment.yaml @@ -31,9 +31,17 @@ spec: - envFrom: - configMapRef: name: {{ $global.config.CONFIG_MAP_NAME }} + + {{- $args := $microservice.args }} + {{- range $podSpec := $global.podSpecs }} + {{- if eq $podSpec.name $microservice.name }} + {{- $args = $podSpec.args | default $microservice.args }} + {{- end }} + {{- end }} + {{- if $microservice.args }} args: - {{- range $arg := $microservice.args }} + {{- range $arg := $args }} {{- if $arg.name }} - {{ $arg.name }} {{- end }}