From 14e0054da23270345e2341a22e7b8e17e32f7bff Mon Sep 17 00:00:00 2001
From: Xinyao Wang
Date: Tue, 29 Oct 2024 13:18:07 +0800
Subject: [PATCH 1/3] fix vllm output in chatqna

Signed-off-by: Xinyao Wang
---
 ChatQnA/chatqna.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py
index d168d1055..269361c71 100644
--- a/ChatQnA/chatqna.py
+++ b/ChatQnA/chatqna.py
@@ -166,7 +166,7 @@ def align_generator(self, gen, **kwargs):
         try:
             # sometimes yield empty chunk, do a fallback here
             json_data = json.loads(json_str)
-            if json_data["choices"][0]["finish_reason"] != "eos_token":
+            if json_data["choices"][0]["finish_reason"] != "eos_token" and 'content' in json_data['choices'][0]['delta']:
                 yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
         except Exception as e:
             yield f"data: {repr(json_str.encode('utf-8'))}\n\n"

From 284c666889eb9550e4289c6cd592456a625e00ba Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 29 Oct 2024 06:07:54 +0000
Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ChatQnA/chatqna.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py
index 269361c71..f48a69dc0 100644
--- a/ChatQnA/chatqna.py
+++ b/ChatQnA/chatqna.py
@@ -166,7 +166,10 @@ def align_generator(self, gen, **kwargs):
         try:
             # sometimes yield empty chunk, do a fallback here
             json_data = json.loads(json_str)
-            if json_data["choices"][0]["finish_reason"] != "eos_token" and 'content' in json_data['choices'][0]['delta']:
+            if (
+                json_data["choices"][0]["finish_reason"] != "eos_token"
+                and "content" in json_data["choices"][0]["delta"]
+            ):
                 yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
         except Exception as e:
             yield f"data: {repr(json_str.encode('utf-8'))}\n\n"

From 377faf742817ac3c5b34aea0eeb21e7886f826e1 Mon Sep 17 00:00:00 2001
From: Xinyao Wang
Date: Thu, 31 Oct 2024 10:10:43 +0800
Subject: [PATCH 3/3] remove vllm-ray wrapper

Signed-off-by: Xinyao Wang
---
 ChatQnA/docker_image_build/build.yaml | 6 ------
 docker_images_list.md                 | 1 -
 2 files changed, 7 deletions(-)

diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml
index 390231320..ca3139de4 100644
--- a/ChatQnA/docker_image_build/build.yaml
+++ b/ChatQnA/docker_image_build/build.yaml
@@ -83,12 +83,6 @@ services:
       dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu
     extends: chatqna
     image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
-  llm-vllm-ray:
-    build:
-      context: GenAIComps
-      dockerfile: comps/llms/text-generation/vllm/ray/Dockerfile
-    extends: chatqna
-    image: ${REGISTRY:-opea}/llm-vllm-ray:${TAG:-latest}
   llm-vllm-ray-hpu:
     build:
       context: GenAIComps
diff --git a/docker_images_list.md b/docker_images_list.md
index b0bff43ec..d90ae08dc 100644
--- a/docker_images_list.md
+++ b/docker_images_list.md
@@ -80,7 +80,6 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
 | [opea/llm-vllm-hpu](https://hub.docker.com/r/opea/llm-vllm-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu) | The docker image exposed the OPEA LLM microservice upon vLLM docker image for use by GenAI apps on the Gaudi |
 | [opea/llm-vllm-llamaindex](https://hub.docker.com/r/opea/llm-vllm-llamaindex) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/text-generation/vllm/llama_index/Dockerfile) | This docker image exposes OPEA LLM microservices to the llamaindex framework's vLLM Docker image for use by GenAI applications |
 | [opea/llm-vllm-llamaindex-hpu](https://hub.docker.com/r/opea/llm-vllm-llamaindex-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/text-generation/vllm/llama_index/dependency/Dockerfile.intel_hpu) | This docker image exposes OPEA LLM microservices to the llamaindex framework's vLLM Docker image for use by GenAI applications on the gaudi |
-| [opea/llm-vllm-ray](https://hub.docker.com/r/opea/llm-vllm-ray) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/text-generation/vllm/ray/Dockerfile) | The docker image exposes the OPEA LLM microservices Ray-based upon the vLLM Docker image for GenAI application use |
 | [opea/llm-vllm-ray-hpu](https://hub.docker.com/r/opea/llm-vllm-ray-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/llms/text-generation/vllm/ray/dependency/Dockerfile) | The docker image exposes Ray-based OPEA LLM microservices upon the vLLM Docker image for use by GenAI applications on the Gaudi |
 | [opea/llava-hpu](https://hub.docker.com/r/opea/llava-hpu) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/llava/dependency/Dockerfile.intel_hpu) | The docker image exposed the OPEA microservice running LLaVA as a large visual model (LVM) service for GenAI application use on the Gaudi |
 | [opea/lvm-tgi](https://hub.docker.com/r/opea/lvm-tgi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/lvms/tgi-llava/Dockerfile) | This docker image is designed to build a large visual model (LVM) microservice using the HuggingFace Text Generation Inference(TGI) framework. The microservice accepts document input and generates a answer to question. |
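
Note (not part of the patches above): a minimal, self-contained sketch of the streaming behavior that PATCH 1/3 guards against. With vLLM's OpenAI-compatible streaming output, the final chunk can carry a delta with no "content" key; without the extra check, the lookup fails and the raw JSON chunk falls through to the exception fallback and leaks into the SSE stream. The function name align_chunks and the sample chunks below are illustrative assumptions, not the repository's actual method or captured backend output.

import json


def align_chunks(raw_chunks):
    """Yield SSE lines, skipping chunks whose delta has no 'content' key."""
    for json_str in raw_chunks:
        try:
            json_data = json.loads(json_str)
            choice = json_data["choices"][0]
            # The closing chunk (finish_reason set) may ship an empty delta;
            # guarding on 'content' keeps it out of the generated answer.
            if choice["finish_reason"] != "eos_token" and "content" in choice["delta"]:
                yield f"data: {repr(choice['delta']['content'].encode('utf-8'))}\n\n"
        except Exception:
            # fallback: emit the raw chunk as-is
            yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
    yield "data: [DONE]\n\n"


if __name__ == "__main__":
    chunks = [
        '{"choices": [{"delta": {"role": "assistant", "content": "Hello"}, "finish_reason": null}]}',
        '{"choices": [{"delta": {}, "finish_reason": "stop"}]}',  # no 'content' key
    ]
    for line in align_chunks(chunks):
        print(line, end="")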