From 129c67391a70242b31a4305d064c7a18286edace Mon Sep 17 00:00:00 2001
From: Li Gang
Date: Mon, 9 Sep 2024 18:03:56 +0800
Subject: [PATCH] Add vllm Arc Dockerfile support

Support vllm inference on Intel ARC GPU

Signed-off-by: Li Gang
Co-authored-by: Chen, Hu1
---
 .../text-generation/vllm/docker/Dockerfile.arc | 11 +++++++++++
 comps/llms/text-generation/vllm/vllm_arc.sh    | 25 +++++++++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 comps/llms/text-generation/vllm/docker/Dockerfile.arc
 create mode 100755 comps/llms/text-generation/vllm/vllm_arc.sh

diff --git a/comps/llms/text-generation/vllm/docker/Dockerfile.arc b/comps/llms/text-generation/vllm/docker/Dockerfile.arc
new file mode 100644
index 000000000..4d8d921e9
--- /dev/null
+++ b/comps/llms/text-generation/vllm/docker/Dockerfile.arc
@@ -0,0 +1,11 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM intelanalytics/ipex-llm-serving-vllm-xpu-experiment:2.1.0b2
+
+# Trailing slash: always copy into the /llm directory, never create a file named /llm.
+COPY comps/llms/text-generation/vllm/vllm_arc.sh /llm/
+
+RUN chmod +x /llm/vllm_arc.sh
+
+ENTRYPOINT ["/llm/vllm_arc.sh"]
diff --git a/comps/llms/text-generation/vllm/vllm_arc.sh b/comps/llms/text-generation/vllm/vllm_arc.sh
new file mode 100755
index 000000000..cb0518431
--- /dev/null
+++ b/comps/llms/text-generation/vllm/vllm_arc.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Start the ipex_llm vLLM OpenAI API server entrypoint on the xpu device, port 9009.
+# The model comes from LLM_MODEL_ID (default below); any arguments given to this
+# script are appended to the server command line.
+
+LLM_MODEL_ID="${LLM_MODEL_ID:-Intel/neural-chat-7b-v3-3}"
+
+# Load the Intel environment setup scripts shipped in the base image.
+source /opt/intel/oneapi/setvars.sh
+source /opt/intel/1ccl-wks/setvars.sh
+
+# exec replaces this shell with the server so it receives container signals directly;
+# quoting keeps the model ID and the forwarded arguments from being word-split.
+exec python -m ipex_llm.vllm.xpu.entrypoints.openai.api_server \
+  --port 9009 \
+  --model "${LLM_MODEL_ID}" \
+  --trust-remote-code \
+  --gpu-memory-utilization 0.9 \
+  --device xpu \
+  --enforce-eager \
+  "$@"