diff --git a/comps/llms/text-generation/vllm/docker/Dockerfile.arc b/comps/llms/text-generation/vllm/docker/Dockerfile.arc
new file mode 100644
index 000000000..4d8d921e9
--- /dev/null
+++ b/comps/llms/text-generation/vllm/docker/Dockerfile.arc
@@ -0,0 +1,15 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# vLLM serving image for Intel Arc GPUs (XPU) via IPEX-LLM.
+# Base image is pinned to an explicit tag for reproducible builds.
+FROM intelanalytics/ipex-llm-serving-vllm-xpu-experiment:2.1.0b2
+
+COPY comps/llms/text-generation/vllm/vllm_arc.sh /llm
+
+RUN chmod +x /llm/vllm_arc.sh
+
+# Documentation only (does not publish): vllm_arc.sh serves on this port.
+EXPOSE 9009
+
+ENTRYPOINT ["/llm/vllm_arc.sh"]
diff --git a/comps/llms/text-generation/vllm/vllm_arc.sh b/comps/llms/text-generation/vllm/vllm_arc.sh
new file mode 100755
index 000000000..cb0518431
--- /dev/null
+++ b/comps/llms/text-generation/vllm/vllm_arc.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Container entrypoint: starts the IPEX-LLM vLLM OpenAI-compatible API server
+# on Intel Arc (XPU). Any extra "docker run" arguments are forwarded verbatim.
+
+# Default model; ":=" also assigns the default back into the environment.
+LLM_MODEL_ID="${LLM_MODEL_ID:=Intel/neural-chat-7b-v3-3}"
+
+# oneAPI / oneCCL runtime environment required for XPU execution.
+source /opt/intel/oneapi/setvars.sh
+source /opt/intel/1ccl-wks/setvars.sh
+
+# exec: the server replaces this shell as PID 1 so it receives SIGTERM from
+# "docker stop". "$@" (quoted) forwards user args without word splitting.
+exec python -m ipex_llm.vllm.xpu.entrypoints.openai.api_server \
+  --port 9009 \
+  --model "${LLM_MODEL_ID}" \
+  --trust-remote-code \
+  --gpu-memory-utilization 0.9 \
+  --device xpu \
+  --enforce-eager \
+  "$@"