opea-project · gavinlichn · Sep 9, 2024 · Sep 9, 2024 · chensuyue · Sep 11, 2024
@@ -0,0 +1,16 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Image that runs the vllm_arc.sh launcher on top of the IPEX-LLM vLLM XPU
+# serving image.
+FROM intelanalytics/ipex-llm-serving-vllm-xpu-experiment:2.1.0b2
+
+# The trailing slash makes Docker treat /llm as a directory, so the script
+# always lands at /llm/vllm_arc.sh instead of being written to a file named /llm.
+COPY comps/llms/text-generation/vllm/vllm_arc.sh /llm/
+
+# ENTRYPOINT executes the script directly, so it needs the execute bit.
+RUN chmod +x /llm/vllm_arc.sh
+
+# Exec form: arguments passed to `docker run` are appended to the script's argv.
+ENTRYPOINT ["/llm/vllm_arc.sh"]
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Container entrypoint: sources the setvars.sh scripts under /opt/intel and
+# starts the ipex_llm.vllm.xpu.entrypoints.openai.api_server module on port
+# 9009. Extra command-line arguments are forwarded to the server unchanged.
+
+# Model to serve; a non-empty LLM_MODEL_ID from the environment takes precedence.
+LLM_MODEL_ID="${LLM_MODEL_ID:=Intel/neural-chat-7b-v3-3}"
+
+source /opt/intel/oneapi/setvars.sh
+source /opt/intel/1ccl-wks/setvars.sh
+
+# exec replaces this shell with the server process so it receives container
+# signals directly. Quoting keeps the model id and every forwarded argument as
+# a single word even when it contains spaces or glob characters.
+exec python -m ipex_llm.vllm.xpu.entrypoints.openai.api_server \
+  --port 9009 \
+  --model "${LLM_MODEL_ID}" \
+  --trust-remote-code \
+  --gpu-memory-utilization 0.9 \
+  --device xpu \
+  --enforce-eager \
+  "$@"