Skip to content

Commit

Permalink
Merge pull request #248 from ROCm/upstream_merge_24_10_28
Browse files Browse the repository at this point in the history
Upstream merge 24 10 28
  • Loading branch information
gshtras authored Oct 29, 2024
2 parents 5974cc3 + cfd7388 commit 7aa6982
Show file tree
Hide file tree
Showing 279 changed files with 9,696 additions and 3,186 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8 -b "auto" -l 1000 -f 5 -t 1
model_name: "neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8"
tasks:
- name: "gsm8k"
metrics:
- name: "exact_match,strict-match"
value: 0.356
- name: "exact_match,flexible-extract"
value: 0.358
limit: 1000
num_fewshot: 5
2 changes: 1 addition & 1 deletion .buildkite/lm-eval-harness/configs/models-small.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Meta-Llama-3-8B-Instruct.yaml
Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml
Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
Expand Down
57 changes: 57 additions & 0 deletions .github/mergify.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
pull_request_rules:
- name: label-documentation
description: Automatically apply documentation label
conditions:
- or:
- files~=^[^/]+\.md$
- files~=^docs/
actions:
label:
add:
- documentation

- name: label-ci-build
description: Automatically apply ci/build label
conditions:
- files~=^\.github/
- files~=\.buildkite/
- files~=^cmake/
- files=CMakeLists.txt
- files~=^Dockerfile
- files~=^requirements.*\.txt
- files=setup.py
actions:
label:
add:
- ci/build

- name: label-frontend
description: Automatically apply frontend label
conditions:
- files~=^vllm/entrypoints/
actions:
label:
add:
- frontend

- name: ping author on conflicts and add 'needs-rebase' label
conditions:
- conflict
- -closed
actions:
label:
add:
- needs-rebase
comment:
message: |
This pull request has merge conflicts that must be resolved before it can be
merged. @{{author}} please rebase it. https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork
- name: remove 'needs-rebase' label when conflict is resolved
conditions:
- -conflict
- -closed
actions:
label:
remove:
- needs-rebase
52 changes: 52 additions & 0 deletions .github/workflows/stale.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: 'Close inactive issues and PRs'

on:
schedule:
# Daily at 1:30 AM UTC
- cron: '30 1 * * *'

jobs:
close-issues-and-pull-requests:
permissions:
issues: write
pull-requests: write
actions: write
runs-on: ubuntu-latest
steps:
- uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0
with:
# Increasing this value ensures that changes to this workflow
# propagate to all issues and PRs in days rather than months
operations-per-run: 1000

exempt-draft-pr: true
exempt-issue-labels: 'keep-open'
exempt-pr-labels: 'keep-open'

labels-to-add-when-unstale: 'unstale'
labels-to-remove-when-stale: 'unstale'

days-before-issue-stale: 90
days-before-issue-close: 30
stale-issue-label: 'stale'
stale-issue-message: >
This issue has been automatically marked as stale because it has not
had any activity within 90 days. It will be automatically closed if no
further activity occurs within 30 days. Leave a comment if
you feel this issue should remain open. Thank you!
close-issue-message: >
This issue has been automatically closed due to inactivity. Please
feel free to reopen if you feel it is still relevant. Thank you!
days-before-pr-stale: 90
days-before-pr-close: 30
stale-pr-label: 'stale'
stale-pr-message: >
This pull request has been automatically marked as stale because it
has not had any activity within 90 days. It will be automatically
closed if no further activity occurs within 30 days. Leave a comment
if you feel this pull request should remain open. Thank you!
close-pr-message: >
This pull request has been automatically closed due to inactivity.
Please feel free to reopen if you intend to continue working on it.
Thank you!
20 changes: 11 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx11
# requirements.txt files and should be kept consistent. The ROCm torch
# versions are derived from Dockerfile.rocm
#
set(TORCH_SUPPORTED_VERSION_CUDA "2.4.0")
set(TORCH_SUPPORTED_VERSION_CUDA "2.5.0")
set(TORCH_SUPPORTED_VERSION_ROCM "2.5.0")

#
Expand Down Expand Up @@ -196,12 +196,12 @@ endif()

#
# Use FetchContent for C++ dependencies that are compiled as part of vLLM's build process.
# Configure it to place files in vllm/.deps, in order to play nicely with sccache.
# setup.py will override FETCHCONTENT_BASE_DIR to play nicely with sccache.
# Each dependency that produces build artifacts should override its BINARY_DIR to avoid
# conflicts between build types. It should instead be set to ${CMAKE_BINARY_DIR}/<dependency>.
#
include(FetchContent)
get_filename_component(PROJECT_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
file(MAKE_DIRECTORY "${FETCHCONTENT_BASE_DIR}")
set(FETCHCONTENT_BASE_DIR "${PROJECT_ROOT_DIR}/.deps")
file(MAKE_DIRECTORY ${FETCHCONTENT_BASE_DIR}) # Ensure the directory exists
message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")

#
Expand Down Expand Up @@ -229,7 +229,6 @@ set(VLLM_EXT_SRC
"csrc/quantization/compressed_tensors/int8_quant_kernels.cu"
"csrc/quantization/fp8/common.cu"
"csrc/cuda_utils_kernels.cu"
"csrc/moe_align_block_size_kernels.cu"
"csrc/prepare_inputs/advance_step.cu"
"csrc/torch_bindings.cpp")

Expand Down Expand Up @@ -286,7 +285,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
message(STATUS "Building Marlin kernels for archs: ${MARLIN_ARCHS}")
else()
message(STATUS "Not building Marlin kernels as no compatible archs found"
"in CUDA target architectures")
" in CUDA target architectures")
endif()

#
Expand Down Expand Up @@ -444,6 +443,7 @@ target_compile_definitions(_C PRIVATE CUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1)

set(VLLM_MOE_EXT_SRC
"csrc/moe/torch_bindings.cpp"
"csrc/moe/moe_align_sum_kernels.cu"
"csrc/moe/topk_softmax_kernels.cu")

set_gencode_flags_for_srcs(
Expand Down Expand Up @@ -471,7 +471,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
message(STATUS "Building Marlin MOE kernels for archs: ${MARLIN_MOE_ARCHS}")
else()
message(STATUS "Not building Marlin MOE kernels as no compatible archs found"
"in CUDA target architectures")
" in CUDA target architectures")
endif()
endif()

Expand Down Expand Up @@ -549,8 +549,10 @@ else()
FetchContent_Declare(
vllm-flash-attn
GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
GIT_TAG 013f0c4fc47e6574060879d9734c1df8c5c273bd
GIT_TAG 5259c586c403a4e4d8bf69973c159b40cc346fb9
GIT_PROGRESS TRUE
# Don't share the vllm-flash-attn build between build types
BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn
)
endif()

Expand Down
8 changes: 4 additions & 4 deletions Dockerfile.openvino
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ RUN --mount=type=bind,source=.git,target=.git \
if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi

# install build requirements
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/vllm/requirements-build.txt
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/requirements-build.txt
# build vLLM with OpenVINO backend
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="openvino" python3 -m pip install /workspace/vllm/
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="openvino" python3 -m pip install /workspace

COPY examples/ /workspace/vllm/examples
COPY benchmarks/ /workspace/vllm/benchmarks
COPY examples/ /workspace/examples
COPY benchmarks/ /workspace/benchmarks

CMD ["/bin/bash"]
Loading

0 comments on commit 7aa6982

Please sign in to comment.