From e970933b207ce7ce13a5c7ae519d07cd0dfe5487 Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Thu, 17 Oct 2024 20:48:38 +0000 Subject: [PATCH 1/9] upgrade to pytorch 2.5 (still needs newer xformers) Signed-off-by: Bill Nell --- CMakeLists.txt | 2 +- pyproject.toml | 2 +- requirements-build.txt | 2 +- requirements-cpu.txt | 2 +- requirements-cuda.txt | 6 +++--- requirements-openvino.txt | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f6d1c66b2cf7..25934994c07e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx11 # requirements.txt files and should be kept consistent. The ROCm torch # versions are derived from Dockerfile.rocm # -set(TORCH_SUPPORTED_VERSION_CUDA "2.4.0") +set(TORCH_SUPPORTED_VERSION_CUDA "2.5.0") set(TORCH_SUPPORTED_VERSION_ROCM "2.5.0") # diff --git a/pyproject.toml b/pyproject.toml index e0c56ab79cad0..e78f5652f486b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ "packaging", "setuptools>=61", "setuptools-scm>=8.0", - "torch == 2.4.0", + "torch == 2.5.0", "wheel", "jinja2", ] diff --git a/requirements-build.txt b/requirements-build.txt index 6144a56da8c47..ea2b688bb3108 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -4,6 +4,6 @@ ninja packaging setuptools>=61 setuptools-scm>=8 -torch==2.4.0 +torch==2.5.0 wheel jinja2 diff --git a/requirements-cpu.txt b/requirements-cpu.txt index 27ca8ca5dbc58..8e47a850fc029 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -2,5 +2,5 @@ -r requirements-common.txt # Dependencies for x86_64 CPUs -torch == 2.4.0+cpu; platform_machine != "ppc64le" +torch == 2.5.0+cpu; platform_machine != "ppc64le" torchvision; platform_machine != "ppc64le" # required for the image processor of phi3v, this must be updated alongside torch diff --git a/requirements-cuda.txt b/requirements-cuda.txt index 
3b3c2f876919e..ff4117f0c9af6 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -4,7 +4,7 @@ # Dependencies for NVIDIA GPUs ray >= 2.9 nvidia-ml-py # for pynvml package -torch == 2.4.0 +torch == 2.5.0 # These must be updated alongside torch -torchvision == 0.19 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version -xformers == 0.0.27.post2; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.4.0 +torchvision == 0.20 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version +xformers == 0.0.28.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.4.0 diff --git a/requirements-openvino.txt b/requirements-openvino.txt index ac54cf0c3288f..7ad0d1e7f704b 100644 --- a/requirements-openvino.txt +++ b/requirements-openvino.txt @@ -1,7 +1,7 @@ # Common dependencies -r requirements-common.txt -torch == 2.4.0 # should be aligned with "common" vLLM torch version +torch == 2.5.0 # should be aligned with "common" vLLM torch version openvino >= 2024.4.0 # since 2024.4.0 both CPU and GPU support Paged Attention optimum @ git+https://github.com/huggingface/optimum.git@main # latest optimum is used to support latest transformers version From 5a54b488461b005d241a9845715bb5b401d1c529 Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Tue, 22 Oct 2024 14:49:51 +0000 Subject: [PATCH 2/9] update xformers Signed-off-by: Bill Nell --- requirements-cuda.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-cuda.txt b/requirements-cuda.txt index ff4117f0c9af6..92fa303d687a2 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -7,4 +7,4 @@ nvidia-ml-py # for pynvml package torch == 2.5.0 # These must be updated alongside torch torchvision == 0.20 # Required for phi3v processor. 
See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version -xformers == 0.0.28.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.4.0 +xformers == 0.0.28.post2; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.0 From 1ea632b00d0763167202e913b9619fe14947b34a Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Fri, 25 Oct 2024 18:01:24 +0000 Subject: [PATCH 3/9] use better method for specifying cuda libs Signed-off-by: Bill Nell --- cmake/utils.cmake | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 24bb7299338ac..40430dae10c5b 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -424,11 +424,7 @@ function (define_gpu_extension_target GPU_MOD_NAME) # Don't use `TORCH_LIBRARIES` for CUDA since it pulls in a bunch of # dependencies that are not necessary and may not be installed. if (GPU_LANGUAGE STREQUAL "CUDA") - if ("${CUDA_CUDA_LIB}" STREQUAL "") - set(CUDA_CUDA_LIB "${CUDA_CUDA_LIBRARY}") - endif() - target_link_libraries(${GPU_MOD_NAME} PRIVATE ${CUDA_CUDA_LIB} - ${CUDA_LIBRARIES}) + target_link_libraries(${GPU_MOD_NAME} PRIVATE CUDA::cudart CUDA::cuda_driver) else() target_link_libraries(${GPU_MOD_NAME} PRIVATE ${TORCH_LIBRARIES}) endif() From c00efe32d68ed25321d21c3c8dd4a2adaed0e49a Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Fri, 25 Oct 2024 18:54:56 +0000 Subject: [PATCH 4/9] try to make pytorch 2.5 use CUDA 12.1 Signed-off-by: Bill Nell --- pyproject.toml | 2 +- requirements-build.txt | 2 +- requirements-cpu.txt | 4 ++-- requirements-cuda.txt | 8 +++++--- requirements-openvino.txt | 2 +- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e78f5652f486b..84fcd8950ab28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ "packaging", "setuptools>=61", "setuptools-scm>=8.0", - "torch == 2.5.0", + "torch == 2.5.0 
--extra-index-url https://download.pytorch.org/whl/cu121", "wheel", "jinja2", ] diff --git a/requirements-build.txt b/requirements-build.txt index ea2b688bb3108..7d22707216eee 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -4,6 +4,6 @@ ninja packaging setuptools>=61 setuptools-scm>=8 -torch==2.5.0 +torch==2.5.0 --extra-index-url https://download.pytorch.org/whl/cu121 wheel jinja2 diff --git a/requirements-cpu.txt b/requirements-cpu.txt index 8e47a850fc029..e7543f9f98823 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -2,5 +2,5 @@ -r requirements-common.txt # Dependencies for x86_64 CPUs -torch == 2.5.0+cpu; platform_machine != "ppc64le" -torchvision; platform_machine != "ppc64le" # required for the image processor of phi3v, this must be updated alongside torch +torch == 2.5.0+cpu --extra-index-url https://download.pytorch.org/whl/cu121; platform_machine != "ppc64le" +torchvision --extra-index-url https://download.pytorch.org/whl/cu121; platform_machine != "ppc64le" # required for the image processor of phi3v, this must be updated alongside torch diff --git a/requirements-cuda.txt b/requirements-cuda.txt index 92fa303d687a2..85c1e595e5e60 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -4,7 +4,9 @@ # Dependencies for NVIDIA GPUs ray >= 2.9 nvidia-ml-py # for pynvml package -torch == 2.5.0 +torch == 2.5.0 --extra-index-url https://download.pytorch.org/whl/cu121 # These must be updated alongside torch -torchvision == 0.20 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version -xformers == 0.0.28.post2; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.0 +# Required for phi3v processor. 
See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version +torchvision == 0.20 --extra-index-url https://download.pytorch.org/whl/cu121 +# Requires PyTorch 2.5.0 +xformers == 0.0.28.post2 --extra-index-url https://download.pytorch.org/whl/cu121; platform_system == 'Linux' and platform_machine == 'x86_64' diff --git a/requirements-openvino.txt b/requirements-openvino.txt index 7ad0d1e7f704b..5b154ec38ef9e 100644 --- a/requirements-openvino.txt +++ b/requirements-openvino.txt @@ -1,7 +1,7 @@ # Common dependencies -r requirements-common.txt -torch == 2.5.0 # should be aligned with "common" vLLM torch version +torch == 2.5.0 --extra-index-url https://download.pytorch.org/whl/cu121 # should be aligned with "common" vLLM torch version openvino >= 2024.4.0 # since 2024.4.0 both CPU and GPU support Paged Attention optimum @ git+https://github.com/huggingface/optimum.git@main # latest optimum is used to support latest transformers version From c4ebdbab87855a7ebd9bef77f49956a2396ac43c Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Fri, 25 Oct 2024 19:43:07 +0000 Subject: [PATCH 5/9] remove extra index 12.1 for now Signed-off-by: Bill Nell --- pyproject.toml | 2 +- requirements-build.txt | 2 +- requirements-cpu.txt | 4 ++-- requirements-cuda.txt | 8 +++----- requirements-openvino.txt | 2 +- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 84fcd8950ab28..e78f5652f486b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ "packaging", "setuptools>=61", "setuptools-scm>=8.0", - "torch == 2.5.0 --extra-index-url https://download.pytorch.org/whl/cu121", + "torch == 2.5.0", "wheel", "jinja2", ] diff --git a/requirements-build.txt b/requirements-build.txt index 7d22707216eee..ea2b688bb3108 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -4,6 +4,6 @@ ninja packaging setuptools>=61 setuptools-scm>=8 -torch==2.5.0 --extra-index-url 
https://download.pytorch.org/whl/cu121 +torch==2.5.0 wheel jinja2 diff --git a/requirements-cpu.txt b/requirements-cpu.txt index e7543f9f98823..8e47a850fc029 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -2,5 +2,5 @@ -r requirements-common.txt # Dependencies for x86_64 CPUs -torch == 2.5.0+cpu --extra-index-url https://download.pytorch.org/whl/cu121; platform_machine != "ppc64le" -torchvision --extra-index-url https://download.pytorch.org/whl/cu121; platform_machine != "ppc64le" # required for the image processor of phi3v, this must be updated alongside torch +torch == 2.5.0+cpu; platform_machine != "ppc64le" +torchvision; platform_machine != "ppc64le" # required for the image processor of phi3v, this must be updated alongside torch diff --git a/requirements-cuda.txt b/requirements-cuda.txt index 85c1e595e5e60..92fa303d687a2 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -4,9 +4,7 @@ # Dependencies for NVIDIA GPUs ray >= 2.9 nvidia-ml-py # for pynvml package -torch == 2.5.0 --extra-index-url https://download.pytorch.org/whl/cu121 +torch == 2.5.0 # These must be updated alongside torch -# Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version -torchvision == 0.20 --extra-index-url https://download.pytorch.org/whl/cu121 -# Requires PyTorch 2.5.0 -xformers == 0.0.28.post2 --extra-index-url https://download.pytorch.org/whl/cu121; platform_system == 'Linux' and platform_machine == 'x86_64' +torchvision == 0.20 # Required for phi3v processor. 
See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version +xformers == 0.0.28.post2; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.0 diff --git a/requirements-openvino.txt b/requirements-openvino.txt index 5b154ec38ef9e..7ad0d1e7f704b 100644 --- a/requirements-openvino.txt +++ b/requirements-openvino.txt @@ -1,7 +1,7 @@ # Common dependencies -r requirements-common.txt -torch == 2.5.0 --extra-index-url https://download.pytorch.org/whl/cu121 # should be aligned with "common" vLLM torch version +torch == 2.5.0 # should be aligned with "common" vLLM torch version openvino >= 2024.4.0 # since 2024.4.0 both CPU and GPU support Paged Attention optimum @ git+https://github.com/huggingface/optimum.git@main # latest optimum is used to support latest transformers version From 39f9c34d01ed24faaf62e1ab057983782884f434 Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Fri, 25 Oct 2024 19:45:28 +0000 Subject: [PATCH 6/9] update flash attn git hash Signed-off-by: Bill Nell --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 25934994c07e3..72cf678f9cb16 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -507,7 +507,7 @@ else() FetchContent_Declare( vllm-flash-attn GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git - GIT_TAG 013f0c4fc47e6574060879d9734c1df8c5c273bd + GIT_TAG 5259c586c403a4e4d8bf69973c159b40cc346fb9 GIT_PROGRESS TRUE ) endif() From f1f3caa9a669608c305b61fa5e7b9283ec3376d7 Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Fri, 25 Oct 2024 20:44:57 +0000 Subject: [PATCH 7/9] downgrade cpu pytorch version Signed-off-by: Bill Nell --- requirements-cpu.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-cpu.txt b/requirements-cpu.txt index 8e47a850fc029..27ca8ca5dbc58 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -2,5 +2,5 @@ -r requirements-common.txt # Dependencies 
for x86_64 CPUs -torch == 2.5.0+cpu; platform_machine != "ppc64le" +torch == 2.4.0+cpu; platform_machine != "ppc64le" torchvision; platform_machine != "ppc64le" # required for the image processor of phi3v, this must be updated alongside torch From 0068133747c608093a0aa421ee26d70ec635552b Mon Sep 17 00:00:00 2001 From: youkaichao Date: Sat, 26 Oct 2024 20:06:31 -0700 Subject: [PATCH 8/9] fix sdpa cudnn Signed-off-by: youkaichao --- vllm/platforms/cuda.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 30bbf5107475d..9c5212ace1346 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -7,6 +7,7 @@ from typing import Callable, List, Tuple, TypeVar import pynvml +import torch from typing_extensions import ParamSpec from vllm.logger import init_logger @@ -26,6 +27,10 @@ " and cause errors. See https://pypi.org/project/pynvml " "for more information.") +# pytorch 2.5 uses cudnn sdpa by default, which causes crashes on some models +# see https://github.com/huggingface/diffusers/issues/9704 for details +torch.backends.cuda.enable_cudnn_sdp(False) + # NVML utils # Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`, # all the related functions work on real physical device ids. 
From 5250a65d70abf40b8fa4d363290fc9f9e46aa7d2 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Sun, 27 Oct 2024 00:35:47 -0700 Subject: [PATCH 9/9] fix tests Signed-off-by: youkaichao --- .../decoder_only/language/test_big_models.py | 46 ++++++++++++++----- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/tests/models/decoder_only/language/test_big_models.py b/tests/models/decoder_only/language/test_big_models.py index 75625b35209ce..fcfc159e4f5a0 100644 --- a/tests/models/decoder_only/language/test_big_models.py +++ b/tests/models/decoder_only/language/test_big_models.py @@ -8,7 +8,7 @@ from vllm.platforms import current_platform -from ...utils import check_outputs_equal +from ...utils import check_logprobs_close, check_outputs_equal MODELS = [ "meta-llama/Llama-2-7b-hf", @@ -43,18 +43,40 @@ def test_models( dtype: str, max_tokens: int, ) -> None: - with hf_runner(model, dtype=dtype) as hf_model: - hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens) - with vllm_runner(model, dtype=dtype, enforce_eager=True) as vllm_model: - vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) - - check_outputs_equal( - outputs_0_lst=hf_outputs, - outputs_1_lst=vllm_outputs, - name_0="hf", - name_1="vllm", - ) + if model == "openbmb/MiniCPM3-4B": + # the output becomes slightly different when upgrading to + # pytorch 2.5. Changing to logprobs checks instead of exact + # output checks. 
+ NUM_LOG_PROBS = 8 + with hf_runner(model, dtype=dtype) as hf_model: + hf_outputs = hf_model.generate_greedy_logprobs_limit( + example_prompts, max_tokens, NUM_LOG_PROBS) + + with vllm_runner(model, dtype=dtype, enforce_eager=True) as vllm_model: + vllm_outputs = vllm_model.generate_greedy_logprobs( + example_prompts, max_tokens, NUM_LOG_PROBS) + + check_logprobs_close( + outputs_0_lst=hf_outputs, + outputs_1_lst=vllm_outputs, + name_0="hf", + name_1="vllm", + ) + else: + with hf_runner(model, dtype=dtype) as hf_model: + hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens) + + with vllm_runner(model, dtype=dtype, enforce_eager=True) as vllm_model: + vllm_outputs = vllm_model.generate_greedy(example_prompts, + max_tokens) + + check_outputs_equal( + outputs_0_lst=hf_outputs, + outputs_1_lst=vllm_outputs, + name_0="hf", + name_1="vllm", + ) @pytest.mark.parametrize("model", MODELS)