From 682c38ee69c3d7a7418daaeb03095916afdd3672 Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Wed, 19 Jun 2024 16:36:41 -0500 Subject: [PATCH] Fix for yapf --- benchmarks/benchmark_latency.py | 2 ++ vllm/distributed/device_communicators/custom_all_reduce.py | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmark_latency.py b/benchmarks/benchmark_latency.py index 2aca1b23f9b6f..0c3c204937f02 100644 --- a/benchmarks/benchmark_latency.py +++ b/benchmarks/benchmark_latency.py @@ -37,6 +37,7 @@ def main(args: argparse.Namespace): enable_chunked_prefill=args.enable_chunked_prefill, download_dir=args.download_dir, block_size=args.block_size, + disable_custom_all_reduce=args.disable_custom_all_reduce, gpu_memory_utilization=args.gpu_memory_utilization) sampling_params = SamplingParams( @@ -221,6 +222,7 @@ def run_to_completion(profile_dir: Optional[str] = None): type=str, default=None, help='Path to save the latency results in JSON format.') + parser.add_argument('--disable_custom_all_reduce', action='store_true') parser.add_argument('--gpu-memory-utilization', type=float, default=0.9, diff --git a/vllm/distributed/device_communicators/custom_all_reduce.py b/vllm/distributed/device_communicators/custom_all_reduce.py index 41885851560db..c0bbcee184019 100644 --- a/vllm/distributed/device_communicators/custom_all_reduce.py +++ b/vllm/distributed/device_communicators/custom_all_reduce.py @@ -201,8 +201,7 @@ def __init__(self, logger.warning( "Custom allreduce is disabled because your platform lacks " "GPU P2P capability or P2P test failed. To silence this " - "warning, specify disable_custom_all_reduce=True explicitly." - ) + "warning, specify disable_custom_all_reduce=True explicitly.") return self.disabled = False