From a87dc6967987648215c76d43155c4dfff99e1385 Mon Sep 17 00:00:00 2001 From: seungrokjung Date: Wed, 16 Oct 2024 14:39:06 +0000 Subject: [PATCH] cuda graph + num-scheduler-steps bug fix --- vllm/attention/backends/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py index 56d073356cbfc..4951636c64cb0 100644 --- a/vllm/attention/backends/utils.py +++ b/vllm/attention/backends/utils.py @@ -228,7 +228,8 @@ def build(self, seq_lens: List[int], query_lens: List[int], # It may be possible to have more blocks allocated due # to lookahead slots of multi-step, however, they are # not used anyway, so can be safely ignored. - input_block_tables[i, :max_blocks] = block_table[:max_blocks] + input_block_tables[ + i, :max_blocks] = block_table[:max_blocks] block_tables = torch.from_numpy(input_block_tables).to( device, non_blocking=True) else: