From a87dc6967987648215c76d43155c4dfff99e1385 Mon Sep 17 00:00:00 2001
From: seungrokjung <seungrok.jung@amd.com>
Date: Wed, 16 Oct 2024 14:39:06 +0000
Subject: [PATCH] Fix CUDA graph + --num-scheduler-steps bug

---
 vllm/attention/backends/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py
index 56d073356cbfc..4951636c64cb0 100644
--- a/vllm/attention/backends/utils.py
+++ b/vllm/attention/backends/utils.py
@@ -228,7 +228,8 @@ def build(self, seq_lens: List[int], query_lens: List[int],
                         # It may be possible to have more blocks allocated due
                         # to lookahead slots of multi-step, however, they are
                         # not used anyway, so can be safely ignored.
-                        input_block_tables[i, :max_blocks] = block_table[:max_blocks]
+                        input_block_tables[
+                                i, :max_blocks] = block_table[:max_blocks]
             block_tables = torch.from_numpy(input_block_tables).to(
                 device, non_blocking=True)
         else: