Commit a87dc69
cuda graph + num-scheduler-steps bug fix
seungrokj committed Oct 16, 2024
1 parent ad54724 · commit a87dc69
Showing 1 changed file with 2 additions and 1 deletion.
vllm/attention/backends/utils.py (3 changes: 2 additions & 1 deletion)
@@ -228,7 +228,8 @@ def build(self, seq_lens: List[int], query_lens: List[int],
                 # It may be possible to have more blocks allocated due
                 # to lookahead slots of multi-step, however, they are
                 # not used anyway, so can be safely ignored.
-                input_block_tables[i, :max_blocks] = block_table[:max_blocks]
+                input_block_tables[
+                    i, :max_blocks] = block_table[:max_blocks]
                 block_tables = torch.from_numpy(input_block_tables).to(
                     device, non_blocking=True)
             else:
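
As the comment in the hunk notes, multi-step scheduling (num-scheduler-steps > 1) can allocate lookahead blocks that the captured CUDA graph never reads, so a sequence's block table may be longer than a row of the pre-allocated graph block table and must be clipped to max_blocks before the copy. Below is a minimal NumPy sketch of that failure mode and the clip; the shapes are invented for illustration, and graph_block_tables here merely stands in for the runner's pre-allocated array that input_block_tables is sliced from.

import numpy as np

# Illustrative only: 2 sequences, 4 block slots per row (invented shapes).
max_blocks = 4
graph_block_tables = np.zeros((2, max_blocks), dtype=np.int32)
input_block_tables = graph_block_tables[:2]

# With lookahead slots, a sequence can own more blocks than fit in one row.
block_table = [10, 11, 12, 13, 14]  # 5 blocks, but the row holds only 4

# Without the clip, NumPy rejects the copy:
#   input_block_tables[0, :max_blocks] = block_table
#   ValueError: could not broadcast input array from shape (5,) into shape (4,)

# Clipping drops only the trailing lookahead blocks, which the replayed
# graph never uses, so discarding them is safe:
input_block_tables[0, :max_blocks] = block_table[:max_blocks]
print(input_block_tables[0])  # -> [10 11 12 13]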
