diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py index 56d073356cbfc..4951636c64cb0 100644 --- a/vllm/attention/backends/utils.py +++ b/vllm/attention/backends/utils.py @@ -228,7 +228,8 @@ def build(self, seq_lens: List[int], query_lens: List[int], # It may be possible to have more blocks allocated due # to lookahead slots of multi-step, however, they are # not used anyway, so can be safely ignored. - input_block_tables[i, :max_blocks] = block_table[:max_blocks] + input_block_tables[ + i, :max_blocks] = block_table[:max_blocks] block_tables = torch.from_numpy(input_block_tables).to( device, non_blocking=True) else: