From e73ed0f1c624f85d348c0709c256a0ae6627986b Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Mon, 22 Apr 2024 00:54:16 -0700 Subject: [PATCH] [Bugfix] Fix type annotations in CPU model runner (#4256) --- vllm/worker/cpu_model_runner.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vllm/worker/cpu_model_runner.py b/vllm/worker/cpu_model_runner.py index a82373d3d1626..bf0a6c84e6f07 100644 --- a/vllm/worker/cpu_model_runner.py +++ b/vllm/worker/cpu_model_runner.py @@ -73,7 +73,8 @@ def load_model(self) -> None: def _prepare_prompt( self, seq_group_metadata_list: List[SequenceGroupMetadata], - ) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, List[int]]: + ) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, List[int], + Optional[torch.Tensor]]: assert len(seq_group_metadata_list) > 0 input_tokens: List[int] = [] input_positions: List[int] = [] @@ -347,8 +348,8 @@ def _prepare_sample( def prepare_input_tensors( self, seq_group_metadata_list: List[SequenceGroupMetadata], - ) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, - SamplingMetadata]: + ) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, SamplingMetadata, + Optional[torch.Tensor]]: multi_modal_input = None if self.is_driver_worker: # NOTE: We assume that all sequences in the group are all prompts or