diff --git a/server/text_generation_server/server.py b/server/text_generation_server/server.py index 3a2e5c2c..f70cb5df 100644 --- a/server/text_generation_server/server.py +++ b/server/text_generation_server/server.py @@ -79,7 +79,7 @@ async def ModelInfo(self, request: generate_pb2.ModelInfoRequest, context) -> ge return generate_pb2.ModelInfoResponse( model_type=ModelInfoResponse.ModelType.SEQ2SEQ_LM if isinstance(self.model, Seq2SeqLM) else ModelInfoResponse.ModelType.CAUSAL_LM, - eos_token=self.model.config.eos_token_id, + eos_token=getattr(self.model.tokenizer, 'model_eos_token_id', self.model.tokenizer.eos_token_id), batch_padding=not isinstance(self.model, FlashCausalLM), memory_scaling_model=self.memory_scaling_model, )