From a7d89dcf445feceec0546f3befa0ce94073694b9 Mon Sep 17 00:00:00 2001 From: Joe Runde Date: Fri, 9 Aug 2024 11:23:02 -0600 Subject: [PATCH] :bug: add backwards compatibility Signed-off-by: Joe Runde --- src/vllm_tgis_adapter/__main__.py | 24 ++++++--------------- src/vllm_tgis_adapter/http.py | 35 +++++++++++++++++++------------ 2 files changed, 28 insertions(+), 31 deletions(-) diff --git a/src/vllm_tgis_adapter/__main__.py b/src/vllm_tgis_adapter/__main__.py index 8021dde..cbc577c 100644 --- a/src/vllm_tgis_adapter/__main__.py +++ b/src/vllm_tgis_adapter/__main__.py @@ -2,7 +2,6 @@ import asyncio import contextlib -import signal from concurrent.futures import FIRST_COMPLETED from typing import TYPE_CHECKING @@ -35,6 +34,7 @@ async def start_servers(args: argparse.Namespace) -> None: run_http_server(args, engine), name="http_server", ) + # The http server task will catch interrupt signals for us tasks.append(http_server_task) grpc_server_task = loop.create_task( @@ -43,29 +43,17 @@ async def start_servers(args: argparse.Namespace) -> None: ) tasks.append(grpc_server_task) - def signal_handler() -> None: - # prevents the uvicorn signal handler to exit early - for task in tasks: - task.cancel() - - async def override_signal_handler() -> None: - loop = asyncio.get_running_loop() - - for sig in (signal.SIGINT, signal.SIGTERM): - loop.add_signal_handler(sig, signal_handler) - - await override_signal_handler() - with contextlib.suppress(asyncio.CancelledError): # Both server tasks will exit normally on shutdown, so we await # FIRST_COMPLETED to catch either one shutting down. - await asyncio.wait( - tasks, - return_when=FIRST_COMPLETED, - ) + await asyncio.wait(tasks, return_when=FIRST_COMPLETED) + # Once either server shuts down, cancel the other for task in tasks: task.cancel() + # Final wait for both servers to finish + await asyncio.wait(tasks) + check_for_failed_tasks(tasks) diff --git a/src/vllm_tgis_adapter/http.py b/src/vllm_tgis_adapter/http.py index 8178b85..f111517 100644 --- a/src/vllm_tgis_adapter/http.py +++ b/src/vllm_tgis_adapter/http.py @@ -29,16 +29,25 @@ async def run_http_server( app = await init_app(engine, args) # type: ignore[arg-type] - await serve_http( - app, - engine, - host=args.host, - port=args.port, - log_level=args.uvicorn_log_level, - timeout_keep_alive=TIMEOUT_KEEP_ALIVE, - ssl_keyfile=args.ssl_keyfile, - ssl_certfile=args.ssl_certfile, - ssl_ca_certs=args.ssl_ca_certs, - ssl_cert_reqs=args.ssl_cert_reqs, - **uvicorn_kwargs, - ) + serve_kwargs = { + "host": args.host, + "port": args.port, + "log_level": args.uvicorn_log_level, + "timeout_keep_alive": TIMEOUT_KEEP_ALIVE, + "ssl_keyfile": args.ssl_keyfile, + "ssl_certfile": args.ssl_certfile, + "ssl_ca_certs": args.ssl_ca_certs, + "ssl_cert_reqs": args.ssl_cert_reqs, + } + serve_kwargs.update(uvicorn_kwargs) + + try: + shutdown_coro = await serve_http(app, engine, **serve_kwargs) + except TypeError: + # vllm 0.5.4 backwards compatibility + # HTTP server will not shut itself down when the engine dies + shutdown_coro = await serve_http(app, **serve_kwargs) + + # launcher.serve_http returns a shutdown coroutine to await + # (The double await is intentional) + await shutdown_coro