Skip to content

vllm.entrypoints.cli.launch

LaunchSubcommand

Bases: CLISubcommand

The `launch` subcommand for the vLLM CLI.

Uses nested sub-subcommands so that each component can define its own arguments independently (e.g. `vllm launch render`).

Source code in vllm/entrypoints/cli/launch.py
class LaunchSubcommand(CLISubcommand):
    """Entry point for the ``vllm launch`` command.

    The parser built here carries one nested subparser per launchable
    component (for instance ``vllm launch render``), which lets every
    component declare its own command-line arguments.
    """

    name = "launch"

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        """Dispatch to the handler registered for the selected component.

        When ``args`` carries a non-``None`` ``model_tag``, it is copied to
        ``args.model`` before the callable stored in ``args.launch_command``
        is invoked with ``args``.
        """
        model_tag = getattr(args, "model_tag", None)
        if model_tag is not None:
            args.model = model_tag

        args.launch_command(args)

    def validate(self, args: argparse.Namespace) -> None:
        """Check the parsed arguments via ``validate_parsed_serve_args``."""
        validate_parsed_serve_args(args)

    def subparser_init(
        self, subparsers: argparse._SubParsersAction
    ) -> FlexibleArgumentParser:
        """Register the ``launch`` parser and one nested parser per component.

        Components are taken from the direct subclasses of
        ``LaunchSubcommandBase``. Each nested parser stores the component's
        ``cmd`` as the ``launch_command`` default so that ``cmd`` above can
        dispatch to it.

        Returns:
            The top-level ``launch`` parser.
        """
        parser = subparsers.add_parser(
            self.name,
            usage=f"vllm {self.name} <component> [options]",
            description=DESCRIPTION,
            help=DESCRIPTION,
        )
        # A component name is mandatory: `vllm launch` alone is an error.
        component_parsers = parser.add_subparsers(
            dest="launch_component", required=True
        )

        for component in LaunchSubcommandBase.__subclasses__():
            full_name = f"{self.name} {component.name}"
            component_parser = component_parsers.add_parser(
                component.name,
                usage=f"vllm {full_name} [options]",
                description=component.help,
                help=component.help,
            )
            component_parser.set_defaults(launch_command=component.cmd)
            component.add_cli_args(component_parser)
            # The epilog is assigned last, after the component added its args.
            component_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(
                subcmd=full_name
            )

        return parser

LaunchSubcommandBase

Bases: CLISubcommand

The base class of subcommands for `vllm launch`.

Source code in vllm/entrypoints/cli/launch.py
class LaunchSubcommandBase(CLISubcommand):
    """Common parent for the components exposed under `vllm launch`."""

    # Short description of the component; declared here, assigned by subclasses.
    help: str

    @classmethod
    def add_cli_args(cls, parser: FlexibleArgumentParser) -> None:
        """Populate ``parser`` with this component's CLI arguments.

        The default implementation hands ``parser`` to ``make_arg_parser``,
        i.e. it registers the standard vLLM serving arguments. A subclass
        may override this method to register component-specific arguments.
        """
        make_arg_parser(parser)

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        """Run the component; the base class provides no implementation.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError()

add_cli_args classmethod

add_cli_args(parser: FlexibleArgumentParser) -> None

Add the CLI arguments to the parser.

By default, adds the standard vLLM serving arguments. Subclasses can override to add component-specific arguments.

Source code in vllm/entrypoints/cli/launch.py
@classmethod
def add_cli_args(cls, parser: FlexibleArgumentParser) -> None:
    """Add the CLI arguments to the parser.

    By default, adds the standard vLLM serving arguments by delegating to
    ``make_arg_parser``. Subclasses can override to add component-specific
    arguments.

    Args:
        parser: The parser that receives the arguments.
    """
    make_arg_parser(parser)

RenderSubcommand

Bases: LaunchSubcommandBase

The `render` subcommand for `vllm launch`.

Source code in vllm/entrypoints/cli/launch.py
class RenderSubcommand(LaunchSubcommandBase):
    """Implements `vllm launch render`."""

    name = "render"
    help = "Launch a GPU-less rendering server (preprocessing and postprocessing only)."

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        """Run ``run_launch_fastapi`` to completion under uvloop."""
        server_coro = run_launch_fastapi(args)
        uvloop.run(server_coro)

run_launch_fastapi async

run_launch_fastapi(args: Namespace) -> None

Run the online serving layer with FastAPI (no GPU inference).

Source code in vllm/entrypoints/cli/launch.py
async def run_launch_fastapi(args: argparse.Namespace) -> None:
    """Run the online serving layer with FastAPI (no GPU inference).

    The listening socket is bound first and is closed on every exit path,
    including failures while the engine client or the app is being built.

    Args:
        args: Parsed CLI arguments; passed to ``setup_server``,
            ``AsyncEngineArgs.from_cli_args`` and ``build_and_serve``.
    """
    from vllm.config import VllmConfig
    from vllm.v1.engine.launch import LaunchEngineClient

    # 1. Socket binding
    listen_address, sock = setup_server(args)

    # Everything after the bind lives inside the try block so that an error
    # during configuration or startup cannot leave the socket open.
    try:
        # 2. Create LaunchEngineClient (no GPU)
        engine_args = AsyncEngineArgs.from_cli_args(args)
        model_config = engine_args.create_model_config()
        vllm_config = VllmConfig(model_config=model_config)
        engine_client = LaunchEngineClient.from_vllm_config(vllm_config)

        # 3. Build app, initialize state, and start serving
        shutdown_task = await build_and_serve(
            engine_client, listen_address, sock, args
        )
        await shutdown_task
    finally:
        sock.close()