def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
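    """Build the CLI argument parser for the OpenAI-compatible API server."""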
parser.add_argument("--host",
type=optional_type(str),
default=None,
help="Host name.")
parser.add_argument("--port", type=int, default=8000, help="Port number.")
parser.add_argument(
"--uvicorn-log-level",
type=str,
default="info",
choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'],
help="Log level for uvicorn.")
parser.add_argument("--disable-uvicorn-access-log",
action="store_true",
help="Disable uvicorn access log.")
parser.add_argument("--allow-credentials",
action="store_true",
help="Allow credentials.")
parser.add_argument("--allowed-origins",
type=json.loads,
default=["*"],
help="Allowed origins.")
parser.add_argument("--allowed-methods",
type=json.loads,
default=["*"],
help="Allowed methods.")
parser.add_argument("--allowed-headers",
type=json.loads,
default=["*"],
help="Allowed headers.")
parser.add_argument("--api-key",
type=optional_type(str),
default=None,
help="If provided, the server will require this key "
"to be presented in the header.")
parser.add_argument(
"--lora-modules",
type=optional_type(str),
default=None,
nargs='+',
action=LoRAParserAction,
help="LoRA module configurations in either 'name=path' format"
"or JSON format. "
"Example (old format): ``'name=path'`` "
"Example (new format): "
"``{\"name\": \"name\", \"path\": \"lora_path\", "
"\"base_model_name\": \"id\"}``")
parser.add_argument(
"--prompt-adapters",
type=optional_type(str),
default=None,
nargs='+',
action=PromptAdapterParserAction,
help="Prompt adapter configurations in the format name=path. "
"Multiple adapters can be specified.")
parser.add_argument("--chat-template",
type=optional_type(str),
default=None,
help="The file path to the chat template, "
"or the template in single-line form "
"for the specified model.")
parser.add_argument(
'--chat-template-content-format',
type=str,
default="auto",
choices=get_args(ChatTemplateContentFormatOption),
help='The format to render message content within a chat template.'
'\n\n'
'* "string" will render the content as a string. '
'Example: ``"Hello World"``\n'
'* "openai" will render the content as a list of dictionaries, '
'similar to OpenAI schema. '
'Example: ``[{"type": "text", "text": "Hello world!"}]``')
parser.add_argument("--response-role",
type=optional_type(str),
default="assistant",
help="The role name to return if "
"``request.add_generation_prompt=true``.")
parser.add_argument("--ssl-keyfile",
type=optional_type(str),
default=None,
help="The file path to the SSL key file.")
parser.add_argument("--ssl-certfile",
type=optional_type(str),
default=None,
help="The file path to the SSL cert file.")
parser.add_argument("--ssl-ca-certs",
type=optional_type(str),
default=None,
help="The CA certificates file.")
parser.add_argument(
"--enable-ssl-refresh",
action="store_true",
default=False,
help="Refresh SSL Context when SSL certificate files change")
parser.add_argument(
"--ssl-cert-reqs",
type=int,
default=int(ssl.CERT_NONE),
help="Whether client certificate is required (see stdlib ssl module's)."
)
parser.add_argument(
"--root-path",
type=optional_type(str),
default=None,
help="FastAPI root_path when app is behind a path based routing proxy."
)
parser.add_argument(
"--middleware",
type=optional_type(str),
action="append",
default=[],
help="Additional ASGI middleware to apply to the app. "
"We accept multiple --middleware arguments. "
"The value should be an import path. "
"If a function is provided, vLLM will add it to the server "
"using ``@app.middleware('http')``. "
"If a class is provided, vLLM will add it to the server "
"using ``app.add_middleware()``. ")
parser.add_argument(
"--return-tokens-as-token-ids",
action="store_true",
help="When ``--max-logprobs`` is specified, represents single tokens "
" as strings of the form 'token_id:{token_id}' so that tokens "
"that are not JSON-encodable can be identified.")
parser.add_argument(
"--disable-frontend-multiprocessing",
action="store_true",
help="If specified, will run the OpenAI frontend server in the same "
"process as the model serving engine.")
parser.add_argument(
"--enable-request-id-headers",
action="store_true",
help="If specified, API server will add X-Request-Id header to "
"responses.")
parser.add_argument(
"--enable-auto-tool-choice",
action="store_true",
default=False,
help="Enable auto tool choice for supported models. Use "
"``--tool-call-parser`` to specify which parser to use.")
parser.add_argument(
"--expand-tools-even-if-tool-choice-none",
action="store_true",
default=False,
deprecated=True,
help="Include tool definitions in prompts "
"even when tool_choice='none'. "
"This is a transitional option that will be removed in v0.10.0. "
"In v0.10.0, tool definitions will always be included regardless of "
"tool_choice setting. Use this flag now to test the new behavior "
"before the breaking change.")
valid_tool_parsers = ToolParserManager.tool_parsers.keys()
parser.add_argument(
"--tool-call-parser",
type=str,
metavar="{" + ",".join(valid_tool_parsers) + "} or name registered in "
"--tool-parser-plugin",
default=None,
help=
"Select the tool call parser depending on the model that you're using."
" This is used to parse the model-generated tool call into OpenAI API "
"format. Required for ``--enable-auto-tool-choice``.")
parser.add_argument(
"--tool-parser-plugin",
type=str,
default="",
        help=
        "Specify the tool parser plugin used to parse model-generated tool "
        "calls into OpenAI API format; the names registered in this plugin "
        "can be used in ``--tool-call-parser``.")
parser.add_argument(
"--log-config-file",
type=str,
default=envs.VLLM_LOGGING_CONFIG_PATH,
help="Path to logging config JSON file for both vllm and uvicorn",
)
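    # Register all engine-level arguments (model, parallelism, etc.) on the
    # same parser.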
parser = AsyncEngineArgs.add_cli_args(parser)
parser.add_argument('--max-log-len',
type=int,
default=None,
                        help='Max number of prompt characters or prompt '
                        'token IDs printed in the log. '
                        'The default of None means unlimited.')
parser.add_argument(
"--disable-fastapi-docs",
action='store_true',
default=False,
help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."
)
parser.add_argument(
"--enable-prompt-tokens-details",
action='store_true',
default=False,
help="If set to True, enable prompt_tokens_details in usage.")
parser.add_argument(
"--enable-force-include-usage",
action='store_true',
default=False,
help="If set to True, including usage on every request.")
parser.add_argument(
"--enable-server-load-tracking",
action='store_true',
default=False,
        help=
        "If set to True, enable tracking of server_load_metrics in the app "
        "state."
)
return parser
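
# Minimal usage sketch (assumes FlexibleArgumentParser comes from vllm.utils,
# as used elsewhere in vLLM):
#
#     parser = make_arg_parser(
#         FlexibleArgumentParser(description="vLLM OpenAI-compatible server"))
#     args = parser.parse_args()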