
vllm.model_executor.guided_decoding

Modules:

guidance_decoding
guidance_logits_processors
guided_fields
lm_format_enforcer_decoding
outlines_decoding
outlines_logits_processors
utils
xgrammar_decoding

logger module-attribute

logger = init_logger(__name__)

get_guided_decoding_logits_processor async

get_guided_decoding_logits_processor(
    guided_params: GuidedDecodingParams,
    tokenizer: PreTrainedTokenizer,
    model_config: ModelConfig,
    reasoning_backend: str | None = None,
) -> LogitsProcessor | None
Source code in vllm/model_executor/guided_decoding/__init__.py
async def get_guided_decoding_logits_processor(
        guided_params: GuidedDecodingParams,
        tokenizer: PreTrainedTokenizer,
        model_config: ModelConfig,
        reasoning_backend: str | None = None) -> LogitsProcessor | None:

    reasoner = None
    if reasoning_backend:
        reasoner_class = ReasoningParserManager.get_reasoning_parser(
            reasoning_backend)
        reasoner = reasoner_class(tokenizer)

    guided_params = maybe_backend_fallback(guided_params)

    # CFG grammars are not supported by LMFE, so we use outlines instead
    if guided_params.backend == 'outlines':
        # NOTE: lazy import outlines to avoid https://github.com/vllm-project/vllm/issues/4193
        from vllm.model_executor.guided_decoding.outlines_decoding import (  # noqa
            get_outlines_guided_decoding_logits_processor)
        return await get_outlines_guided_decoding_logits_processor(
            guided_params, tokenizer, reasoner)
    if guided_params.backend == 'lm-format-enforcer':
        from vllm.model_executor.guided_decoding.lm_format_enforcer_decoding import (  # noqa
            get_local_lm_format_enforcer_guided_decoding_logits_processor)
        return get_local_lm_format_enforcer_guided_decoding_logits_processor(
            guided_params, tokenizer)
    if guided_params.backend == 'xgrammar':
        from vllm.model_executor.guided_decoding.xgrammar_decoding import (  # noqa
            get_local_xgrammar_guided_decoding_logits_processor)
        return get_local_xgrammar_guided_decoding_logits_processor(
            guided_params, tokenizer, model_config, reasoner)
    if guided_params.backend == 'guidance':
        from vllm.model_executor.guided_decoding.guidance_decoding import (
            get_local_guidance_guided_decoding_logits_processor)
        return get_local_guidance_guided_decoding_logits_processor(
            guided_params, tokenizer)
    raise ValueError(
        f"Unknown guided decoding backend '{guided_params.backend}'. "
        "Must be one of 'outlines', 'lm-format-enforcer', 'xgrammar', 'guidance'"
    )
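
Example (illustrative): a minimal sketch of obtaining a processor through this async entry point. It assumes a small locally runnable model, takes the tokenizer and model config from the engine internals (llm.get_tokenizer() and llm.llm_engine.model_config), and uses a stand-in JSON schema; none of this is prescribed by the module itself.

import asyncio

from vllm import LLM
from vllm.model_executor.guided_decoding import (
    get_guided_decoding_logits_processor)
from vllm.sampling_params import GuidedDecodingParams


async def build_processor():
    llm = LLM(model="facebook/opt-125m")  # any small model works
    params = GuidedDecodingParams(
        json={"type": "object",
              "properties": {"name": {"type": "string"}}},  # stand-in schema
        backend="xgrammar")
    # Returns a callable applied to the logits at every decode step,
    # or None if no guided decoding was requested.
    return await get_guided_decoding_logits_processor(
        params, llm.get_tokenizer(), llm.llm_engine.model_config)


processor = asyncio.run(build_processor())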

get_local_guided_decoding_logits_processor

get_local_guided_decoding_logits_processor(
    guided_params: GuidedDecodingParams,
    tokenizer: PreTrainedTokenizer,
    model_config: ModelConfig,
    reasoning_backend: str | None = None,
) -> LogitsProcessor | None
Source code in vllm/model_executor/guided_decoding/__init__.py
def get_local_guided_decoding_logits_processor(
        guided_params: GuidedDecodingParams,
        tokenizer: PreTrainedTokenizer,
        model_config: ModelConfig,
        reasoning_backend: str | None = None) -> LogitsProcessor | None:
    guided_params = maybe_backend_fallback(guided_params)

    reasoner = None
    if reasoning_backend:
        reasoner_class = ReasoningParserManager.get_reasoning_parser(
            reasoning_backend)
        reasoner = reasoner_class(tokenizer)

    # CFG grammars are not supported by LMFE, so we use outlines instead
    if guided_params.backend == 'outlines':
        # NOTE: lazy import outlines to avoid https://github.com/vllm-project/vllm/issues/4193
        from vllm.model_executor.guided_decoding.outlines_decoding import (  # noqa
            get_local_outlines_guided_decoding_logits_processor)
        return get_local_outlines_guided_decoding_logits_processor(
            guided_params, tokenizer, reasoner)
    if guided_params.backend == 'lm-format-enforcer':
        from vllm.model_executor.guided_decoding.lm_format_enforcer_decoding import (  # noqa
            get_local_lm_format_enforcer_guided_decoding_logits_processor)
        return get_local_lm_format_enforcer_guided_decoding_logits_processor(
            guided_params, tokenizer)
    if guided_params.backend == 'xgrammar':
        from vllm.model_executor.guided_decoding.xgrammar_decoding import (  # noqa
            get_local_xgrammar_guided_decoding_logits_processor)
        return get_local_xgrammar_guided_decoding_logits_processor(
            guided_params, tokenizer, model_config, reasoner)
    if guided_params.backend == 'guidance':
        from vllm.model_executor.guided_decoding.guidance_decoding import (
            get_local_guidance_guided_decoding_logits_processor)
        return get_local_guidance_guided_decoding_logits_processor(
            guided_params, tokenizer)

    raise ValueError(
        f"Unknown guided decoding backend '{guided_params.backend}'. "
        "Must be one of 'outlines', 'lm-format-enforcer', 'xgrammar', 'guidance'"
    )
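
Example (illustrative): the local variant performs the same backend resolution synchronously. The sketch below applies the returned processor by hand to a zero logits row, assuming the processor(input_ids, scores) call signature used by the outlines_logits_processors module; in normal use the engine invokes it internally.

import torch

from vllm import LLM
from vllm.model_executor.guided_decoding import (
    get_local_guided_decoding_logits_processor)
from vllm.sampling_params import GuidedDecodingParams

llm = LLM(model="facebook/opt-125m")
tokenizer = llm.get_tokenizer()
params = GuidedDecodingParams(choice=["yes", "no"], backend="outlines")
processor = get_local_guided_decoding_logits_processor(
    params, tokenizer, llm.llm_engine.model_config)

# Maps (token ids generated so far, logits row) to a new logits row in
# which tokens that would violate the constraint are masked out.
logits = torch.zeros(len(tokenizer))  # placeholder logits row (assumption)
masked = processor([], logits)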

maybe_backend_fallback

maybe_backend_fallback(
    guided_params: GuidedDecodingParams,
) -> GuidedDecodingParams
Source code in vllm/model_executor/guided_decoding/__init__.py
def maybe_backend_fallback(
        guided_params: GuidedDecodingParams) -> GuidedDecodingParams:

    def fallback_or_error(guided_params: GuidedDecodingParams, message: str,
                          fallback: str) -> None:
        """Change the backend to the specified fallback with a warning log,
        or raise a ValueError if the `disable_fallback` option is specified."""
        if guided_params.disable_fallback:
            raise ValueError(message)

        logger.warning("%s Falling back to use %s instead.", message, fallback)
        guided_params.backend = fallback

    # `auto` was added for V1 to explicitly declare a mode that has fallbacks
    # in place. If that is specified with V0, treat it as `xgrammar`, as we have
    # fallbacks enabled for that and it is the V0 default.
    if guided_params.backend == "auto":
        guided_params.backend = "xgrammar"

    # lm-format-enforcer doesn't support grammar, fallback to xgrammar
    if guided_params.backend == "lm-format-enforcer":
        if guided_params.grammar is not None:
            fallback_or_error(
                guided_params,
                "lm-format-enforcer does not support grammar guided decoding.",
                "xgrammar")

        # lm-format-enforcer doesn't support some JSON schema features
        elif (guided_params.json is not None
              and has_lmf_unsupported_json_features(guided_params.json)):
            fallback_or_error(
                guided_params,
                "lm-format-enforcer does not support advanced JSON schema "
                "features like patterns or numeric ranges.", "outlines")

    if guided_params.backend == "xgrammar":
        from vllm.model_executor.guided_decoding.xgrammar_decoding import (
            xgr_installed)

        # xgrammar doesn't support some JSON schema features
        if (guided_params.json is not None and
                has_xgrammar_unsupported_json_features(guided_params.json)):
            fallback_or_error(
                guided_params,
                "xgrammar does not support advanced JSON schema features like "
                "string length, item limits, or property bounds.", "outlines")

        # xgrammar only supports GBNF grammars, so Lark grammars must be
        # converted. Check whether the grammar looks like Lark and, if so,
        # whether it can be converted to GBNF.
        elif (guided_params.grammar is not None
              and grammar_is_likely_lark(guided_params.grammar)):
            try:
                convert_lark_to_gbnf(guided_params.grammar)
            except Exception:
                fallback_or_error(
                    guided_params,
                    "xgrammar does not support Lark grammars and the "
                    "grammar failed to convert to GBNF.", "outlines")

        # If the xgrammar module cannot be imported successfully,
        # we should still allow users to use guided decoding with a fallback.
        elif not xgr_installed:
            fallback_or_error(
                guided_params,
                "xgrammar module cannot be imported successfully.", "outlines")

    if (guided_params.backend == "outlines"
            and guided_params.json_object is not None):
        # outlines doesn't support json_object, fallback to guidance
        fallback_or_error(guided_params,
                          "outlines does not support json_object.", "guidance")

    return guided_params
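
Example (illustrative): the fallback can be observed directly. Per the source above, lm-format-enforcer has no grammar support, so the returned params point at xgrammar instead, while disable_fallback turns the same combination into a ValueError. The GBNF grammar string is a stand-in.

from vllm.model_executor.guided_decoding import maybe_backend_fallback
from vllm.sampling_params import GuidedDecodingParams

params = maybe_backend_fallback(
    GuidedDecodingParams(grammar='root ::= "a" | "b"',
                         backend="lm-format-enforcer"))
print(params.backend)  # xgrammar (after a logged warning)

strict = GuidedDecodingParams(grammar='root ::= "a" | "b"',
                              backend="lm-format-enforcer",
                              disable_fallback=True)
try:
    maybe_backend_fallback(strict)
except ValueError as err:
    print(err)  # lm-format-enforcer does not support grammar guided decoding.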