vllm.v1.structured_output.backend_types

StructuredOutputKey module-attribute

StructuredOutputKey = tuple[StructuredOutputOptions, str]
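
For illustration only, a key of this shape pairs a request type with its raw
grammar spec; the JSON schema string and the caching use below are assumptions,
not something this module prescribes.

from vllm.v1.structured_output.backend_types import (StructuredOutputKey,
                                                      StructuredOutputOptions)

# Hypothetical key for a JSON-schema request: the request type plus the raw
# spec string, e.g. usable as a cache key for compiled grammars.
key: StructuredOutputKey = (
    StructuredOutputOptions.JSON,
    '{"type": "object", "properties": {"name": {"type": "string"}}}',
)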

StructuredOutputBackend dataclass

Bases: ABC

Engine-level backend for structured output requests.

Source code in vllm/v1/structured_output/backend_types.py
@dataclass
class StructuredOutputBackend(ABC):
    """Engine-level backend for structured output requests."""

    vllm_config: VllmConfig
    tokenizer: AnyTokenizer
    vocab_size: int

    @abstractmethod
    def compile_grammar(self, request_type: StructuredOutputOptions,
                        grammar_spec: str) -> StructuredOutputGrammar:
        """
        Compiles a grammar specification into a structured output grammar.

        Args:
            request_type (StructuredOutputOptions): The type of structured
              output request.
            grammar_spec (str): The grammar specification to compile.

        Returns:
            StructuredOutputGrammar: The compiled structured output grammar.
        """

    @abstractmethod
    def allocate_token_bitmask(self, max_num_seqs: int) -> torch.Tensor:
        """
        Allocates a token bitmask for the specified maximum number of sequences.

        Args:
            max_num_seqs (int): The maximum number of sequences for which
              to allocate the bitmask.
        """

    @abstractmethod
    def destroy(self):
        """
        Backend-specific cleanup.
        """

tokenizer instance-attribute

tokenizer: AnyTokenizer

vllm_config instance-attribute

vllm_config: VllmConfig

vocab_size instance-attribute

vocab_size: int

__init__

__init__(
    vllm_config: VllmConfig,
    tokenizer: AnyTokenizer,
    vocab_size: int,
) -> None

allocate_token_bitmask abstractmethod

allocate_token_bitmask(max_num_seqs: int) -> Tensor

Allocates a token bitmask for the specified maximum number of sequences.

Parameters:

    max_num_seqs (int, required): The maximum number of sequences for which
        to allocate the bitmask.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def allocate_token_bitmask(self, max_num_seqs: int) -> torch.Tensor:
    """
    Allocates a token bitmask for the specified maximum number of sequences.

    Args:
        max_num_seqs (int): The maximum number of sequences for which
          to allocate the bitmask.
    """

compile_grammar abstractmethod

compile_grammar(
    request_type: StructuredOutputOptions, grammar_spec: str
) -> StructuredOutputGrammar

Compiles a grammar specification into a structured output grammar.

Parameters:

    request_type (StructuredOutputOptions, required): The type of structured
        output request.
    grammar_spec (str, required): The grammar specification to compile.

Returns:

    StructuredOutputGrammar: The compiled structured output grammar.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def compile_grammar(self, request_type: StructuredOutputOptions,
                    grammar_spec: str) -> StructuredOutputGrammar:
    """
    Compiles a grammar specification into a structured output grammar.

    Args:
        request_type (StructuredOutputOptions): The type of structured
          output request.
        grammar_spec (str): The grammar specification to compile.

    Returns:
        StructuredOutputGrammar: The compiled structured output grammar.
    """

destroy abstractmethod

destroy()

Backend-specific cleanup.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def destroy(self):
    """
    Backend-specific cleanup.
    """

StructuredOutputGrammar

Bases: ABC

Request-level backend for structured output requests.

Source code in vllm/v1/structured_output/backend_types.py
class StructuredOutputGrammar(ABC):
    """Request-level backend for structured output requests."""

    @abstractmethod
    def accept_tokens(self, request_id: str, tokens: list[int]) -> bool:
        """
        Determines whether the provided tokens are accepted for the
        given request.

        Args:
            request_id (str): The unique identifier for the request.
            tokens (list[int]): A list of token IDs to evaluate.

        Returns:
            bool: True if the tokens are accepted, False otherwise.
        """

    @abstractmethod
    def validate_tokens(self, tokens: list[int]) -> list[int]:
        """
        Validates the provided tokens against the grammar.
        Will not advance the FSM.

        Args:
            tokens (list[int]): A list of token IDs to validate.

        Returns:
            list[int]: A list of accepted token IDs. Will be a prefix
                of the input tokens, and empty if none are accepted.
        """

    @abstractmethod
    def rollback(self, num_tokens: int) -> None:
        """
        Rolls back the state of the grammar by a specified number of tokens.
        Will also revert counters for the number of processed tokens.

        Args:
            num_tokens (int): The number of tokens to roll back.
        """

    @abstractmethod
    def fill_bitmask(self, bitmask: torch.Tensor, batch_index: int) -> None:
        """
        Fills the bitmask for a specific batch index.

        Args:
            bitmask (torch.Tensor): The bitmask to fill
            batch_index (int): The index in the bitmask to fill
        """

    @abstractmethod
    def is_terminated(self) -> bool:
        """
        Checks whether the structured output process has terminated.

        Returns:
            bool: True if the process is terminated, False otherwise.
        """

    @abstractmethod
    def reset(self):
        """
        Resets the state of the structured output grammar.
        """

accept_tokens abstractmethod

accept_tokens(request_id: str, tokens: list[int]) -> bool

Determines whether the provided tokens are accepted for the given request.

Parameters:

    request_id (str, required): The unique identifier for the request.
    tokens (list[int], required): A list of token IDs to evaluate.

Returns:

    bool: True if the tokens are accepted, False otherwise.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def accept_tokens(self, request_id: str, tokens: list[int]) -> bool:
    """
    Determines whether the provided tokens are accepted for the
    given request.

    Args:
        request_id (str): The unique identifier for the request.
        tokens (list[int]): A list of token IDs to evaluate.

    Returns:
        bool: True if the tokens are accepted, False otherwise.
    """

fill_bitmask abstractmethod

fill_bitmask(bitmask: Tensor, batch_index: int) -> None

Fills the bitmask for a specific batch index.

Parameters:

    bitmask (Tensor, required): The bitmask to fill.
    batch_index (int, required): The index in the bitmask to fill.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def fill_bitmask(self, bitmask: torch.Tensor, batch_index: int) -> None:
    """
    Fills the bitmask for a specific batch index.

    Args:
        bitmask (torch.Tensor): The bitmask to fill
        batch_index (int): The index in the bitmask to fill
    """

is_terminated abstractmethod

is_terminated() -> bool

Checks whether the structured output process has terminated.

Returns:

    bool: True if the process is terminated, False otherwise.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def is_terminated(self) -> bool:
    """
    Checks whether the structured output process has terminated.

    Returns:
        bool: True if the process is terminated, False otherwise.
    """

reset abstractmethod

reset()

Resets the state of the structured output grammar.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def reset(self):
    """
    Resets the state of the structured output grammar.
    """

rollback abstractmethod

rollback(num_tokens: int) -> None

Rolls back the state of the grammar by a specified number of tokens. Will also revert counters for the number of processed tokens.

Parameters:

    num_tokens (int, required): The number of tokens to roll back.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def rollback(self, num_tokens: int) -> None:
    """
    Rolls back the state of the grammar by a specified number of tokens.
    Will also revert counters for the number of processed tokens.

    Args:
        num_tokens (int): The number of tokens to roll back.
    """

validate_tokens abstractmethod

validate_tokens(tokens: list[int]) -> list[int]

Validates the provided tokens against the grammar. Will not advance the FSM.

Parameters:

    tokens (list[int], required): A list of token IDs to validate.

Returns:

    list[int]: A list of accepted token IDs. Will be a prefix of the input
        tokens, and empty if none are accepted.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def validate_tokens(self, tokens: list[int]) -> list[int]:
    """
    Validates the provided tokens against the grammar.
    Will not advance the FSM.

    Args:
        tokens (list[int]): A list of token IDs to validate.

    Returns:
        list[int]: A list of accepted token IDs. Will be a prefix
            of the input tokens, and empty if none are accepted.
    """

StructuredOutputOptions

Bases: Enum

Source code in vllm/v1/structured_output/backend_types.py
class StructuredOutputOptions(enum.Enum):
    JSON = enum.auto()
    JSON_OBJECT = enum.auto()
    REGEX = enum.auto()
    GRAMMAR = enum.auto()
    CHOICE = enum.auto()
    STRUCTURAL_TAG = enum.auto()

CHOICE class-attribute instance-attribute

CHOICE = auto()

GRAMMAR class-attribute instance-attribute

GRAMMAR = auto()

JSON class-attribute instance-attribute

JSON = auto()

JSON_OBJECT class-attribute instance-attribute

JSON_OBJECT = auto()

REGEX class-attribute instance-attribute

REGEX = auto()

STRUCTURAL_TAG class-attribute instance-attribute

STRUCTURAL_TAG = auto()
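
For illustration, a hypothetical dispatch from request-style parameters to these
options; the parameter names are examples, not a vLLM API.

from vllm.v1.structured_output.backend_types import StructuredOutputOptions


def to_option(params: dict) -> StructuredOutputOptions:
    """Hypothetical mapping from request parameters to an option."""
    if "json_schema" in params:
        return StructuredOutputOptions.JSON
    if params.get("json_object"):
        return StructuredOutputOptions.JSON_OBJECT
    if "regex" in params:
        return StructuredOutputOptions.REGEX
    if "grammar" in params:
        return StructuredOutputOptions.GRAMMAR
    if "choice" in params:
        return StructuredOutputOptions.CHOICE
    return StructuredOutputOptions.STRUCTURAL_TAG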