vllm.v1.structured_output.backend_types

StructuredOutputKey module-attribute

StructuredOutputKey = tuple[StructuredOutputOptions, str]
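
For illustration only, a key of this shape pairs a request type with its raw
grammar spec; the JSON schema string and the caching use below are assumptions,
not something this module prescribes.

from vllm.v1.structured_output.backend_types import (StructuredOutputKey,
                                                      StructuredOutputOptions)

# Hypothetical key for a JSON-schema request: the request type plus the raw
# spec string, e.g. usable as a cache key for compiled grammars.
key: StructuredOutputKey = (
    StructuredOutputOptions.JSON,
    '{"type": "object", "properties": {"name": {"type": "string"}}}',
)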

StructuredOutputBackend dataclass

Bases: ABC

Engine-level backend for structured output requests.

Source code in vllm/v1/structured_output/backend_types.py
@dataclass
class StructuredOutputBackend(ABC):
    """Engine-level backend for structured output requests."""

    vllm_config: VllmConfig
    tokenizer: AnyTokenizer
    vocab_size: int

    @abstractmethod
    def compile_grammar(self, request_type: StructuredOutputOptions,
                        grammar_spec: str) -> StructuredOutputGrammar:
        """
        Compiles a grammar specification into a structured output grammar.

        Args:
            request_type (StructuredOutputOptions): The type of structured
              output request.
            grammar_spec (str): The grammar specification to compile.

        Returns:
            StructuredOutputGrammar: The compiled structured output grammar.
        """

    @abstractmethod
    def allocate_token_bitmask(self, max_num_seqs: int) -> torch.Tensor:
        """
        Allocates a token bitmask for the specified maximum number of sequences.

        Args:
            max_num_seqs (int): The maximum number of sequences for which
              to allocate the bitmask.
        """

    @abstractmethod
    def destroy(self):
        """
        Backend-specific cleanup.
        """

tokenizer instance-attribute

tokenizer: AnyTokenizer

vllm_config instance-attribute

vllm_config: VllmConfig

vocab_size instance-attribute

vocab_size: int

__init__

__init__(
    vllm_config: VllmConfig,
    tokenizer: AnyTokenizer,
    vocab_size: int,
) -> None

allocate_token_bitmask abstractmethod

allocate_token_bitmask(max_num_seqs: int) -> Tensor

Allocates a token bitmask for the specified maximum number of sequences.

Parameters:

    max_num_seqs (int, required): The maximum number of sequences for which
        to allocate the bitmask.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def allocate_token_bitmask(self, max_num_seqs: int) -> torch.Tensor:
    """
    Allocates a token bitmask for the specified maximum number of sequences.

    Args:
        max_num_seqs (int): The maximum number of sequences for which
          to allocate the bitmask.
    """

compile_grammar abstractmethod

compile_grammar(
    request_type: StructuredOutputOptions, grammar_spec: str
) -> StructuredOutputGrammar

Compiles a grammar specification into a structured output grammar.

Parameters:

    request_type (StructuredOutputOptions, required): The type of structured
        output request.
    grammar_spec (str, required): The grammar specification to compile.

Returns:

    StructuredOutputGrammar: The compiled structured output grammar.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def compile_grammar(self, request_type: StructuredOutputOptions,
                    grammar_spec: str) -> StructuredOutputGrammar:
    """
    Compiles a grammar specification into a structured output grammar.

    Args:
        request_type (StructuredOutputOptions): The type of structured
          output request.
        grammar_spec (str): The grammar specification to compile.

    Returns:
        StructuredOutputGrammar: The compiled structured output grammar.
    """

destroy abstractmethod

destroy()

Backend-specific cleanup.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def destroy(self):
    """
    Backend-specific cleanup.
    """

StructuredOutputGrammar

Bases: ABC

Request-level backend for structured output requests.

Source code in vllm/v1/structured_output/backend_types.py
class StructuredOutputGrammar(ABC):
    """Request-level backend for structured output requests."""

    @abstractmethod
    def accept_tokens(self, request_id: str, tokens: list[int]) -> bool:
        """
        Determines whether the provided tokens are accepted for the
        given request.

        Args:
            request_id (str): The unique identifier for the request.
            tokens (list[int]): A list of token IDs to evaluate.

        Returns:
            bool: True if the tokens are accepted, False otherwise.
        """

    @abstractmethod
    def validate_tokens(self, tokens: list[int]) -> list[int]:
        """
        Validates the provided tokens against the grammar.
        Will not advance the FSM.

        Args:
            tokens (list[int]): A list of token IDs to validate.

        Returns:
            list[int]: A list of accepted token IDs. Will be a prefix
                of the input tokens, and empty if none are accepted.
        """

    @abstractmethod
    def rollback(self, num_tokens: int) -> None:
        """
        Rolls back the state of the grammar by a specified number of tokens.
        Will also revert counters for the number of processed tokens.

        Args:
            num_tokens (int): The number of tokens to roll back.
        """

    @abstractmethod
    def fill_bitmask(self, bitmask: torch.Tensor, batch_index: int) -> None:
        """
        Fills the bitmask for a specific batch index.

        Args:
            bitmask (torch.Tensor): The bitmask to fill
            batch_index (int): The index in the bitmask to fill
        """

    @abstractmethod
    def is_terminated(self) -> bool:
        """
        Checks whether the structured output process has terminated.

        Returns:
            bool: True if the process is terminated, False otherwise.
        """

    @abstractmethod
    def reset(self):
        """
        Resets the state of the structured output grammar.
        """

accept_tokens abstractmethod

accept_tokens(request_id: str, tokens: list[int]) -> bool

Determines whether the provided tokens are accepted for the given request.

Parameters:

    request_id (str, required): The unique identifier for the request.
    tokens (list[int], required): A list of token IDs to evaluate.

Returns:

    bool: True if the tokens are accepted, False otherwise.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def accept_tokens(self, request_id: str, tokens: list[int]) -> bool:
    """
    Determines whether the provided tokens are accepted for the
    given request.

    Args:
        request_id (str): The unique identifier for the request.
        tokens (list[int]): A list of token IDs to evaluate.

    Returns:
        bool: True if the tokens are accepted, False otherwise.
    """

fill_bitmask abstractmethod

fill_bitmask(bitmask: Tensor, batch_index: int) -> None

Fills the bitmask for a specific batch index.

Parameters:

    bitmask (Tensor, required): The bitmask to fill.
    batch_index (int, required): The index in the bitmask to fill.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def fill_bitmask(self, bitmask: torch.Tensor, batch_index: int) -> None:
    """
    Fills the bitmask for a specific batch index.

    Args:
        bitmask (torch.Tensor): The bitmask to fill
        batch_index (int): The index in the bitmask to fill
    """

is_terminated abstractmethod

is_terminated() -> bool

Checks whether the structured output process has terminated.

Returns:

    bool: True if the process is terminated, False otherwise.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def is_terminated(self) -> bool:
    """
    Checks whether the structured output process has terminated.

    Returns:
        bool: True if the process is terminated, False otherwise.
    """

reset abstractmethod

reset()

Resets the state of the structured output grammar.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def reset(self):
    """
    Resets the state of the structured output grammar.
    """

rollback abstractmethod

rollback(num_tokens: int) -> None

Rolls back the state of the grammar by a specified number of tokens. Will also revert counters for the number of processed tokens.

Parameters:

    num_tokens (int, required): The number of tokens to roll back.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def rollback(self, num_tokens: int) -> None:
    """
    Rolls back the state of the grammar by a specified number of tokens.
    Will also revert counters for the number of processed tokens.

    Args:
        num_tokens (int): The number of tokens to roll back.
    """

validate_tokens abstractmethod

validate_tokens(tokens: list[int]) -> list[int]

Validates the provided tokens against the grammar. Will not advance the FSM.

Parameters:

    tokens (list[int], required): A list of token IDs to validate.

Returns:

    list[int]: A list of accepted token IDs. Will be a prefix of the input
        tokens, and empty if none are accepted.

Source code in vllm/v1/structured_output/backend_types.py
@abstractmethod
def validate_tokens(self, tokens: list[int]) -> list[int]:
    """
    Validates the provided tokens against the grammar.
    Will not advance the FSM.

    Args:
        tokens (list[int]): A list of token IDs to validate.

    Returns:
        list[int]: A list of accepted token IDs. Will be a prefix
            of the input tokens, and empty if none are accepted.
    """

StructuredOutputOptions

Bases: Enum

Source code in vllm/v1/structured_output/backend_types.py
class StructuredOutputOptions(enum.Enum):
    JSON = enum.auto()
    JSON_OBJECT = enum.auto()
    REGEX = enum.auto()
    GRAMMAR = enum.auto()
    CHOICE = enum.auto()
    STRUCTURAL_TAG = enum.auto()

CHOICE class-attribute instance-attribute

CHOICE = auto()

GRAMMAR class-attribute instance-attribute

GRAMMAR = auto()

JSON class-attribute instance-attribute

JSON = auto()

JSON_OBJECT class-attribute instance-attribute

JSON_OBJECT = auto()

REGEX class-attribute instance-attribute

REGEX = auto()

STRUCTURAL_TAG class-attribute instance-attribute

STRUCTURAL_TAG = auto()
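
For illustration, a hypothetical dispatch from request-style parameters to these
options; the parameter names are examples, not a vLLM API.

from vllm.v1.structured_output.backend_types import StructuredOutputOptions


def to_option(params: dict) -> StructuredOutputOptions:
    """Hypothetical mapping from request parameters to an option."""
    if "json_schema" in params:
        return StructuredOutputOptions.JSON
    if params.get("json_object"):
        return StructuredOutputOptions.JSON_OBJECT
    if "regex" in params:
        return StructuredOutputOptions.REGEX
    if "grammar" in params:
        return StructuredOutputOptions.GRAMMAR
    if "choice" in params:
        return StructuredOutputOptions.CHOICE
    return StructuredOutputOptions.STRUCTURAL_TAG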