Skip to content

vllm.model_executor.pooling_metadata

PoolingMetadata

Metadata for pooling operations in the Pooler layer.

This class holds the necessary information for pooling operations, providing context for how to perform pooling and other related operations.

Attributes:

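| Name | Type | Description |
| --- | --- | --- |
| seq_groups | list[tuple[list[int], PoolingParams]] | List of (seq_ids, pooling_params). |
| seq_data | dict[int, Any] | A mapping of sequence ID to additional sequence data. |
| prompt_lens | list[int] | List of the lengths of each prompt. |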

Source code in vllm/model_executor/pooling_metadata.py
class PoolingMetadata:
    """Container for the inputs the Pooler layer needs.

    Instances only hold references to the objects passed to the
    constructor; nothing is copied, validated, or derived here.

    Attributes:
        seq_groups: List of (seq_ids, pooling_params).
        seq_data: A mapping of sequence ID to additional sequence data.
        prompt_lens: List of the lengths of each prompt.
    """

    def __init__(
        self,
        seq_groups: list[tuple[list[int], PoolingParams]],
        seq_data: dict[int, Any],
        prompt_lens: list[int],
    ) -> None:
        """Record the supplied containers on the instance.

        Args:
            seq_groups: List of (seq_ids, pooling_params) pairs.
            seq_data: Specific data related to sequences, keyed by
                sequence ID.
            prompt_lens: Length of each prompt.
        """
        # Assigned left to right, so attribute creation order matches the
        # parameter order.
        self.seq_groups, self.seq_data, self.prompt_lens = (
            seq_groups,
            seq_data,
            prompt_lens,
        )

    def __repr__(self) -> str:
        """Return ``PoolingMetadata(...)`` listing all three attributes."""
        parts = (
            f"seq_groups={self.seq_groups}",
            f"seq_data={self.seq_data}",
            f"prompt_lens={self.prompt_lens}",
        )
        return "PoolingMetadata(" + ", ".join(parts) + ")"

prompt_lens instance-attribute

prompt_lens = prompt_lens

seq_data instance-attribute

seq_data = seq_data

seq_groups instance-attribute

seq_groups = seq_groups

__init__

__init__(
    seq_groups: list[tuple[list[int], PoolingParams]],
    seq_data: dict[int, Any],
    prompt_lens: list[int],
) -> None
Source code in vllm/model_executor/pooling_metadata.py
def __init__(
    self,
    seq_groups: list[tuple[list[int], PoolingParams]],
    seq_data: dict[int, Any],
    prompt_lens: list[int],
) -> None:
    """Record the supplied containers on the instance.

    The arguments are stored by reference; no copies are made.

    Args:
        seq_groups: List of (seq_ids, pooling_params) pairs.
        seq_data: Specific data related to sequences, keyed by
            sequence ID.
        prompt_lens: Length of each prompt.
    """
    # Assigned left to right, so attribute creation order matches the
    # parameter order.
    self.seq_groups, self.seq_data, self.prompt_lens = (
        seq_groups,
        seq_data,
        prompt_lens,
    )

__repr__

__repr__() -> str
Source code in vllm/model_executor/pooling_metadata.py
def __repr__(self) -> str:
    """Return ``PoolingMetadata(...)`` listing all three attributes."""
    parts = (
        f"seq_groups={self.seq_groups}",
        f"seq_data={self.seq_data}",
        f"prompt_lens={self.prompt_lens}",
    )
    return "PoolingMetadata(" + ", ".join(parts) + ")"

PoolingTensors dataclass

Tensors for pooling.

Source code in vllm/model_executor/pooling_metadata.py
@dataclass
class PoolingTensors:
    """Tensor form of the data needed for pooling.

    Attributes:
        prompt_lens: Prompt lengths as a tensor; ``from_pooling_metadata``
            builds it with dtype ``torch.long``.
    """

    prompt_lens: torch.Tensor

    @classmethod
    def from_pooling_metadata(
        cls,
        pooling_metadata: "PoolingMetadata",
        device: torch.device,
    ) -> "PoolingTensors":
        """Build a PoolingTensors from a PoolingMetadata.

        Args:
            pooling_metadata: PoolingMetadata whose ``prompt_lens`` is
                converted.
            device: Device the resulting tensor is moved to.

        Returns:
            A PoolingTensors holding the prompt lengths on ``device``.
        """
        # Stage the lengths on the CPU first, in pinned memory when
        # is_pin_memory_available() says so, then request a non-blocking
        # transfer to the target device.
        host_lens = torch.tensor(
            pooling_metadata.prompt_lens,
            dtype=torch.long,
            device="cpu",
            pin_memory=is_pin_memory_available(),
        )
        device_lens = host_lens.to(device=device, non_blocking=True)
        return cls(prompt_lens=device_lens)

prompt_lens instance-attribute

prompt_lens: Tensor

__init__

__init__(prompt_lens: Tensor) -> None

from_pooling_metadata classmethod

from_pooling_metadata(
    pooling_metadata: PoolingMetadata, device: device
) -> PoolingTensors

Create PoolingTensors from PoolingMetadata.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| pooling_metadata | PoolingMetadata | PoolingMetadata instance to convert. | required |
| device | torch.device | Device to store the tensors. | required |
Source code in vllm/model_executor/pooling_metadata.py
@classmethod
def from_pooling_metadata(
    cls,
    pooling_metadata: "PoolingMetadata",
    device: torch.device,
) -> "PoolingTensors":
    """Build a PoolingTensors from a PoolingMetadata.

    Args:
        pooling_metadata: PoolingMetadata whose ``prompt_lens`` is
            converted.
        device: Device the resulting tensor is moved to.

    Returns:
        A PoolingTensors holding the prompt lengths on ``device``.
    """
    # Stage the lengths on the CPU first, in pinned memory when
    # is_pin_memory_available() says so, then request a non-blocking
    # transfer to the target device.
    host_lens = torch.tensor(
        pooling_metadata.prompt_lens,
        dtype=torch.long,
        device="cpu",
        pin_memory=is_pin_memory_available(),
    )
    device_lens = host_lens.to(device=device, non_blocking=True)
    return cls(prompt_lens=device_lens)