Skip to content

vllm.utils.serial_utils

EMBED_DTYPES module-attribute

# Lookup table from each supported embed dtype name to its DTypeInfo.
# DTypeInfo arguments are positional:
#   (torch_dtype, torch_view_dtype, numpy_view_dtype).
# bfloat16 and the fp8 formats are paired with float16 / uint8 view dtypes
# instead of a view dtype of the same name; the view dtypes are what
# tensor2binary / binary2tensor use when crossing between torch and NumPy.
EMBED_DTYPES: Mapping[EmbedDType, DTypeInfo] = {
    "float32": DTypeInfo(float32, float32, float32),
    "float16": DTypeInfo(float16, float16, float16),
    "bfloat16": DTypeInfo(bfloat16, float16, float16),
    "fp8_e4m3": DTypeInfo(float8_e4m3fn, uint8, uint8),
    "fp8_e5m2": DTypeInfo(float8_e5m2, uint8, uint8),
}

ENDIANNESS module-attribute

# Tuple of the accepted endianness names, extracted from the Endianness
# Literal via get_args; used for membership checks on the endianness argument.
ENDIANNESS: tuple[Endianness, ...] = get_args(Endianness)

EmbedDType module-attribute

# Names of the supported embedding element formats; these are exactly the
# keys of EMBED_DTYPES.
EmbedDType = Literal[
    "float32", "float16", "bfloat16", "fp8_e4m3", "fp8_e5m2"
]

EncodingFormat module-attribute

# Closed set of encoding format names. It is not referenced by the functions
# shown in this listing.
EncodingFormat = Literal[
    "float", "base64", "bytes", "bytes_only"
]

Endianness module-attribute

# Byte-order choices accepted by tensor2binary / binary2tensor. With "native"
# those functions never byteswap; "big" / "little" trigger a byteswap only
# when they differ from sys_byteorder.
Endianness = Literal['native', 'big', 'little']

sys_byteorder module-attribute

# Byte order that requested endianness values are compared against to decide
# whether a byteswap is needed.
# NOTE(review): `byteorder` is presumably sys.byteorder; the import is not
# visible here, so confirm.
sys_byteorder = byteorder

DTypeInfo dataclass

Source code in vllm/utils/serial_utils.py
@dataclass(frozen=True)
class DTypeInfo:
    torch_dtype: torch.dtype

    torch_view_dtype: torch.dtype
    numpy_view_dtype: npt.DTypeLike

    @property
    def nbytes(self) -> int:
        return self.torch_dtype.itemsize

nbytes property

nbytes: int

numpy_view_dtype instance-attribute

numpy_view_dtype: DTypeLike

torch_dtype instance-attribute

torch_dtype: dtype

torch_view_dtype instance-attribute

torch_view_dtype: dtype

__init__

__init__(
    torch_dtype: dtype,
    torch_view_dtype: dtype,
    numpy_view_dtype: DTypeLike,
) -> None

binary2tensor

binary2tensor(
    binary: bytes,
    shape: tuple[int, ...],
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> Tensor
Source code in vllm/utils/serial_utils.py
def binary2tensor(
    binary: bytes,
    shape: tuple[int, ...],
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> torch.Tensor:
    """Decode raw element bytes into a tensor of the requested shape and dtype.

    Args:
        binary: Raw element bytes, laid out in ``endianness`` byte order.
        shape: Shape the decoded elements are reshaped to.
        embed_dtype: Key of ``EMBED_DTYPES`` selecting the element format.
        endianness: Byte order of ``binary``. ``"native"`` (or a value equal
            to ``sys_byteorder``) means no byteswap is performed.

    Returns:
        A tensor with dtype ``EMBED_DTYPES[embed_dtype].torch_dtype``. When
        ``binary`` is an immutable buffer the tensor owns a writable copy of
        the data.

    Raises:
        AssertionError: If ``embed_dtype`` or ``endianness`` is not supported
            (note that assertions are skipped under ``python -O``).
        ValueError: Propagated from NumPy when the byte length does not match
            the element size or ``shape``.
    """
    assert embed_dtype in EMBED_DTYPES
    assert endianness in ENDIANNESS

    dtype_info = EMBED_DTYPES[embed_dtype]

    # Interpret the bytes through the NumPy view dtype; over an immutable
    # ``bytes`` object this is a read-only view of the caller's buffer.
    np_array = np.frombuffer(binary, dtype=dtype_info.numpy_view_dtype).reshape(shape)

    if endianness != "native" and endianness != sys_byteorder:
        # byteswap() returns a new, writable array in host byte order.
        np_array = np_array.byteswap()
    elif not np_array.flags.writeable:
        # torch.from_numpy shares memory with the array and does not support
        # read-only arrays (it warns, and writing through the tensor is
        # undefined behaviour), so give the tensor its own writable buffer.
        np_array = np_array.copy()

    # Reinterpret the view dtype as the real torch dtype (same element width).
    return torch.from_numpy(np_array).view(dtype_info.torch_dtype)

tensor2base64

tensor2base64(x: Tensor) -> str
Source code in vllm/utils/serial_utils.py
def tensor2base64(x: torch.Tensor) -> str:
    """Serialize ``x`` with ``torch.save`` and return it as base64 text.

    Args:
        x: Tensor to serialize.

    Returns:
        The base64 encoding of the ``torch.save`` payload, decoded as UTF-8.
    """
    with io.BytesIO() as stream:
        torch.save(x, stream)
        # Rewind so the read below returns everything torch.save wrote.
        stream.seek(0)
        serialized = stream.read()

    encoded = pybase64.b64encode(serialized)
    return encoded.decode("utf-8")

tensor2binary

tensor2binary(
    tensor: Tensor,
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> bytes
Source code in vllm/utils/serial_utils.py
def tensor2binary(
    tensor: torch.Tensor,
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> bytes:
    """Encode a tensor as raw element bytes in the requested dtype and order.

    The tensor is cast to ``EMBED_DTYPES[embed_dtype].torch_dtype``, flattened,
    and its elements are emitted back to back; the shape is not recorded.

    Args:
        tensor: Tensor to serialize. Tensors that require grad or live on a
            non-CPU device are detached and moved to CPU first.
        embed_dtype: Key of ``EMBED_DTYPES`` selecting the element format.
        endianness: Byte order of the output. ``"native"`` (or a value equal
            to ``sys_byteorder``) means no byteswap is performed.

    Returns:
        The flattened elements as ``bytes``.

    Raises:
        AssertionError: If ``tensor`` is not a ``torch.Tensor`` or if
            ``embed_dtype`` / ``endianness`` is not supported (note that
            assertions are skipped under ``python -O``).
    """
    assert isinstance(tensor, torch.Tensor)
    assert embed_dtype in EMBED_DTYPES
    assert endianness in ENDIANNESS

    dtype_info = EMBED_DTYPES[embed_dtype]

    np_array = (
        # Tensor.numpy() rejects tensors that require grad or are not on CPU;
        # detach()/cpu() are no-ops for tensors that already qualify.
        tensor.detach()
        .cpu()
        .to(dtype_info.torch_dtype)
        .flatten()
        .contiguous()
        # Reinterpret as the view dtype so NumPy can represent the elements.
        .view(dtype_info.torch_view_dtype)
        .numpy()
    )

    if endianness != "native" and endianness != sys_byteorder:
        # Not in-place: the input tensor's memory is left untouched.
        np_array = np_array.byteswap()

    return np_array.tobytes()