vllm.utils.serial_utils ¶

EMBED_DTYPES `module-attribute` ¶

# Maps each supported embedding dtype name to its DTypeInfo, whose positional
# fields are (torch_dtype, torch_view_dtype, numpy_view_dtype).
EMBED_DTYPES: Mapping[EmbedDType, DTypeInfo] = {
    "float32": DTypeInfo(float32, float32, float32),
    "float16": DTypeInfo(float16, float16, float16),
    # bfloat16 data is reinterpreted as float16 for the view dtypes;
    # presumably because NumPy has no native bfloat16 -- TODO confirm.
    "bfloat16": DTypeInfo(bfloat16, float16, float16),
    # Both fp8 formats are reinterpreted as uint8 for the view dtypes.
    "fp8_e4m3": DTypeInfo(float8_e4m3fn, uint8, uint8),
    "fp8_e5m2": DTypeInfo(float8_e5m2, uint8, uint8),
}

ENDIANNESS `module-attribute` ¶

# Tuple of the valid endianness names, extracted from the Endianness Literal.
ENDIANNESS: tuple[Endianness, ...] = get_args(Endianness)

EmbedDType `module-attribute` ¶

# Names of the supported embedding dtypes; these are the keys of EMBED_DTYPES.
EmbedDType = Literal[
    "float32", "float16", "bfloat16", "fp8_e4m3", "fp8_e5m2"
]

EncodingFormat `module-attribute` ¶

# Allowed encoding-format names.
EncodingFormat = Literal[
    "float", "base64", "bytes", "bytes_only"
]

Endianness `module-attribute` ¶

# Byte-order choices. With "native", tensor2binary/binary2tensor never swap bytes.
Endianness = Literal['native', 'big', 'little']

sys_byteorder `module-attribute` ¶

# Byte order of the host; presumably `sys.byteorder` ("little" or "big") --
# the import of `byteorder` is not visible here, TODO confirm.
sys_byteorder = byteorder

DTypeInfo `dataclass` ¶

Source code in vllm/utils/serial_utils.py

@dataclass(frozen=True)
class DTypeInfo:
    torch_dtype: torch.dtype

    torch_view_dtype: torch.dtype
    numpy_view_dtype: npt.DTypeLike

    @property
    def nbytes(self) -> int:
        return self.torch_dtype.itemsize

nbytes `property` ¶

nbytes: int

numpy_view_dtype `instance-attribute` ¶

numpy_view_dtype: DTypeLike

torch_dtype `instance-attribute` ¶

torch_dtype: dtype

torch_view_dtype `instance-attribute` ¶

torch_view_dtype: dtype

__init__ ¶

__init__(
    torch_dtype: dtype,
    torch_view_dtype: dtype,
    numpy_view_dtype: DTypeLike,
) -> None

binary2tensor ¶

binary2tensor(
    binary: bytes,
    shape: tuple[int, ...],
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> Tensor

Source code in vllm/utils/serial_utils.py

def binary2tensor(
    binary: bytes,
    shape: tuple[int, ...],
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> torch.Tensor:
    """Decode raw element bytes into a tensor of the requested dtype and shape.

    Args:
        binary: Raw element bytes, interpreted with the NumPy view dtype
            registered for ``embed_dtype``.
        shape: Shape the decoded elements are reshaped to.
        embed_dtype: Key of ``EMBED_DTYPES`` selecting the element type.
        endianness: Byte order of ``binary``. ``"native"`` (or a value equal
            to the host byte order) means no byte swap is performed.

    Returns:
        A tensor of dtype ``EMBED_DTYPES[embed_dtype].torch_dtype`` backed by
        its own writable memory (it never aliases ``binary``).

    Raises:
        AssertionError: If ``embed_dtype`` or ``endianness`` is not supported.
        ValueError: Raised by NumPy when ``binary`` cannot be interpreted as
            whole elements or cannot be reshaped to ``shape``.
    """
    assert embed_dtype in EMBED_DTYPES
    assert endianness in ENDIANNESS

    dtype_info = EMBED_DTYPES[embed_dtype]

    np_array = np.frombuffer(binary, dtype=dtype_info.numpy_view_dtype).reshape(shape)

    if endianness != "native" and endianness != sys_byteorder:
        # byteswap() returns a new array, so the result is already writable.
        np_array = np_array.byteswap()
    else:
        # np.frombuffer over immutable bytes yields a read-only array.
        # torch.from_numpy would warn about it and in-place writes on the
        # resulting tensor would be undefined, so hand torch a private copy.
        np_array = np_array.copy()

    # Reinterpret the view dtype (e.g. float16/uint8) as the real torch dtype.
    return torch.from_numpy(np_array).view(dtype_info.torch_dtype)

tensor2base64 ¶

tensor2base64(x: Tensor) -> str

Source code in vllm/utils/serial_utils.py

def tensor2base64(x: torch.Tensor) -> str:
    """Serialize ``x`` with ``torch.save`` and return it base64-encoded.

    The payload is the full ``torch.save`` serialization of the tensor, not
    just its raw element bytes.

    Args:
        x: Tensor to serialize.

    Returns:
        The base64 encoding of the serialized tensor as a ``str``.
    """
    buffer = io.BytesIO()
    with buffer:
        torch.save(x, buffer)
        # getvalue() returns the whole buffer regardless of stream position.
        payload = buffer.getvalue()

    encoded = pybase64.b64encode(payload)
    return encoded.decode("utf-8")

tensor2binary ¶

tensor2binary(
    tensor: Tensor,
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> bytes

Source code in vllm/utils/serial_utils.py

def tensor2binary(
    tensor: torch.Tensor,
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> bytes:
    """Encode a tensor as flat raw element bytes in the requested dtype.

    The tensor is converted to the torch dtype registered for ``embed_dtype``,
    flattened, and dumped element by element; the shape is not stored in the
    output.

    Args:
        tensor: Tensor to encode. It may require grad or live on a non-CPU
            device; it is detached and moved to the CPU before conversion.
        embed_dtype: Key of ``EMBED_DTYPES`` selecting the element type.
        endianness: Byte order of the output. ``"native"`` (or a value equal
            to the host byte order) means no byte swap is performed.

    Returns:
        The raw bytes of the flattened, converted tensor.

    Raises:
        AssertionError: If ``tensor`` is not a ``torch.Tensor`` or if
            ``embed_dtype`` / ``endianness`` is not supported.
    """
    assert isinstance(tensor, torch.Tensor)
    assert embed_dtype in EMBED_DTYPES
    assert endianness in ENDIANNESS

    dtype_info = EMBED_DTYPES[embed_dtype]

    np_array = (
        # .numpy() rejects tensors that require grad or are not on the CPU;
        # detach() and cpu() are no-ops for plain CPU tensors.
        tensor.detach()
        .to(dtype_info.torch_dtype)
        .cpu()
        .flatten()
        .contiguous()
        # Reinterpret as the view dtype before handing the data to NumPy.
        .view(dtype_info.torch_view_dtype)
        .numpy()
    )

    if endianness != "native" and endianness != sys_byteorder:
        np_array = np_array.byteswap()

    return np_array.tobytes()

vllm.utils.serial_utils ¶

EMBED_DTYPES module-attribute ¶

ENDIANNESS module-attribute ¶

EmbedDType module-attribute ¶

EncodingFormat module-attribute ¶

Endianness module-attribute ¶

sys_byteorder module-attribute ¶

DTypeInfo dataclass ¶

nbytes property ¶

numpy_view_dtype instance-attribute ¶

torch_dtype instance-attribute ¶

torch_view_dtype instance-attribute ¶

__init__ ¶

binary2tensor ¶

tensor2base64 ¶

tensor2binary ¶

EMBED_DTYPES `module-attribute` ¶

ENDIANNESS `module-attribute` ¶

EmbedDType `module-attribute` ¶

EncodingFormat `module-attribute` ¶

Endianness `module-attribute` ¶

sys_byteorder `module-attribute` ¶

DTypeInfo `dataclass` ¶

nbytes `property` ¶

numpy_view_dtype `instance-attribute` ¶

torch_dtype `instance-attribute` ¶

torch_view_dtype `instance-attribute` ¶

__init__ ¶