vllm.model_executor.layers.quantization.utils

Modules:

Name                     Description
allspark_utils
bitblas_utils
fp8_utils
gptq_utils
int8_utils
layer_utils
machete_utils
marlin_utils
marlin_utils_fp4
marlin_utils_fp8
marlin_utils_test        Utility functions used for tests and benchmarks
marlin_utils_test_24     Utility functions used for tests and benchmarks
marlin_utils_test_qqq
mxfp4_utils
nvfp4_emulation_utils
quant_utils              This file is used for /tests and /benchmarks
w8a8_utils

__all__ module-attribute

__all__ = ['update_tensor_inplace', 'replace_parameter']

replace_parameter

replace_parameter(
    mod: Module, name: str, new: Union[Tensor, Parameter]
) -> None
Source code in vllm/model_executor/layers/quantization/utils/layer_utils.py
def replace_parameter(mod: torch.nn.Module, name: str,
                      new: Union[torch.Tensor, torch.nn.Parameter]) -> None:

    old = getattr(mod, name)
    if type(old) is type(new) and old.dtype == new.dtype and \
        old.untyped_storage().nbytes() == new.untyped_storage().nbytes():
        # If we can just update in-place to avoid re-registering
        #   can be faster if the underlying storage is the same
        update_tensor_inplace(old, new)
    else:
        # Fallback re-register parameter, convert to Parameter if necessary
        # this not only ensures we don't register a tensor as a parameter, but
        # also ensures that all parameter subclasses get re-registered as
        # parameters for `torch.compile` compatibility
        if not isinstance(new, torch.nn.Parameter):
            new = torch.nn.Parameter(new, requires_grad=False)
        mod.register_parameter(name,
                               torch.nn.Parameter(new, requires_grad=False))
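
A minimal usage sketch (not part of the vLLM source above; the Linear layer, parameter name, and replacement tensor are illustrative assumptions):

import torch
from vllm.model_executor.layers.quantization.utils.layer_utils import replace_parameter

# Hypothetical module whose "weight" parameter is swapped out, e.g. after
# repacking a weight into a kernel-specific layout.
linear = torch.nn.Linear(16, 32, bias=False)
repacked = torch.randn(32, 16)  # illustrative replacement tensor

replace_parameter(linear, "weight", repacked)

# The new value is re-registered as a plain, non-trainable Parameter.
assert isinstance(linear.weight, torch.nn.Parameter)
assert linear.weight.requires_grad is False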

update_tensor_inplace

update_tensor_inplace(dst: Tensor, src: Tensor)
Source code in vllm/model_executor/layers/quantization/utils/layer_utils.py
def update_tensor_inplace(dst: torch.Tensor, src: torch.Tensor):
    assert dst.dtype == src.dtype, "Tensors must have the same dtype"

    # update tensor shape and stride
    dst.as_strided_(src.shape, src.stride())

    # If not the same underlying storage move tensor data
    if dst.data_ptr() != src.data_ptr():
        dst.copy_(src)
        del src
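
A minimal sketch (shapes and values are illustrative): the destination tensor keeps its identity and storage, but takes on the source's shape, strides, and data.

import torch
from vllm.model_executor.layers.quantization.utils.layer_utils import update_tensor_inplace

dst = torch.zeros(4, 4)                                    # tensor to update in place
src = torch.arange(16, dtype=torch.float32).reshape(2, 8)  # same dtype and element count

update_tensor_inplace(dst, src)

# dst now has src's shape, strides, and data without being re-allocated.
assert dst.shape == (2, 8)
assert torch.equal(dst, src)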