vllm.model_executor.layers.quantization.utils

Modules:

Name                     Description
allspark_utils
bitblas_utils
fp8_utils
gptq_utils
int8_utils
layer_utils
machete_utils
marlin_utils
marlin_utils_fp4
marlin_utils_fp8
marlin_utils_test        Utility functions used for tests and benchmarks
marlin_utils_test_24     Utility functions used for tests and benchmarks
marlin_utils_test_qqq
mxfp4_utils
nvfp4_emulation_utils
quant_utils              This file is used for /tests and /benchmarks
w8a8_utils

__all__ module-attribute

__all__ = ['update_tensor_inplace', 'replace_parameter']

replace_parameter

replace_parameter(
    mod: Module, name: str, new: Union[Tensor, Parameter]
) -> None
Source code in vllm/model_executor/layers/quantization/utils/layer_utils.py
def replace_parameter(mod: torch.nn.Module, name: str,
                      new: Union[torch.Tensor, torch.nn.Parameter]) -> None:

    old = getattr(mod, name)
    if type(old) is type(new) and old.dtype == new.dtype and \
        old.untyped_storage().nbytes() == new.untyped_storage().nbytes():
        # If we can just update in-place to avoid re-registering
        #   can be faster if the underlying storage is the same
        update_tensor_inplace(old, new)
    else:
        # Fallback re-register parameter, convert to Parameter if necessary
        # this not only ensures we don't register a tensor as a parameter, but
        # also ensures that all parameter subclasses get re-registered as
        # parameters for `torch.compile` compatibility
        if not isinstance(new, torch.nn.Parameter):
            new = torch.nn.Parameter(new, requires_grad=False)
        mod.register_parameter(name,
                               torch.nn.Parameter(new, requires_grad=False))
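
A minimal usage sketch (not part of the vLLM source above; the Linear layer, parameter name, and replacement tensor are illustrative assumptions):

import torch
from vllm.model_executor.layers.quantization.utils.layer_utils import replace_parameter

# Hypothetical module whose "weight" parameter is swapped out, e.g. after
# repacking a weight into a kernel-specific layout.
linear = torch.nn.Linear(16, 32, bias=False)
repacked = torch.randn(32, 16)  # illustrative replacement tensor

replace_parameter(linear, "weight", repacked)

# The new value is re-registered as a plain, non-trainable Parameter.
assert isinstance(linear.weight, torch.nn.Parameter)
assert linear.weight.requires_grad is False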

update_tensor_inplace

update_tensor_inplace(dst: Tensor, src: Tensor)
Source code in vllm/model_executor/layers/quantization/utils/layer_utils.py
def update_tensor_inplace(dst: torch.Tensor, src: torch.Tensor):
    assert dst.dtype == src.dtype, "Tensors must have the same dtype"

    # update tensor shape and stride
    dst.as_strided_(src.shape, src.stride())

    # If not the same underlying storage move tensor data
    if dst.data_ptr() != src.data_ptr():
        dst.copy_(src)
        del src
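
A minimal sketch (shapes and values are illustrative): the destination tensor keeps its identity and storage, but takes on the source's shape, strides, and data.

import torch
from vllm.model_executor.layers.quantization.utils.layer_utils import update_tensor_inplace

dst = torch.zeros(4, 4)                                    # tensor to update in place
src = torch.arange(16, dtype=torch.float32).reshape(2, 8)  # same dtype and element count

update_tensor_inplace(dst, src)

# dst now has src's shape, strides, and data without being re-allocated.
assert dst.shape == (2, 8)
assert torch.equal(dst, src)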