vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel

MPLinearKernel

Bases: ABC

Source code in vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py
class MPLinearKernel(ABC):

    @classmethod
    @abstractmethod
    def get_min_capability(cls) -> int:
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def can_implement(cls,
                      c: MPLinearLayerConfig) -> tuple[bool, Optional[str]]:
        raise NotImplementedError

    def __init__(self,
                 c: MPLinearLayerConfig,
                 w_q_param_name: str,
                 w_s_param_name: str,
                 w_zp_param_name: Optional[str] = None,
                 w_gidx_param_name: Optional[str] = None) -> None:
        # can_implement returns a (bool, reason) tuple; assert on the bool,
        # not the tuple (which is always truthy).
        ok, reason = self.can_implement(c)
        assert ok, f"Cannot implement config: {reason}"
        self.config = c
        self.w_q_name = w_q_param_name
        self.w_s_name = w_s_param_name
        if c.zero_points:
            assert w_zp_param_name is not None
        if c.has_g_idx:
            assert w_gidx_param_name is not None
        self.w_zp_name = w_zp_param_name
        self.w_gidx_name = w_gidx_param_name

    @abstractmethod
    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
        raise NotImplementedError

    @abstractmethod
    def apply_weights(self,
                      layer: torch.nn.Module,
                      x: torch.Tensor,
                      bias: Optional[torch.Tensor] = None) -> torch.Tensor:
        raise NotImplementedError

    def _transform_param(self, layer: torch.nn.Module, name: Optional[str],
                         fn: Callable) -> None:
        if name is not None and getattr(layer, name, None) is not None:
            old_param = getattr(layer, name)
            new_param = fn(old_param)
            # replace the parameter with torch.nn.Parameter for TorchDynamo
            # compatibility
            replace_parameter(
                layer, name,
                torch.nn.Parameter(new_param.data, requires_grad=False))

    def _get_weight_params(
            self, layer: torch.nn.Module) -> tuple[
                torch.Tensor,  # w_q
                torch.Tensor,  # w_s
                Optional[torch.Tensor],  # w_zp
                Optional[torch.Tensor]  # w_gidx
            ]:
        return (
            getattr(layer, self.w_q_name),
            getattr(layer, self.w_s_name),
            getattr(layer, self.w_zp_name or "", None),
            getattr(layer, self.w_gidx_name or "", None),
        )
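
Taken together, the abstract methods define a kernel lifecycle: feasibility is checked before construction, weights are repacked once after loading, and every forward pass goes through apply_weights. A minimal caller-side sketch of that flow, assuming a hypothetical concrete subclass MyKernel plus pre-built config, layer, and x objects:

# MyKernel is a stand-in for a concrete MPLinearKernel subclass.
ok, reason = MyKernel.can_implement(config)
if not ok:
    raise ValueError(f"kernel cannot implement config: {reason}")

kernel = MyKernel(config,
                  w_q_param_name="qweight",
                  w_s_param_name="scales")

# Repack the checkpoint weights once, after they are loaded onto `layer`.
kernel.process_weights_after_loading(layer)

# Forward pass: roughly x @ dequant(w_q, w_s) (+ bias).
out = kernel.apply_weights(layer, x, bias=None)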

config instance-attribute

config = c

w_gidx_name instance-attribute

w_gidx_name = w_gidx_param_name

w_q_name instance-attribute

w_q_name = w_q_param_name

w_s_name instance-attribute

w_s_name = w_s_param_name

w_zp_name instance-attribute

w_zp_name = w_zp_param_name

__init__

__init__(
    c: MPLinearLayerConfig,
    w_q_param_name: str,
    w_s_param_name: str,
    w_zp_param_name: Optional[str] = None,
    w_gidx_param_name: Optional[str] = None,
) -> None
Source code in vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py
def __init__(self,
             c: MPLinearLayerConfig,
             w_q_param_name: str,
             w_s_param_name: str,
             w_zp_param_name: Optional[str] = None,
             w_gidx_param_name: Optional[str] = None) -> None:
    # can_implement returns a (bool, reason) tuple; assert on the bool,
    # not the tuple (which is always truthy).
    ok, reason = self.can_implement(c)
    assert ok, f"Cannot implement config: {reason}"
    self.config = c
    self.w_q_name = w_q_param_name
    self.w_s_name = w_s_param_name
    if c.zero_points:
        assert w_zp_param_name is not None
    if c.has_g_idx:
        assert w_gidx_param_name is not None
    self.w_zp_name = w_zp_param_name
    self.w_gidx_name = w_gidx_param_name
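
The constructor only records parameter names; the tensors themselves are looked up on the layer later. The assertions tie the optional names to the config: with zero_points=True, a zero-point parameter name is mandatory. A short sketch, reusing the hypothetical MyKernel subclass from above:

# config.zero_points is True here, so omitting w_zp_param_name
# would trip the assertion in __init__.
kernel = MyKernel(config,
                  w_q_param_name="qweight",
                  w_s_param_name="scales",
                  w_zp_param_name="qzeros")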

_get_weight_params

_get_weight_params(
    layer: Module,
) -> tuple[
    Tensor, Tensor, Optional[Tensor], Optional[Tensor]
]
Source code in vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py
def _get_weight_params(
        self, layer: torch.nn.Module) -> tuple[
            torch.Tensor,  # w_q
            torch.Tensor,  # w_s
            Optional[torch.Tensor],  # w_zp
            Optional[torch.Tensor]  # w_gidx
        ]:
    return (
        getattr(layer, self.w_q_name),
        getattr(layer, self.w_s_name),
        getattr(layer, self.w_zp_name or "", None),
        getattr(layer, self.w_gidx_name or "", None),
    )
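
_get_weight_params resolves the recorded names back into tensors, returning None for the optional entries. A typical use from inside a subclass method:

w_q, w_s, w_zp, w_gidx = self._get_weight_params(layer)
# w_zp and w_gidx are None unless the config enabled
# zero_points / has_g_idx respectively.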

_transform_param

_transform_param(
    layer: Module, name: Optional[str], fn: Callable
) -> None
Source code in vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py
def _transform_param(self, layer: torch.nn.Module, name: Optional[str],
                     fn: Callable) -> None:
    if name is not None and getattr(layer, name, None) is not None:
        old_param = getattr(layer, name)
        new_param = fn(old_param)
        # replace the parameter with torch.nn.Parameter for TorchDynamo
        # compatibility
        replace_parameter(
            layer, name,
            torch.nn.Parameter(new_param.data, requires_grad=False))
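
_transform_param is the intended hook for one-time weight transformations: it applies fn to the named parameter (silently skipping absent optional ones) and re-registers the result as a frozen torch.nn.Parameter so TorchDynamo can trace the layer. A hedged sketch of its use, where repack_for_backend stands in for a real backend-specific repacking op:

# Inside a subclass's process_weights_after_loading:
self._transform_param(layer, self.w_q_name,
                      lambda w_q: repack_for_backend(w_q))
# Optional parameters are skipped when their name is None or unset.
self._transform_param(layer, self.w_zp_name, lambda zp: zp.contiguous())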

apply_weights abstractmethod

apply_weights(
    layer: Module, x: Tensor, bias: Optional[Tensor] = None
) -> Tensor
Source code in vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py
@abstractmethod
def apply_weights(self,
                  layer: torch.nn.Module,
                  x: torch.Tensor,
                  bias: Optional[torch.Tensor] = None) -> torch.Tensor:
    raise NotImplementedError
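
Implementations of apply_weights typically flatten the input to 2D, invoke the backend matmul on the packed weights, and restore the batch shape. A hedged sketch, with mixed_precision_gemm as a placeholder for the real backend op:

def apply_weights(self, layer, x, bias=None):
    w_q, w_s, _, _ = self._get_weight_params(layer)
    # Collapse leading batch dims into a single 2D GEMM.
    x_2d = x.reshape(-1, x.shape[-1])
    out = mixed_precision_gemm(x_2d, w_q, w_s, self.config.weight_type)
    if bias is not None:
        out = out + bias
    return out.reshape(*x.shape[:-1], out.shape[-1])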

can_implement abstractmethod classmethod

can_implement(
    c: MPLinearLayerConfig,
) -> tuple[bool, Optional[str]]
Source code in vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py
@classmethod
@abstractmethod
def can_implement(cls,
                  c: MPLinearLayerConfig) -> tuple[bool, Optional[str]]:
    raise NotImplementedError
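
can_implement returns a (supported, failure-reason) pair so a kernel selector can report why a kernel was rejected rather than just skipping it. An illustrative implementation that gates on group size and activation dtype (the exact limits are kernel-specific):

@classmethod
def can_implement(cls,
                  c: MPLinearLayerConfig) -> tuple[bool, Optional[str]]:
    # Illustrative constraints only.
    if c.group_size not in (-1, 128):
        return False, f"unsupported group_size {c.group_size}"
    if c.act_type != torch.float16:
        return False, f"unsupported act_type {c.act_type}"
    return True, None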

get_min_capability abstractmethod classmethod

get_min_capability() -> int
Source code in vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py
@classmethod
@abstractmethod
def get_min_capability(cls) -> int:
    raise NotImplementedError
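
get_min_capability reports the minimum GPU compute capability the kernel requires, encoded as major * 10 + minor, so the selector can rule the kernel out on older devices. For example, a kernel that needs Ampere (SM 8.0) or newer:

@classmethod
def get_min_capability(cls) -> int:
    return 80  # SM 8.0, i.e. Ampere or newer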

process_weights_after_loading abstractmethod

process_weights_after_loading(layer: Module) -> None
Source code in vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py
@abstractmethod
def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
    raise NotImplementedError
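
process_weights_after_loading runs once, after the checkpoint tensors land on the layer and before the first forward pass, which makes it the natural place for layout conversions via _transform_param. A minimal illustrative override:

def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
    # Illustrative only: cast the scales to the activation dtype.
    self._transform_param(layer, self.w_s_name,
                          lambda w_s: w_s.to(self.config.act_type))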

MPLinearLayerConfig dataclass

Source code in vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py
@dataclass
class MPLinearLayerConfig:
    full_weight_shape: tuple[int, int]  # [in, out]
    partition_weight_shape: tuple[int, int]
    weight_type: ScalarType
    act_type: torch.dtype
    group_size: int
    zero_points: bool
    has_g_idx: bool

act_type instance-attribute

act_type: dtype

full_weight_shape instance-attribute

full_weight_shape: tuple[int, int]

group_size instance-attribute

group_size: int

has_g_idx instance-attribute

has_g_idx: bool

partition_weight_shape instance-attribute

partition_weight_shape: tuple[int, int]

weight_type instance-attribute

weight_type: ScalarType

zero_points instance-attribute

zero_points: bool

__init__

__init__(
    full_weight_shape: tuple[int, int],
    partition_weight_shape: tuple[int, int],
    weight_type: ScalarType,
    act_type: dtype,
    group_size: int,
    zero_points: bool,
    has_g_idx: bool,
) -> None
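
Putting the fields together, a config for a hypothetical 4-bit, group-128 layer without zero points or activation reordering might look as follows (scalar_types.uint4b8 is assumed to come from vllm.scalar_type):

import torch
from vllm.scalar_type import scalar_types

config = MPLinearLayerConfig(
    full_weight_shape=(4096, 11008),       # [in, out] of the unsharded layer
    partition_weight_shape=(4096, 11008),  # equal when not TP-sharded
    weight_type=scalar_types.uint4b8,      # 4-bit weights, bias-8 encoding
    act_type=torch.float16,
    group_size=128,
    zero_points=False,
    has_g_idx=False,
)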