vllm.model_executor.models.aimv2
AIMv2Attention
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
proj (instance attribute)
proj = RowParallelLinear(
    input_size=embed_dim,
    output_size=embed_dim,
    bias=use_bias,
    quant_config=quant_config,
    prefix=f"{prefix}.proj",
)
qkv (instance attribute)
qkv = QKVParallelLinear(
    hidden_size=embed_dim,
    head_size=head_dim,
    total_num_heads=num_heads,
    bias=qkv_bias,
    quant_config=quant_config,
    prefix=f"{prefix}.qkv",
)
__init__
__init__(
    config: AIMv2Config,
    quant_config: QuantizationConfig,
    prefix: str,
)
Source code in vllm/model_executor/models/aimv2.py
AIMv2Block
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
attn (instance attribute)
attn = AIMv2Attention(
    config,
    quant_config=quant_config,
    prefix=f"{prefix}.attn",
)
mlp (instance attribute)
mlp = AIMv2SwiGLUFFN(
    config,
    quant_config=quant_config,
    prefix=f"{prefix}.mlp",
)
__init__
__init__(
    config: AIMv2Config,
    quant_config: QuantizationConfig,
    prefix: str,
)
Source code in vllm/model_executor/models/aimv2.py
AIMv2Model
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
trunk (instance attribute)
trunk = AIMv2Transformer(
    config,
    quant_config=quant_config,
    require_post_norm=require_post_norm,
    prefix=f"{prefix}.trunk",
)
__init__
__init__(
    config: AIMv2Config,
    quant_config: QuantizationConfig,
    *,
    require_post_norm: Optional[bool] = None,
    prefix: str = "",
)
Source code in vllm/model_executor/models/aimv2.py
forward
load_weights
Source code in vllm/model_executor/models/aimv2.py
AIMv2PatchEmbed
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
proj (instance attribute)
proj = Conv2d(
    num_channels,
    hidden_size,
    kernel_size=(patch_size, patch_size),
    stride=(patch_size, patch_size),
)
AIMv2SwiGLUFFN
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
fc13 (instance attribute)
fc13 = MergedColumnParallelLinear(
    in_features,
    [hidden_features] * 2,
    bias=bias,
    quant_config=quant_config,
    prefix=f"{prefix}.fc13",
)
fc2 (instance attribute)
fc2 = RowParallelLinear(
    input_size=hidden_features,
    output_size=in_features,
    bias=bias,
    quant_config=quant_config,
    prefix=f"{prefix}.fc2",
)
__init__
__init__(
    config: AIMv2Config,
    quant_config: QuantizationConfig,
    prefix: str,
)
Source code in vllm/model_executor/models/aimv2.py
AIMv2Transformer
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
blocks (instance attribute)
blocks = ModuleList(
    [
        AIMv2Block(
            config,
            quant_config,
            prefix=f"{prefix}.blocks.{i}",
        )
        for i in range(num_hidden_layers)
    ]
)
__init__
__init__(
    config: AIMv2Config,
    quant_config: QuantizationConfig,
    *,
    require_post_norm: Optional[bool] = None,
    prefix: str = "",
)
Source code in vllm/model_executor/models/aimv2.py
forward
Source code in vllm/model_executor/models/aimv2.py
AIMv2ViTPreprocessor
Bases: Module