vllm.model_executor.models.bert
BertAttention
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
output
instance-attribute
¶
output = BertSelfOutput(
hidden_size=hidden_size,
layer_norm_eps=layer_norm_eps,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
self
instance-attribute
¶
self = BertSelfAttention(
hidden_size=hidden_size,
num_attention_heads=num_attention_heads,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
NOTE(review): the prefix `f"{prefix}.output"` here matches the `BertSelfOutput` attribute above and looks like a copy-paste slip — `f"{prefix}.self"` would be expected for the self-attention submodule; confirm against the source file.
__init__
¶
__init__(
hidden_size: int,
num_attention_heads: int,
layer_norm_eps: float,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
BertEmbedding
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
position_embeddings
instance-attribute
¶
position_embeddings = VocabParallelEmbedding(
max_position_embeddings, hidden_size
)
token_type_embeddings
instance-attribute
¶
token_type_embeddings = VocabParallelEmbedding(
type_vocab_size, hidden_size
)
word_embeddings
instance-attribute
¶
word_embeddings = VocabParallelEmbedding(
vocab_size, hidden_size
)
__init__
¶
Source code in vllm/model_executor/models/bert.py
forward
¶
forward(
input_ids: Tensor,
seq_lens: Tensor,
position_ids: Tensor,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
BertEmbeddingModel
¶
Bases: Module, SupportsV0Only, SupportsQuant
A model that uses Bert to provide embedding functionalities.
This class encapsulates the BertModel and provides an interface for embedding operations and customized pooling functions.
Attributes:
| Name | Type | Description |
|---|---|---|
| model | | An instance of BertModel used for forward operations. |
| _pooler | | An instance of Pooler used for pooling operations. |
Source code in vllm/model_executor/models/bert.py
hf_to_vllm_mapper
class-attribute
instance-attribute
¶
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={"model.": ""}
)
model
instance-attribute
¶
model = _build_model(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, "model"),
)
__init__
¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
_build_model
¶
_build_model(
vllm_config: VllmConfig, prefix: str = ""
) -> BertModel
_build_pooler
¶
_build_pooler(pooler_config: PoolerConfig) -> Pooler
forward
¶
forward(
input_ids: Optional[Tensor],
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
load_weights
¶
Source code in vllm/model_executor/models/bert.py
pooler
¶
pooler(
hidden_states: Tensor, pooling_metadata: PoolingMetadata
) -> Optional[PoolerOutput]
BertEncoder
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
layer
instance-attribute
¶
layer = ModuleList(
[
BertLayer(
config=config,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.layer.{layer_idx}",
)
for layer_idx in range(num_hidden_layers)
]
)
__init__
¶
__init__(vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
BertForSequenceClassification
¶
Bases: Module, SupportsV0Only, SupportsCrossEncoding, SupportsQuant
A model that uses Bert to provide sequence-classification (cross-encoding) functionality.
This class encapsulates the BertModel and provides an interface for classification operations and customized pooling functions.
Attributes:
| Name | Type | Description |
|---|---|---|
| bert | | An instance of BertModel used for forward operations. |
| _pooler | | An instance of Pooler used for pooling operations. |
Source code in vllm/model_executor/models/bert.py
bert
instance-attribute
¶
bert = BertModel(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, "bert"),
embedding_class=BertEmbedding,
add_pooling_layer=True,
)
default_activation_function
instance-attribute
¶
default_activation_function = (
get_cross_encoder_activation_function(config)
)
__init__
¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
forward
¶
forward(
input_ids: Optional[Tensor],
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
load_weights
¶
Source code in vllm/model_executor/models/bert.py
pooler
¶
pooler(
hidden_states: Tensor, pooling_metadata: PoolingMetadata
) -> Optional[PoolerOutput]
BertIntermediate
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense
instance-attribute
¶
dense = ColumnParallelLinear(
input_size=hidden_size,
output_size=intermediate_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__
¶
__init__(
hidden_size: int,
intermediate_size: int,
hidden_act: str,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward
¶
BertLayer
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
attention
instance-attribute
¶
attention = BertAttention(
hidden_size=hidden_size,
num_attention_heads=num_attention_heads,
layer_norm_eps=layer_norm_eps,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.attention",
)
intermediate
instance-attribute
¶
intermediate = BertIntermediate(
hidden_size=hidden_size,
intermediate_size=intermediate_size,
hidden_act=hidden_act,
quant_config=quant_config,
prefix=f"{prefix}.intermediate",
)
output
instance-attribute
¶
output = BertOutput(
hidden_size=hidden_size,
intermediate_size=intermediate_size,
layer_norm_eps=layer_norm_eps,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
__init__
¶
__init__(
config: BertConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
BertModel
¶
Bases: Module
, SupportsQuant
Source code in vllm/model_executor/models/bert.py
encoder
instance-attribute
¶
encoder = BertEncoder(
vllm_config=vllm_config, prefix=f"{prefix}.encoder"
)
packed_modules_mapping
class-attribute
instance-attribute
¶
__init__
¶
__init__(
*,
vllm_config: VllmConfig,
prefix: str = "",
embedding_class: type = BertEmbedding,
add_pooling_layer: bool = False,
)
Source code in vllm/model_executor/models/bert.py
forward
¶
forward(
input_ids: Tensor,
position_ids: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
load_weights
¶
Source code in vllm/model_executor/models/bert.py
BertOutput
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense
instance-attribute
¶
dense = RowParallelLinear(
input_size=intermediate_size,
output_size=hidden_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__
¶
__init__(
hidden_size: int,
intermediate_size: int,
layer_norm_eps: float,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward
¶
BertPooler
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
__init__
¶
forward
¶
Source code in vllm/model_executor/models/bert.py
BertSelfAttention
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
attn
instance-attribute
¶
attn = Attention(
num_heads=num_heads,
head_size=head_dim,
scale=scaling,
num_kv_heads=num_kv_heads,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.attn",
attn_type=ENCODER_ONLY,
)
qkv_proj
instance-attribute
¶
qkv_proj = QKVParallelLinear(
hidden_size=hidden_size,
head_size=head_dim,
total_num_heads=total_num_heads,
total_num_kv_heads=total_num_kv_heads,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.qkv_proj",
)
__init__
¶
__init__(
hidden_size: int,
num_attention_heads: int,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward
¶
BertSelfOutput
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense
instance-attribute
¶
dense = RowParallelLinear(
input_size=hidden_size,
output_size=hidden_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__
¶
__init__(
hidden_size: int,
layer_norm_eps: float,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)