vllm.model_executor.models.bert_with_rope
BertWithRope
¶
Bases: Module, SupportsV0Only, SupportsQuant
Source code in vllm/model_executor/models/bert_with_rope.py
encoder
instance-attribute
¶
encoder = BertWithRopeEncoder(
vllm_config=vllm_config,
bias=getattr(config, "bias", True),
rotary_kwargs=rotary_kwargs,
prefix=f"{prefix}.encoder",
)
hf_to_vllm_mapper
class-attribute
instance-attribute
¶
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={"model.": ""}
)
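This mapper only strips the leading "model." from Hugging Face checkpoint keys so they line up with vLLM's module names. A minimal stand-in for that renaming (illustrative, not vLLM's WeightsMapper implementation):

def strip_model_prefix(name: str) -> str:
    # orig_to_new_prefix={"model.": ""}: drop the "model." prefix, keep the rest
    return name[len("model."):] if name.startswith("model.") else name

assert strip_model_prefix("model.encoder.layers.0.attn.qkv_proj.weight") \
    == "encoder.layers.0.attn.qkv_proj.weight"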
__init__
¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert_with_rope.py
forward
¶
forward(
input_ids: Optional[Tensor],
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert_with_rope.py
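forward takes positions because this model family replaces learned position embeddings with rotary embeddings. A minimal NeoX-style RoPE sketch under that assumption (vLLM's actual rotary kernels are configured via rotary_kwargs):

import torch

def apply_rope(x: torch.Tensor, positions: torch.Tensor, base: float = 10000.0) -> torch.Tensor:
    # x: [seq, num_heads, head_dim] with an even head_dim
    seq, num_heads, head_dim = x.shape
    inv_freq = 1.0 / base ** (torch.arange(0, head_dim, 2).float() / head_dim)
    angles = positions.float()[:, None] * inv_freq[None, :]        # [seq, head_dim // 2]
    cos, sin = angles.cos()[:, None, :], angles.sin()[:, None, :]  # broadcast over heads
    x1, x2 = x.chunk(2, dim=-1)                                    # rotate-half split
    return torch.cat([x1 * cos - x2 * sin, x1 * sin + x2 * cos], dim=-1)

q = torch.randn(4, 8, 16)
q_rot = apply_rope(q, positions=torch.arange(4))
print(q_rot.shape)  # torch.Size([4, 8, 16])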
load_weights
¶
Source code in vllm/model_executor/models/bert_with_rope.py
BertWithRopeAttention
¶
Bases: Module
Source code in vllm/model_executor/models/bert_with_rope.py
attn
instance-attribute
¶
attn = Attention(
num_heads=num_heads,
head_size=head_dim,
scale=scaling,
num_kv_heads=num_kv_heads,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.attn",
attn_type=ENCODER_ONLY,
)
out_proj
instance-attribute
¶
out_proj = RowParallelLinear(
input_size=hidden_size,
output_size=hidden_size,
bias=bias,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
qkv_proj
instance-attribute
¶
qkv_proj = QKVParallelLinear(
hidden_size=hidden_size,
head_size=head_dim,
total_num_heads=total_num_heads,
total_num_kv_heads=total_num_kv_heads,
bias=bias,
quant_config=quant_config,
prefix=f"{prefix}.qkv_proj",
)
__init__
¶
__init__(
hidden_size: int,
num_attention_heads: int,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
bias: bool = True,
rotary_kwargs: Optional[dict] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert_with_rope.py
forward
¶
Source code in vllm/model_executor/models/bert_with_rope.py
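The fused qkv_proj/out_proj layout above computes Q, K and V with a single matmul and folds the output projection in at the end. A dense, single-GPU sketch of that data flow (QKVParallelLinear/RowParallelLinear add tensor-parallel sharding, and Attention supplies the paged KV cache):

import torch

def fused_qkv_attention(x, qkv_w, out_w, num_heads):
    # x: [seq, hidden]; qkv_w: [3 * hidden, hidden]; out_w: [hidden, hidden]
    seq, hidden = x.shape
    head_dim = hidden // num_heads
    q, k, v = (x @ qkv_w.t()).chunk(3, dim=-1)                    # one matmul, three tensors
    q, k, v = (t.view(seq, num_heads, head_dim).transpose(0, 1) for t in (q, k, v))
    scores = (q @ k.transpose(-2, -1)) / head_dim ** 0.5          # scale = 1 / sqrt(head_dim)
    ctx = torch.softmax(scores, dim=-1) @ v                       # ENCODER_ONLY: no causal mask
    return ctx.transpose(0, 1).reshape(seq, hidden) @ out_w.t()

x = torch.randn(4, 64)
out = fused_qkv_attention(x, torch.randn(192, 64), torch.randn(64, 64), num_heads=8)
print(out.shape)  # torch.Size([4, 64])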
BertWithRopeBlock
¶
Bases: Module
Source code in vllm/model_executor/models/bert_with_rope.py
attn
instance-attribute
¶
attn = BertWithRopeAttention(
hidden_size=hidden_size,
num_attention_heads=num_attention_heads,
cache_config=cache_config,
quant_config=quant_config,
bias=bias,
rotary_kwargs=rotary_kwargs,
prefix=f"{prefix}.attention",
)
__init__
¶
__init__(
config: PretrainedConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
moe: bool = False,
bias: bool = True,
rotary_kwargs: Optional[dict] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert_with_rope.py
forward
¶
Source code in vllm/model_executor/models/bert_with_rope.py
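How the block wires attention and MLP together is easiest to see as a post-norm residual pair. The sketch below is a guess at that wiring, with the attn_ln/mlp_ln names taken from the weight mappings further down this page; the authoritative version is in bert_with_rope.py:

import torch.nn as nn

class BlockSketch(nn.Module):
    def __init__(self, attn: nn.Module, mlp: nn.Module, hidden_size: int):
        super().__init__()
        self.attn, self.mlp = attn, mlp
        self.attn_ln = nn.LayerNorm(hidden_size)  # "norm1" in the checkpoints
        self.mlp_ln = nn.LayerNorm(hidden_size)   # "norm2" in the checkpoints

    def forward(self, x):
        x = self.attn_ln(x + self.attn(x))   # residual around attention, then norm
        return self.mlp_ln(x + self.mlp(x))  # residual around the MLP, then norm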
BertWithRopeEmbedding
¶
Bases: Module
Source code in vllm/model_executor/models/bert_with_rope.py
token_type_embeddings
instance-attribute
¶
token_type_embeddings = VocabParallelEmbedding(
type_vocab_size, hidden_size
)
word_embeddings
instance-attribute
¶
word_embeddings = VocabParallelEmbedding(
vocab_size, hidden_size
)
__init__
¶
Source code in vllm/model_executor/models/bert_with_rope.py
forward
¶
Source code in vllm/model_executor/models/bert_with_rope.py
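The two embedding tables above are summed to form the input hidden states; token-type ids typically default to zeros when not provided. A single-GPU sketch with nn.Embedding standing in for VocabParallelEmbedding:

import torch
import torch.nn as nn

vocab_size, type_vocab_size, hidden_size = 30522, 2, 64  # example sizes
word_embeddings = nn.Embedding(vocab_size, hidden_size)
token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)

input_ids = torch.tensor([101, 2023, 102])
token_type_ids = torch.zeros_like(input_ids)  # segment 0 when token_type_ids is absent
hidden = word_embeddings(input_ids) + token_type_embeddings(token_type_ids)
print(hidden.shape)  # torch.Size([3, 64])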
BertWithRopeEncoder
¶
Bases: Module
Source code in vllm/model_executor/models/bert_with_rope.py
layers
instance-attribute
¶
layers = ModuleList(
[
BertWithRopeBlock(
config=config,
cache_config=cache_config,
quant_config=quant_config,
bias=bias,
moe=every_n > 0 and layer_idx % every_n == 1,
rotary_kwargs=rotary_kwargs,
prefix=f"{prefix}.layer.{layer_idx}",
)
for layer_idx in range(num_hidden_layers)
]
)
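The moe flag makes periodic layers mixture-of-experts blocks; with the expression above, that is every layer whose index is congruent to 1 mod every_n. A quick check of which indices qualify, using example values:

every_n, num_hidden_layers = 2, 12  # example values
moe_layers = [i for i in range(num_hidden_layers) if every_n > 0 and i % every_n == 1]
print(moe_layers)  # [1, 3, 5, 7, 9, 11] -> odd-indexed layers become MoE blocks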
__init__
¶
__init__(
vllm_config: VllmConfig,
bias: bool = True,
rotary_kwargs: Optional[dict] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert_with_rope.py
forward
¶
BertWithRopeGatedMLP
¶
Bases: Module
Source code in vllm/model_executor/models/bert_with_rope.py
down_proj
instance-attribute
¶
down_proj = RowParallelLinear(
input_size=intermediate_size,
output_size=hidden_size,
bias=bias,
quant_config=quant_config,
prefix=f"{prefix}.down_proj",
)
gate_up_proj
instance-attribute
¶
gate_up_proj = MergedColumnParallelLinear(
hidden_size,
[intermediate_size] * 2,
bias=bias,
quant_config=quant_config,
prefix=f"{prefix}.gate_up_proj",
)
__init__
¶
__init__(
hidden_size: int,
intermediate_size: int,
hidden_act: str,
bias: bool = True,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert_with_rope.py
forward
¶
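gate_up_proj merges the gate and up projections into one matmul whose output is split in half, gated by the activation, and sent through down_proj. A dense sketch of that forward pass (SiLU is assumed here; the real activation is chosen by hidden_act):

import torch
import torch.nn.functional as F

def gated_mlp(x, gate_up_w, down_w):
    # x: [seq, hidden]; gate_up_w: [2 * intermediate, hidden]; down_w: [hidden, intermediate]
    gate, up = (x @ gate_up_w.t()).chunk(2, dim=-1)  # one matmul, two halves
    return (F.silu(gate) * up) @ down_w.t()

x = torch.randn(3, 64)
out = gated_mlp(x, torch.randn(2 * 256, 64), torch.randn(64, 256))
print(out.shape)  # torch.Size([3, 64])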
BertWithRopeMLP
¶
Bases: Module
Source code in vllm/model_executor/models/bert_with_rope.py
down_proj
instance-attribute
¶
down_proj = RowParallelLinear(
input_size=intermediate_size,
output_size=hidden_size,
bias=bias,
quant_config=quant_config,
prefix=f"{prefix}.down_proj",
)
up_proj
instance-attribute
¶
up_proj = ColumnParallelLinear(
input_size=hidden_size,
output_size=intermediate_size,
bias=bias,
quant_config=quant_config,
prefix=f"{prefix}.up_proj",
)
__init__
¶
__init__(
hidden_size: int,
intermediate_size: int,
hidden_act: str,
bias: bool = True,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert_with_rope.py
forward
¶
Source code in vllm/model_executor/models/bert_with_rope.py
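The non-gated variant is simpler: one up projection, the activation, then the down projection. A sketch (GELU assumed, again subject to hidden_act):

import torch
import torch.nn.functional as F

def mlp(x, up_w, down_w):
    # x: [seq, hidden]; up_w: [intermediate, hidden]; down_w: [hidden, intermediate]
    return F.gelu(x @ up_w.t()) @ down_w.t()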
GteNewModel
¶
Bases: BertWithRope
Source code in vllm/model_executor/models/bert_with_rope.py
hf_to_vllm_mapper
class-attribute
instance-attribute
¶
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_substr={
"new.": "",
"layer": "layers",
"attention.qkv_proj": "attn.qkv_proj",
"attention.o_proj": "attn.out_proj",
}
)
__init__
¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert_with_rope.py
ignore_unnecessary_layers
¶
load_weights
¶
split_up_gate_proj
¶
Source code in vllm/model_executor/models/bert_with_rope.py
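The name split_up_gate_proj suggests GTE checkpoints store the gate and up projections as one fused up_gate_proj tensor that must be split before loading into the two halves of gate_up_proj. A hedged sketch of that split (the half ordering is an assumption; see the source for the real layout):

import torch

fused = torch.randn(2 * 256, 64)      # hypothetical [2 * intermediate, hidden] checkpoint tensor
up_w, gate_w = fused.chunk(2, dim=0)  # ordering assumed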
JinaRobertaModel
¶
Bases: BertWithRope
Source code in vllm/model_executor/models/bert_with_rope.py
hf_to_vllm_mapper
class-attribute
instance-attribute
¶
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_substr={
"emb_ln": "embeddings.LayerNorm",
"mixer.Wqkv": "attn.qkv_proj",
"mixer.out_proj": "attn.out_proj",
"norm1": "attn_ln",
"mlp.fc1.": "mlp.up_proj.",
"mlp.fc2": "mlp.down_proj",
"norm2": "mlp_ln",
}
)
forward
¶
forward(
input_ids: Tensor,
position_ids: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert_with_rope.py
jina_merge_lora_weights
¶
Source code in vllm/model_executor/models/bert_with_rope.py
load_weights
¶
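jina_merge_lora_weights indicates that Jina checkpoints ship LoRA factors which are folded into the dense weights at load time. The standard merge, as a hedged sketch with placeholder rank and scaling:

import torch

W = torch.randn(64, 64)  # base weight [out_features, in_features]
A = torch.randn(4, 64)   # lora_A [rank, in_features]
B = torch.randn(64, 4)   # lora_B [out_features, rank]
scaling = 1.0            # typically lora_alpha / rank
W_merged = W + scaling * (B @ A)  # merged once, so inference pays no LoRA overhead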
NomicBertModel
¶
Bases: BertWithRope
Source code in vllm/model_executor/models/bert_with_rope.py
hf_to_vllm_mapper
class-attribute
instance-attribute
¶
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_substr={
"emb_ln": "embeddings.LayerNorm",
"attn.Wqkv": "attn.qkv_proj",
"norm1": "attn_ln",
"mlp.fc1.": "mlp.up_proj.",
"mlp.fc11": "mlp.up_proj",
"mlp.fc12": "mlp.gate_proj",
"mlp.fc2": "mlp.down_proj",
"norm2": "mlp_ln",
}
)
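Applying the substring map above by hand shows why "mlp.fc1." (with the trailing dot) coexists with "mlp.fc11"/"mlp.fc12": the dotted entry only matches a plain fc1, while fc11/fc12 are the gated pair in MoE-style checkpoints. An illustrative renamer assuming first-match-wins semantics (not vLLM's WeightsMapper):

mapping = {
    "emb_ln": "embeddings.LayerNorm",
    "attn.Wqkv": "attn.qkv_proj",
    "mlp.fc1.": "mlp.up_proj.",
    "mlp.fc11": "mlp.up_proj",
    "mlp.fc12": "mlp.gate_proj",
    "mlp.fc2": "mlp.down_proj",
}

def remap(name: str) -> str:
    for old, new in mapping.items():
        if old in name:
            return name.replace(old, new)
    return name

print(remap("layers.0.mlp.fc1.weight"))   # layers.0.mlp.up_proj.weight
print(remap("layers.0.mlp.fc11.weight"))  # layers.0.mlp.up_proj.weight
print(remap("layers.0.mlp.fc12.weight"))  # layers.0.mlp.gate_proj.weight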
NomicExpertMLP
¶
Bases: Module
Source code in vllm/model_executor/models/bert_with_rope.py
__init__
¶
Source code in vllm/model_executor/models/bert_with_rope.py
forward
¶
Source code in vllm/model_executor/models/bert_with_rope.py
NomicExperts
¶
Bases: Module
Source code in vllm/model_executor/models/bert_with_rope.py
mlp
instance-attribute
¶
mlp = NomicExpertMLP(
hidden_size=n_embd,
ffn_hidden_size=n_inner,
moe_num_experts=moe_num_experts,
ffn_act_fn=hidden_act,
)
__init__
¶
Source code in vllm/model_executor/models/bert_with_rope.py
forward
¶
Source code in vllm/model_executor/models/bert_with_rope.py
NomicMoELayer
¶
Bases: Module
Source code in vllm/model_executor/models/bert_with_rope.py
experts
instance-attribute
¶
experts = NomicExperts(
config,
hidden_size=n_embd,
ffn_hidden_size=n_inner,
moe_num_experts=num_experts,
)
router
instance-attribute
¶
router = NomicRouter(
n_embd, moe_num_experts=num_experts, moe_top_k=moe_top_k
)
__init__
¶
Source code in vllm/model_executor/models/bert_with_rope.py
NomicRouter
¶
Bases: Module
Source code in vllm/model_executor/models/bert_with_rope.py
__init__
¶
forward
¶
Source code in vllm/model_executor/models/bert_with_rope.py
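Putting NomicRouter and NomicExperts together: the router softmaxes per-expert logits and keeps the moe_top_k best per token, and the chosen experts' outputs are then combined with those weights. A hedged sketch of the routing half:

import torch

def route(hidden, router_w, moe_top_k):
    # hidden: [tokens, hidden]; router_w: [num_experts, hidden]
    probs = torch.softmax(hidden @ router_w.t(), dim=-1)      # expert probabilities
    weights, expert_ids = probs.topk(moe_top_k, dim=-1)       # keep the top-k per token
    return weights, expert_ids

h = torch.randn(5, 64)
w, ids = route(h, torch.randn(8, 64), moe_top_k=2)
print(ids.shape, w.shape)  # torch.Size([5, 2]) torch.Size([5, 2])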
SnowflakeGteNewModel
¶
Bases: GteNewModel
Source code in vllm/model_executor/models/bert_with_rope.py
hf_to_vllm_mapper
class-attribute
instance-attribute
¶
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_substr={
"layer": "layers",
"attention.qkv_proj": "attn.qkv_proj",
"attention.o_proj": "attn.out_proj",
}
)