vllm.model_executor.models.llama_eagle
EagleLlamaForCausalLM
Bases: LlamaForCausalLM
Source code in vllm/model_executor/models/llama_eagle.py
logits_processor (instance-attribute)
logits_processor = LogitsProcessor(
vocab_size, scale=logit_scale
)
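
The scale argument is read off the model config. Below is a minimal sketch of the construction, assuming the common vLLM pattern in which logit_scale falls back to 1.0 when the config does not define it; the stand-in config object and its values are assumptions of this example:

from types import SimpleNamespace

from vllm.model_executor.layers.logits_processor import LogitsProcessor

# Stand-in for the draft model's Hugging Face config (hypothetical values).
config = SimpleNamespace(vocab_size=32000)

# Assumed fallback: scale logits by 1.0 when logit_scale is absent.
logit_scale = getattr(config, "logit_scale", 1.0)
logits_processor = LogitsProcessor(config.vocab_size, scale=logit_scale)
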
model (instance-attribute)
model = LlamaModel(
vllm_config=vllm_config,
prefix="model",
start_layer_id=target_layer_num,
)
__init__
__init__(*, vllm_config: VllmConfig, prefix: str = '')
forward
load_weights
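
Neither signature is reproduced on this page. As orientation only, the sketch below mimics the EAGLE draft-model calling convention, in which the draft head consumes the target model's hidden states alongside the token ids; every name and shape here is an assumption, not the documented API:

# Hypothetical sketch of the EAGLE draft forward contract; consult
# llama_eagle.py in your vLLM version for the actual signatures.
import torch

num_tokens, hidden_size = 4, 4096  # assumed sizes
input_ids = torch.randint(0, 32000, (num_tokens,))
positions = torch.arange(num_tokens)

# Hidden states produced by the *target* model for the same tokens; EAGLE
# conditions the draft layer on these in addition to the token embeddings.
target_hidden = torch.randn(num_tokens, hidden_size)

# draft = EagleLlamaForCausalLM(vllm_config=vllm_config)  # needs a real config
# out = draft(input_ids, positions, target_hidden)
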
LlamaModel
Bases: Module
Source code in vllm/model_executor/models/llama_eagle.py
embed_tokens (instance-attribute)
embed_tokens = VocabParallelEmbedding(
vocab_size,
hidden_size,
prefix=maybe_prefix(prefix, "embed_tokens"),
)
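
maybe_prefix (from vllm.model_executor.models.utils) joins a parent prefix with a child module name. A self-contained re-implementation to illustrate its assumed semantics:

# Illustrative re-implementation of maybe_prefix: join the parent prefix
# and the child name, or return the name alone when the prefix is empty.
def maybe_prefix(prefix: str, name: str) -> str:
    return f"{prefix}.{name}" if prefix else name

print(maybe_prefix("", "embed_tokens"))       # embed_tokens
print(maybe_prefix("model", "embed_tokens"))  # model.embed_tokens
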
layers (instance-attribute)
layers = ModuleList(
    [
        LlamaDecoderLayer(
            config,
            i == 0,  # True only for the first draft layer
            prefix=maybe_prefix(
                prefix, f"layers.{i + start_layer_id}"
            ),
        )
        for i in range(num_hidden_layers)
    ]
)
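
Because start_layer_id offsets every index, the draft layers continue the target model's layer numbering rather than restarting at zero, which keeps their parameter names disjoint from the target's. A minimal sketch with assumed sizes:

# Minimal sketch, assuming a hypothetical 32-layer target model and a
# single-layer EAGLE draft head.
start_layer_id = 32    # assumption: target model depth
num_hidden_layers = 1  # assumption: one draft decoder layer
names = [f"layers.{i + start_layer_id}" for i in range(num_hidden_layers)]
print(names)  # ['layers.32'] -- no collision with layers.0 .. layers.31
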
__init__
__init__(
*,
vllm_config: VllmConfig,
prefix: str = "",
start_layer_id: int = 0,
) -> None