Skip to content

vllm.transformers_utils.configs.chatglm

ChatGLMConfig

Bases: PretrainedConfig

Source code in vllm/transformers_utils/configs/chatglm.py
class ChatGLMConfig(PretrainedConfig):
    """Configuration object for models whose ``model_type`` is ``"chatglm"``.

    Every constructor argument is stored on the instance under its own
    name. In addition, ``padded_vocab_size`` is also stored as
    ``vocab_size`` and ``seq_length`` is also stored as
    ``max_position_embeddings``. Keyword arguments that are not named in
    the signature are handed to the base-class constructor.
    """

    model_type = "chatglm"
    # Alternate attribute names mapped to the names this class stores
    # (presumably resolved by the base class's attribute_map handling --
    # confirm against PretrainedConfig).
    attribute_map = {
        "num_hidden_layers": "num_layers",
        "n_head_kv": "multi_query_group_num",
    }

    def __init__(
        self,
        num_layers=28,
        padded_vocab_size=65024,
        hidden_size=4096,
        ffn_hidden_size=13696,
        kv_channels=128,
        num_attention_heads=32,
        seq_length=2048,
        hidden_dropout=0.0,
        attention_dropout=0.0,
        layernorm_epsilon=1e-5,
        rmsnorm=True,
        apply_residual_connection_post_layernorm=False,
        post_layer_norm=True,
        add_bias_linear=False,
        add_qkv_bias=False,
        interleaved_qkv=False,
        bias_dropout_fusion=True,
        multi_query_attention=False,
        multi_query_group_num=1,
        apply_query_key_layer_scaling=True,
        attention_softmax_in_fp32=True,
        fp32_residual_connection=False,
        quantization_bit=0,
        pre_seq_len=None,
        prefix_projection=False,
        **kwargs,
    ):
        """Store the given settings, then initialise the base class.

        Each named argument is copied verbatim onto ``self``; no
        validation or conversion is performed here. ``**kwargs`` is
        forwarded unchanged to ``super().__init__``.
        """
        # Layer count and sizes. The vocabulary size is exposed under
        # both ``vocab_size`` and ``padded_vocab_size``.
        self.num_layers = num_layers
        self.vocab_size = padded_vocab_size
        self.padded_vocab_size = padded_vocab_size
        self.hidden_size = hidden_size
        self.ffn_hidden_size = ffn_hidden_size
        self.kv_channels = kv_channels
        self.num_attention_heads = num_attention_heads

        # ``seq_length`` is mirrored as ``max_position_embeddings`` for
        # compatibility with long LoRA.
        self.seq_length = seq_length
        self.max_position_embeddings = seq_length

        # Dropout and normalisation settings.
        self.hidden_dropout = hidden_dropout
        self.attention_dropout = attention_dropout
        self.layernorm_epsilon = layernorm_epsilon
        self.rmsnorm = rmsnorm
        self.apply_residual_connection_post_layernorm = (
            apply_residual_connection_post_layernorm
        )
        self.post_layer_norm = post_layer_norm

        # Bias and attention settings.
        self.add_bias_linear = add_bias_linear
        self.add_qkv_bias = add_qkv_bias
        self.bias_dropout_fusion = bias_dropout_fusion
        self.multi_query_attention = multi_query_attention
        self.multi_query_group_num = multi_query_group_num
        self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        self.attention_softmax_in_fp32 = attention_softmax_in_fp32
        self.fp32_residual_connection = fp32_residual_connection

        # Remaining options.
        self.quantization_bit = quantization_bit
        self.pre_seq_len = pre_seq_len
        self.prefix_projection = prefix_projection
        self.interleaved_qkv = interleaved_qkv

        # The base class is initialised last, after all fields above
        # have been set.
        super().__init__(**kwargs)

add_bias_linear instance-attribute

add_bias_linear = add_bias_linear

add_qkv_bias instance-attribute

add_qkv_bias = add_qkv_bias

apply_query_key_layer_scaling instance-attribute

apply_query_key_layer_scaling = (
    apply_query_key_layer_scaling
)

apply_residual_connection_post_layernorm instance-attribute

apply_residual_connection_post_layernorm = (
    apply_residual_connection_post_layernorm
)

attention_dropout instance-attribute

attention_dropout = attention_dropout

attention_softmax_in_fp32 instance-attribute

attention_softmax_in_fp32 = attention_softmax_in_fp32

attribute_map class-attribute instance-attribute

attribute_map = {
    "num_hidden_layers": "num_layers",
    "n_head_kv": "multi_query_group_num",
}

bias_dropout_fusion instance-attribute

bias_dropout_fusion = bias_dropout_fusion

ffn_hidden_size instance-attribute

ffn_hidden_size = ffn_hidden_size

fp32_residual_connection instance-attribute

fp32_residual_connection = fp32_residual_connection

hidden_dropout instance-attribute

hidden_dropout = hidden_dropout

hidden_size instance-attribute

hidden_size = hidden_size

interleaved_qkv instance-attribute

interleaved_qkv = interleaved_qkv

kv_channels instance-attribute

kv_channels = kv_channels

layernorm_epsilon instance-attribute

layernorm_epsilon = layernorm_epsilon

max_position_embeddings instance-attribute

max_position_embeddings = seq_length

model_type class-attribute instance-attribute

model_type = 'chatglm'

multi_query_attention instance-attribute

multi_query_attention = multi_query_attention

multi_query_group_num instance-attribute

multi_query_group_num = multi_query_group_num

num_attention_heads instance-attribute

num_attention_heads = num_attention_heads

num_layers instance-attribute

num_layers = num_layers

padded_vocab_size instance-attribute

padded_vocab_size = padded_vocab_size

post_layer_norm instance-attribute

post_layer_norm = post_layer_norm

pre_seq_len instance-attribute

pre_seq_len = pre_seq_len

prefix_projection instance-attribute

prefix_projection = prefix_projection

quantization_bit instance-attribute

quantization_bit = quantization_bit

rmsnorm instance-attribute

rmsnorm = rmsnorm

seq_length instance-attribute

seq_length = seq_length

vocab_size instance-attribute

vocab_size = padded_vocab_size

__init__

__init__(
    num_layers=28,
    padded_vocab_size=65024,
    hidden_size=4096,
    ffn_hidden_size=13696,
    kv_channels=128,
    num_attention_heads=32,
    seq_length=2048,
    hidden_dropout=0.0,
    attention_dropout=0.0,
    layernorm_epsilon=1e-05,
    rmsnorm=True,
    apply_residual_connection_post_layernorm=False,
    post_layer_norm=True,
    add_bias_linear=False,
    add_qkv_bias=False,
    interleaved_qkv=False,
    bias_dropout_fusion=True,
    multi_query_attention=False,
    multi_query_group_num=1,
    apply_query_key_layer_scaling=True,
    attention_softmax_in_fp32=True,
    fp32_residual_connection=False,
    quantization_bit=0,
    pre_seq_len=None,
    prefix_projection=False,
    **kwargs,
)
Source code in vllm/transformers_utils/configs/chatglm.py
def __init__(
    self,
    num_layers=28,
    padded_vocab_size=65024,
    hidden_size=4096,
    ffn_hidden_size=13696,
    kv_channels=128,
    num_attention_heads=32,
    seq_length=2048,
    hidden_dropout=0.0,
    attention_dropout=0.0,
    layernorm_epsilon=1e-5,
    rmsnorm=True,
    apply_residual_connection_post_layernorm=False,
    post_layer_norm=True,
    add_bias_linear=False,
    add_qkv_bias=False,
    interleaved_qkv=False,
    bias_dropout_fusion=True,
    multi_query_attention=False,
    multi_query_group_num=1,
    apply_query_key_layer_scaling=True,
    attention_softmax_in_fp32=True,
    fp32_residual_connection=False,
    quantization_bit=0,
    pre_seq_len=None,
    prefix_projection=False,
    **kwargs,
):
    """Store the given settings, then initialise the base class.

    Each named argument is copied verbatim onto ``self``; no validation
    or conversion is performed here. ``padded_vocab_size`` is stored as
    both ``vocab_size`` and ``padded_vocab_size``, and ``seq_length`` is
    stored as both ``seq_length`` and ``max_position_embeddings``.
    ``**kwargs`` is forwarded unchanged to ``super().__init__``.
    """
    # Layer count and sizes.
    self.num_layers = num_layers
    self.vocab_size = padded_vocab_size
    self.padded_vocab_size = padded_vocab_size
    self.hidden_size = hidden_size
    self.ffn_hidden_size = ffn_hidden_size
    self.kv_channels = kv_channels
    self.num_attention_heads = num_attention_heads

    # ``seq_length`` is mirrored as ``max_position_embeddings`` for
    # compatibility with long LoRA.
    self.seq_length = seq_length
    self.max_position_embeddings = seq_length

    # Dropout and normalisation settings.
    self.hidden_dropout = hidden_dropout
    self.attention_dropout = attention_dropout
    self.layernorm_epsilon = layernorm_epsilon
    self.rmsnorm = rmsnorm
    self.apply_residual_connection_post_layernorm = (
        apply_residual_connection_post_layernorm
    )
    self.post_layer_norm = post_layer_norm

    # Bias and attention settings.
    self.add_bias_linear = add_bias_linear
    self.add_qkv_bias = add_qkv_bias
    self.bias_dropout_fusion = bias_dropout_fusion
    self.multi_query_attention = multi_query_attention
    self.multi_query_group_num = multi_query_group_num
    self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
    self.attention_softmax_in_fp32 = attention_softmax_in_fp32
    self.fp32_residual_connection = fp32_residual_connection

    # Remaining options.
    self.quantization_bit = quantization_bit
    self.pre_seq_len = pre_seq_len
    self.prefix_projection = prefix_projection
    self.interleaved_qkv = interleaved_qkv

    # The base class is initialised last, after all fields above have
    # been set.
    super().__init__(**kwargs)