Auron-1.1B / config.json
nyxia's picture
Upload Chimera 1.1B at step 249500
b2f872b verified
{
"dim": 2048,
"n_layers": 24,
"vocab_size": 151936,
"max_seq_len": 2048,
"n_heads": 32,
"n_kv_heads": 8,
"head_dim": 64,
"gdn_expand_v": 2,
"gdn_head_dim": 64,
"gdn_n_heads": 32,
"conv_kernel": 4,
"gdn_use_gate": true,
"gdn_use_short_conv": true,
"ffn_mult": 2.67,
"attn_interval": 4,
"use_x0_inject": true,
"use_resid_lambdas": true,
"use_skip_connections": true,
"use_diff_attn": false,
"rope_base": 10000.0,
"partial_rotary_factor": 0.25,
"n_bottom": 6,
"n_physical_top": 6,
"n_top_loops": 3,
"architecture": "Chimera",
"config_class": "ChimeraConfig",
"topology": "6 bottom + 6x3 top = 24 virtual",
"step": 249500,
"total_params": 1072527280,
"size_label": "1.1B",
"model_type": "zara-ml"
}