{
  "dim": 2048,
  "n_layers": 24,
  "vocab_size": 151936,
  "max_seq_len": 2048,
  "n_heads": 32,
  "n_kv_heads": 8,
  "head_dim": 64,
  "gdn_expand_v": 2,
  "gdn_head_dim": 64,
  "gdn_n_heads": 32,
  "conv_kernel": 4,
  "gdn_use_gate": true,
  "gdn_use_short_conv": true,
  "ffn_mult": 2.67,
  "attn_interval": 4,
  "use_x0_inject": true,
  "use_resid_lambdas": true,
  "use_skip_connections": true,
  "use_diff_attn": false,
  "rope_base": 10000.0,
  "partial_rotary_factor": 0.25,
  "n_bottom": 6,
  "n_physical_top": 6,
  "n_top_loops": 3,
  "architecture": "Chimera",
  "config_class": "ChimeraConfig",
  "topology": "6 bottom + 6x3 top = 24 virtual",
  "step": 249500,
  "total_params": 1072527280,
  "size_label": "1.1B",
  "model_type": "zara-ml"
}