trohrbaugh committed on
Commit
cdd7014
·
verified ·
1 Parent(s): 9b2b1d4

Add 8 KNOWN_BASES entries batch 1+2: GLM-4.x, Seed-OSS, Bailing, Kimi, ERNIE-4.5, Qianfan, LLaDA2, InternS1

Browse files
Files changed (1) hide show
  1. scan.py +26 -3
scan.py CHANGED
@@ -128,9 +128,16 @@ KNOWN_BASES = {
128
  "bailing_v2": {
129
  "name": "AntGroup Bailing-V2 / V2.5 (inclusionAI Ling)",
130
  "vocab_size": 157184,
131
- "model_type_patterns": ["bailing_hybrid", "bailing_moe"],
132
- # V2 = bailing_moe (MoE 256/8, 4 dense prefix, GQA 64→8)
133
- # V2.5 = bailing_hybrid: adds MLA kv_lora_rank + linear-attn + MTP
 
 
 
 
 
 
 
134
  },
135
  "kimi": {
136
  "name": "Moonshot Kimi (K2, Kimi-Linear)",
@@ -138,6 +145,22 @@ KNOWN_BASES = {
138
  "model_type_patterns": ["kimi_linear", "kimi"],
139
  # Kimi-Linear adds linear_attn_config + MLA + MTP on Kimi MoE backbone
140
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  }
142
 
143
 
 
128
  "bailing_v2": {
129
  "name": "AntGroup Bailing-V2 / V2.5 (inclusionAI Ling)",
130
  "vocab_size": 157184,
131
+ "model_type_patterns": ["bailing_hybrid", "bailing_moe", "bailingmm_moe_v2_lite"],
132
+ # V2 = bailing_moe; V2.5 = bailing_hybrid (MLA + linear-attn + MTP)
133
+ # bailingmm_moe_v2_lite = Ming-flash-omni multimodal lite variant
134
+ },
135
+ "llada2": {
136
+ "name": "inclusionAI LLaDA2 (discrete-diffusion MoE)",
137
+ "vocab_size": 157184,
138
+ "model_type_patterns": ["llada2_moe", "llada2"],
139
+ # Shares Bailing-V2 tokenizer/expert geometry (256/8, vocab 157184)
140
+ # but uses discrete-diffusion masked LM — non-autoregressive, separate family
141
  },
142
  "kimi": {
143
  "name": "Moonshot Kimi (K2, Kimi-Linear)",
 
145
  "model_type_patterns": ["kimi_linear", "kimi"],
146
  # Kimi-Linear adds linear_attn_config + MLA + MTP on Kimi MoE backbone
147
  },
148
+ "ernie4_5_vl": {
149
+ "name": "Baidu ERNIE 4.5 VL (MoE multimodal)",
150
+ "vocab_size": 103424,
151
+ "model_type_patterns": ["ernie4_5_moe_vl", "ernie4_5_vl"],
152
+ },
153
+ "qianfan_vl": {
154
+ "name": "Baidu Qianfan-VL (dense multimodal)",
155
+ "vocab_size": 182025,
156
+ "model_type_patterns": ["qianfan_vl", "qianfan"],
157
+ # Distinct Baidu tokenizer from ERNIE — two separate VLM lineages
158
+ },
159
+ "interns1": {
160
+ "name": "InternLM S1 (dense, long-chain reasoning)",
161
+ "vocab_size": 153216,
162
+ "model_type_patterns": ["interns1", "intern_s1"],
163
+ },
164
  }
165
 
166