leejunhyeok committed · Commit 9be8a4a · verified · 1 Parent(s): 80e1a1c

Update config.json

Files changed (1):
  1. config.json (+11 -8)
config.json CHANGED
@@ -8,11 +8,12 @@
     "AutoConfig": "configuration_motif.MotifConfig",
     "AutoModelForCausalLM": "modeling_motif.MotifForCausalLM"
   },
+  "batch_num": null,
   "bfloat16": true,
   "bos_token_id": 219396,
   "continual_training": false,
   "decoder_split_layers": [],
-  "decontam_attn": false,
+  "decontam_attn": true,
   "dim_model_base": 2048,
   "dim_model_base_attn": 128,
   "dim_model_base_init": 2048,
@@ -24,6 +25,7 @@
   "encoder_split_layers": [],
   "eos_token_id": 219395,
   "first_expansion": false,
+  "fused_rope": false,
   "gate_up_proj_alpha": 0.15625,
   "hidden_act": "poly_norm",
   "hidden_act_moe": null,
@@ -35,6 +37,7 @@
   "intermediate_size": 8192,
   "k_proj_alpha": 0.15625,
   "lm_head_alpha": null,
+  "load_pretrained": "",
   "loss_reduction": "mean",
   "max_position_embeddings": 16384,
   "max_window_layers": 28,
@@ -52,7 +55,7 @@
   "num_attention_heads": 16,
   "num_hidden_layers": 32,
   "num_key_value_heads": 16,
-  "num_stages": false,
+  "num_stages": 3,
   "o_proj_alpha": 0.15625,
   "post_attention_layernorm_alpha": null,
   "q_proj_alpha": 0.15625,
@@ -64,18 +67,18 @@
   "scoring_func": null,
   "seq_aux": null,
   "sliding_window": null,
-  "tensor_parallel": true,
+  "tensor_parallel": false,
   "tie_word_embeddings": true,
   "topk_group": null,
   "topk_method": null,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.3",
-  "use_advanced_parallelization": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.3",
+  "use_advanced_parallelization": false,
   "use_bias": false,
-  "use_cache": false,
+  "use_cache": true,
   "use_emb_alpha": false,
   "use_fused_mlp": null,
-  "use_moreh_attention": true,
+  "use_moreh_attention": false,
   "use_moreh_moe": false,
   "use_mrope": false,
   "use_norm_alpha": false,