Motif-Technologies
/

Motif-2.6B

Text Generation

text-generation-inference

Model card Files Files and versions

leejunhyeok committed on Jun 9, 2025

Commit

9be8a4a

·

verified ·

1 Parent(s): 80e1a1c

Update config.json

Files changed (1) hide show

config.json +11 -8

config.json CHANGED Viewed

@@ -8,11 +8,12 @@
     "AutoConfig": "configuration_motif.MotifConfig",
     "AutoModelForCausalLM": "modeling_motif.MotifForCausalLM"
   },
   "bfloat16": true,
   "bos_token_id": 219396,
   "continual_training": false,
   "decoder_split_layers": [],
-  "decontam_attn": false,
   "dim_model_base": 2048,
   "dim_model_base_attn": 128,
   "dim_model_base_init": 2048,
@@ -24,6 +25,7 @@
   "encoder_split_layers": [],
   "eos_token_id": 219395,
   "first_expansion": false,
   "gate_up_proj_alpha": 0.15625,
   "hidden_act": "poly_norm",
   "hidden_act_moe": null,
@@ -35,6 +37,7 @@
   "intermediate_size": 8192,
   "k_proj_alpha": 0.15625,
   "lm_head_alpha": null,
   "loss_reduction": "mean",
   "max_position_embeddings": 16384,
   "max_window_layers": 28,
@@ -52,7 +55,7 @@
   "num_attention_heads": 16,
   "num_hidden_layers": 32,
   "num_key_value_heads": 16,
-  "num_stages": false,
   "o_proj_alpha": 0.15625,
   "post_attention_layernorm_alpha": null,
   "q_proj_alpha": 0.15625,
@@ -64,18 +67,18 @@
   "scoring_func": null,
   "seq_aux": null,
   "sliding_window": null,
-  "tensor_parallel": true,
   "tie_word_embeddings": true,
   "topk_group": null,
   "topk_method": null,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.3",
-  "use_advanced_parallelization": true,
   "use_bias": false,
-  "use_cache": false,
   "use_emb_alpha": false,
   "use_fused_mlp": null,
-  "use_moreh_attention": true,
   "use_moreh_moe": false,
   "use_mrope": false,
   "use_norm_alpha": false,

     "AutoConfig": "configuration_motif.MotifConfig",
     "AutoModelForCausalLM": "modeling_motif.MotifForCausalLM"
   },
+  "batch_num": null,
   "bfloat16": true,
   "bos_token_id": 219396,
   "continual_training": false,
   "decoder_split_layers": [],
+  "decontam_attn": true,
   "dim_model_base": 2048,
   "dim_model_base_attn": 128,
   "dim_model_base_init": 2048,
   "encoder_split_layers": [],
   "eos_token_id": 219395,
   "first_expansion": false,
+  "fused_rope": false,
   "gate_up_proj_alpha": 0.15625,
   "hidden_act": "poly_norm",
   "hidden_act_moe": null,
   "intermediate_size": 8192,
   "k_proj_alpha": 0.15625,
   "lm_head_alpha": null,
+  "load_pretrained": "",
   "loss_reduction": "mean",
   "max_position_embeddings": 16384,
   "max_window_layers": 28,
   "num_attention_heads": 16,
   "num_hidden_layers": 32,
   "num_key_value_heads": 16,
+  "num_stages": 3,
   "o_proj_alpha": 0.15625,
   "post_attention_layernorm_alpha": null,
   "q_proj_alpha": 0.15625,
   "scoring_func": null,
   "seq_aux": null,
   "sliding_window": null,
+  "tensor_parallel": false,
   "tie_word_embeddings": true,
   "topk_group": null,
   "topk_method": null,
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.3",
+  "use_advanced_parallelization": false,
   "use_bias": false,
+  "use_cache": true,
   "use_emb_alpha": false,
   "use_fused_mlp": null,
+  "use_moreh_attention": false,
   "use_moreh_moe": false,
   "use_mrope": false,
   "use_norm_alpha": false,