(Trained with Unsloth)
- config.json +4 -2
- generation_config.json +2 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +8 -2
- tokenizer_config.json +1 -1
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "frameai/Loxa-1.6B",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -22,6 +22,7 @@
   "num_attention_heads": 32,
   "num_hidden_layers": 23,
   "num_key_value_heads": 8,
+  "pad_token_id": 128004,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
@@ -33,8 +34,9 @@
   },
   "rope_theta": 500000.0,
   "tie_word_embeddings": true,
-  "torch_dtype": "
+  "torch_dtype": "float16",
   "transformers_version": "4.47.1",
+  "unsloth_version": "2025.1.5",
   "use_cache": true,
   "vocab_size": 128256
 }
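The config.json changes register a padding token id (128004, Llama 3's <|finetune_right_pad_id|>) and switch the stored checkpoint dtype to float16. A minimal sketch for reading these fields back, assuming the repo id matches the _name_or_path recorded above:

```python
# Minimal sketch: read back the fields this commit touches in config.json.
# The repo id "frameai/Loxa-1.6B" is taken from _name_or_path above and is
# an assumption; point from_pretrained at the actual checkpoint location.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("frameai/Loxa-1.6B")
print(config.pad_token_id)  # 128004
print(config.torch_dtype)   # torch.float16
print(config.vocab_size)    # 128256
```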
generation_config.json
CHANGED
@@ -6,6 +6,8 @@
     128008,
     128009
   ],
+  "max_length": 131072,
+  "pad_token_id": 128004,
   "temperature": 0.6,
   "top_p": 0.9,
   "transformers_version": "4.47.1"
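generation_config.json now carries a pad_token_id matching config.json and a max_length of 131072, the same value as the tokenizer's model_max_length. A hedged sketch of how these stored defaults flow into generate(), again with the repo id assumed:

```python
# Sketch: the new defaults (pad_token_id=128004, max_length=131072) are read
# from generation_config.json without explicit arguments. Repo id assumed.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("frameai/Loxa-1.6B")
model = AutoModelForCausalLM.from_pretrained(
    "frameai/Loxa-1.6B", torch_dtype=torch.float16
)
inputs = tok("Hello", return_tensors="pt")
# do_sample=True activates the stored temperature=0.6 / top_p=0.9 defaults.
out = model.generate(**inputs, do_sample=True, max_new_tokens=32)
print(tok.decode(out[0], skip_special_tokens=True))
```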
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bdfd236d01886cb61004b62b7ca9d86247a12f6d984b1c15ca9e207f2935fdb
+size 3323200206
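The new pytorch_model.bin is committed as a Git LFS pointer rather than raw weights. Its 3,323,200,206-byte size is roughly what ~1.6B parameters occupy at 2 bytes each in float16, consistent with the model name and the torch_dtype change above. A sketch for verifying a downloaded copy against the pointer's sha256 oid (the local path is an assumption):

```python
# Sketch: check a locally downloaded pytorch_model.bin against the LFS oid.
# The file path is an assumption; adjust to wherever the weights were fetched.
import hashlib

EXPECTED_OID = "6bdfd236d01886cb61004b62b7ca9d86247a12f6d984b1c15ca9e207f2935fdb"

digest = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        digest.update(chunk)
assert digest.hexdigest() == EXPECTED_OID, "sha256 mismatch with the LFS pointer"
print("ok:", digest.hexdigest())
```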
special_tokens_map.json
CHANGED
@@ -6,6 +6,12 @@
     "rstrip": false,
     "single_word": false
   },
-  "eos_token":
-
+  "eos_token": {
+    "content": "<|eot_id|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|finetune_right_pad_id|>"
 }
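special_tokens_map.json replaces the previous eos_token entry with a full token record for <|eot_id|> and declares <|finetune_right_pad_id|> as the pad token, matching the pad_token_id 128004 written elsewhere in this commit. A quick consistency check, with the repo id assumed as before:

```python
# Sketch: confirm the eos/pad tokens line up with the ids written to the
# config files. Repo id is assumed from _name_or_path.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("frameai/Loxa-1.6B")
print(tok.eos_token)     # <|eot_id|>
print(tok.pad_token)     # <|finetune_right_pad_id|>
print(tok.pad_token_id)  # 128004, matching config.json and generation_config.json
```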
tokenizer_config.json
CHANGED
@@ -2059,7 +2059,7 @@
     "attention_mask"
   ],
   "model_max_length": 131072,
-  "pad_token": "<|
+  "pad_token": "<|finetune_right_pad_id|>",
   "padding_side": "left",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }
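With a concrete pad_token now set in tokenizer_config.json and padding_side already "left", batches of uneven prompts can be padded without warnings, and left padding keeps the prompt ends aligned for decoder-only generation. A short sketch (repo id assumed):

```python
# Sketch: batch-encode prompts of different lengths; padding_side="left" means
# pad ids (128004) appear at the start of the shorter sequence. Repo id assumed.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("frameai/Loxa-1.6B")
batch = tok(["Hi", "A much longer prompt"], padding=True, return_tensors="pt")
print(batch["input_ids"][0])       # begins with pad id 128004
print(batch["attention_mask"][0])  # zeros over the left padding
```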