SlitherCode committed on
Commit
10ad828
·
verified ·
1 Parent(s): c4b712b

Upload ParchmentForCausalLM

Browse files
Files changed (3) hide show
  1. README.md +4 -4
  2. config.json +3 -0
  3. model.safetensors +2 -2
README.md CHANGED
@@ -2,10 +2,10 @@
2
  language: en
3
  license: mit
4
  tags:
5
- - pretrained
6
- - causal-lm
7
- - fineweb-edu
8
- - custom-architecture
9
  ---
10
 
11
  # tiny-edu-166m (ParchmentLM)
 
2
  language: en
3
  license: mit
4
  tags:
5
+ - pretrained
6
+ - causal-lm
7
+ - fineweb-edu
8
+ - custom-architecture
9
  ---
10
 
11
  # tiny-edu-166m (ParchmentLM)
config.json CHANGED
@@ -7,10 +7,13 @@
7
  "d_model": 768,
8
  "dtype": "float32",
9
  "eos_token_id": 100257,
 
10
  "max_seq_len": 1024,
11
  "model_type": "parchment",
12
  "n_heads": 12,
13
  "n_layers": 12,
 
 
14
  "pad_token_id": 100257,
15
  "rms_norm_eps": 1e-06,
16
  "rope_base": 10000.0,
 
7
  "d_model": 768,
8
  "dtype": "float32",
9
  "eos_token_id": 100257,
10
+ "hidden_size": 768,
11
  "max_seq_len": 1024,
12
  "model_type": "parchment",
13
  "n_heads": 12,
14
  "n_layers": 12,
15
+ "num_attention_heads": 12,
16
+ "num_hidden_layers": 12,
17
  "pad_token_id": 100257,
18
  "rms_norm_eps": 1e-06,
19
  "rope_base": 10000.0,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6ccc0c579af181fcd9d676bf6446a1fac97cc678cf10bb192da9c5bd949dcad
3
- size 647877512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a92b4dd4cec11886a1edfade108d27c6feb30d673bea50a1c21c378ad03f659
3
+ size 647880264