bimabk committed on
Commit
513af2c
·
verified ·
1 Parent(s): e4841eb

Upload task output 1

Browse files
README.md CHANGED
@@ -3,7 +3,7 @@ base_model: None
3
  library_name: peft
4
  pipeline_tag: text-generation
5
  tags:
6
- - base_model:adapter:/cache/models/Qwen--Qwen2-7B-Instruct
7
  - grpo
8
  - lora
9
  - transformers
 
3
  library_name: peft
4
  pipeline_tag: text-generation
5
  tags:
6
+ - base_model:adapter:/cache/models/unsloth--Llama-3.2-3B-Instruct
7
  - grpo
8
  - lora
9
  - transformers
adapter_config.json CHANGED
@@ -29,12 +29,12 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "down_proj",
33
  "v_proj",
34
  "up_proj",
35
- "q_proj",
36
- "gate_proj",
37
  "k_proj",
 
 
38
  "o_proj"
39
  ],
40
  "target_parameters": null,
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "gate_proj",
33
  "v_proj",
34
  "up_proj",
 
 
35
  "k_proj",
36
+ "q_proj",
37
+ "down_proj",
38
  "o_proj"
39
  ],
40
  "target_parameters": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2c2ddd97d678c463706fa230da0e93fa52db448900cc8f2fe481cbee6fc08c8
3
- size 323014168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d5508f5dc8171e7c93d74369b8e1af48e33dffac0fbda8ae7e54838fea4911d
3
+ size 194563400
chat_template.jinja CHANGED
@@ -1,6 +1,93 @@
1
- {% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
2
- You are a helpful assistant.<|im_end|>
3
- ' }}{% endif %}{{'<|im_start|>' + message['role'] + '
4
- ' + message['content'] + '<|im_end|>' + '
5
- '}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
6
- ' }}{% endif %}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- if strftime_now is defined %}
10
+ {%- set date_string = strftime_now("%d %b %Y") %}
11
+ {%- else %}
12
+ {%- set date_string = "26 Jul 2024" %}
13
+ {%- endif %}
14
+ {%- endif %}
15
+ {%- if not tools is defined %}
16
+ {%- set tools = none %}
17
+ {%- endif %}
18
+
19
+ {#- This block extracts the system message, so we can slot it into the right place. #}
20
+ {%- if messages[0]['role'] == 'system' %}
21
+ {%- set system_message = messages[0]['content']|trim %}
22
+ {%- set messages = messages[1:] %}
23
+ {%- else %}
24
+ {%- set system_message = "" %}
25
+ {%- endif %}
26
+
27
+ {#- System message #}
28
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
29
+ {%- if tools is not none %}
30
+ {{- "Environment: ipython\n" }}
31
+ {%- endif %}
32
+ {{- "Cutting Knowledge Date: December 2023\n" }}
33
+ {{- "Today Date: " + date_string + "\n\n" }}
34
+ {%- if tools is not none and not tools_in_user_message %}
35
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
36
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
37
+ {{- "Do not use variables.\n\n" }}
38
+ {%- for t in tools %}
39
+ {{- t | tojson(indent=4) }}
40
+ {{- "\n\n" }}
41
+ {%- endfor %}
42
+ {%- endif %}
43
+ {{- system_message }}
44
+ {{- "<|eot_id|>" }}
45
+
46
+ {#- Custom tools are passed in a user message with some extra guidance #}
47
+ {%- if tools_in_user_message and not tools is none %}
48
+ {#- Extract the first user message so we can plug it in here #}
49
+ {%- if messages | length != 0 %}
50
+ {%- set first_user_message = messages[0]['content']|trim %}
51
+ {%- set messages = messages[1:] %}
52
+ {%- else %}
53
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
54
+ {%- endif %}
55
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
56
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
57
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
58
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
59
+ {{- "Do not use variables.\n\n" }}
60
+ {%- for t in tools %}
61
+ {{- t | tojson(indent=4) }}
62
+ {{- "\n\n" }}
63
+ {%- endfor %}
64
+ {{- first_user_message + "<|eot_id|>"}}
65
+ {%- endif %}
66
+
67
+ {%- for message in messages %}
68
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
69
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
70
+ {%- elif 'tool_calls' in message %}
71
+ {%- if not message.tool_calls|length == 1 %}
72
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
73
+ {%- endif %}
74
+ {%- set tool_call = message.tool_calls[0].function %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {{- '{"name": "' + tool_call.name + '", ' }}
77
+ {{- '"parameters": ' }}
78
+ {{- tool_call.arguments | tojson }}
79
+ {{- "}" }}
80
+ {{- "<|eot_id|>" }}
81
+ {%- elif message.role == "tool" or message.role == "ipython" %}
82
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
83
+ {%- if message.content is mapping or message.content is iterable %}
84
+ {{- message.content | tojson }}
85
+ {%- else %}
86
+ {{- message.content }}
87
+ {%- endif %}
88
+ {{- "<|eot_id|>" }}
89
+ {%- endif %}
90
+ {%- endfor %}
91
+ {%- if add_generation_prompt %}
92
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
93
+ {%- endif %}
loss.txt CHANGED
@@ -1 +1 @@
1
- 31,no_eval
 
1
+ 9,no_eval
special_tokens_map.json CHANGED
@@ -1,17 +1,20 @@
1
  {
2
- "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>"
5
- ],
 
 
 
6
  "eos_token": {
7
- "content": "<|im_end|>",
8
  "lstrip": false,
9
  "normalized": false,
10
  "rstrip": false,
11
  "single_word": false
12
  },
13
  "pad_token": {
14
- "content": "<|endoftext|>",
15
  "lstrip": false,
16
  "normalized": false,
17
  "rstrip": false,
 
1
  {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
  "eos_token": {
10
+ "content": "<|eot_id|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "<|finetune_right_pad_id|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcfe42da0a4497e8b2b172c1f9f4ec423a46dc12907f4349c55025f670422ba9
3
- size 11418266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
tokenizer_config.json CHANGED
@@ -1,24 +1,2048 @@
1
  {
2
- "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
- "151643": {
5
- "content": "<|endoftext|>",
6
  "lstrip": false,
7
  "normalized": false,
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
11
  },
12
- "151644": {
13
- "content": "<|im_start|>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
19
  },
20
- "151645": {
21
- "content": "<|im_end|>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
@@ -26,18 +2050,17 @@
26
  "special": true
27
  }
28
  },
29
- "additional_special_tokens": [
30
- "<|im_start|>",
31
- "<|im_end|>"
32
- ],
33
- "bos_token": null,
34
- "clean_up_tokenization_spaces": false,
35
- "eos_token": "<|im_end|>",
36
- "errors": "replace",
37
  "extra_special_tokens": {},
 
 
 
 
38
  "model_max_length": 131072,
39
- "pad_token": "<|endoftext|>",
40
- "split_special_tokens": false,
41
- "tokenizer_class": "Qwen2Tokenizer",
42
  "unk_token": null
43
  }
 
1
  {
2
+ "add_bos_token": true,
3
  "added_tokens_decoder": {
4
+ "128000": {
5
+ "content": "<|begin_of_text|>",
6
  "lstrip": false,
7
  "normalized": false,
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
11
  },
12
+ "128001": {
13
+ "content": "<|end_of_text|>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
19
  },
20
+ "128002": {
21
+ "content": "<|reserved_special_token_0|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "128003": {
29
+ "content": "<|reserved_special_token_1|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "128004": {
37
+ "content": "<|finetune_right_pad_id|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "128005": {
45
+ "content": "<|reserved_special_token_2|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "128006": {
53
+ "content": "<|start_header_id|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "128007": {
61
+ "content": "<|end_header_id|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "128008": {
69
+ "content": "<|eom_id|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "128009": {
77
+ "content": "<|eot_id|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "128010": {
85
+ "content": "<|python_tag|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "128011": {
93
+ "content": "<|reserved_special_token_3|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "128012": {
101
+ "content": "<|reserved_special_token_4|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "128013": {
109
+ "content": "<|reserved_special_token_5|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "128014": {
117
+ "content": "<|reserved_special_token_6|>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "128015": {
125
+ "content": "<|reserved_special_token_7|>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "128016": {
133
+ "content": "<|reserved_special_token_8|>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "128017": {
141
+ "content": "<|reserved_special_token_9|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "128018": {
149
+ "content": "<|reserved_special_token_10|>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "128019": {
157
+ "content": "<|reserved_special_token_11|>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "128020": {
165
+ "content": "<|reserved_special_token_12|>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "128021": {
173
+ "content": "<|reserved_special_token_13|>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "128022": {
181
+ "content": "<|reserved_special_token_14|>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "128023": {
189
+ "content": "<|reserved_special_token_15|>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "128024": {
197
+ "content": "<|reserved_special_token_16|>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "128025": {
205
+ "content": "<|reserved_special_token_17|>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "128026": {
213
+ "content": "<|reserved_special_token_18|>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "128027": {
221
+ "content": "<|reserved_special_token_19|>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "128028": {
229
+ "content": "<|reserved_special_token_20|>",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "128029": {
237
+ "content": "<|reserved_special_token_21|>",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "128030": {
245
+ "content": "<|reserved_special_token_22|>",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "128031": {
253
+ "content": "<|reserved_special_token_23|>",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "128032": {
261
+ "content": "<|reserved_special_token_24|>",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "128033": {
269
+ "content": "<|reserved_special_token_25|>",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "128034": {
277
+ "content": "<|reserved_special_token_26|>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "128035": {
285
+ "content": "<|reserved_special_token_27|>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "128036": {
293
+ "content": "<|reserved_special_token_28|>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "128037": {
301
+ "content": "<|reserved_special_token_29|>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "128038": {
309
+ "content": "<|reserved_special_token_30|>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "128039": {
317
+ "content": "<|reserved_special_token_31|>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "128040": {
325
+ "content": "<|reserved_special_token_32|>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "128041": {
333
+ "content": "<|reserved_special_token_33|>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "128042": {
341
+ "content": "<|reserved_special_token_34|>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "128043": {
349
+ "content": "<|reserved_special_token_35|>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "128044": {
357
+ "content": "<|reserved_special_token_36|>",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "128045": {
365
+ "content": "<|reserved_special_token_37|>",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "128046": {
373
+ "content": "<|reserved_special_token_38|>",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "128047": {
381
+ "content": "<|reserved_special_token_39|>",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "128048": {
389
+ "content": "<|reserved_special_token_40|>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "128049": {
397
+ "content": "<|reserved_special_token_41|>",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "128050": {
405
+ "content": "<|reserved_special_token_42|>",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "128051": {
413
+ "content": "<|reserved_special_token_43|>",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "128052": {
421
+ "content": "<|reserved_special_token_44|>",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "128053": {
429
+ "content": "<|reserved_special_token_45|>",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "128054": {
437
+ "content": "<|reserved_special_token_46|>",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "128055": {
445
+ "content": "<|reserved_special_token_47|>",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "128056": {
453
+ "content": "<|reserved_special_token_48|>",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "128057": {
461
+ "content": "<|reserved_special_token_49|>",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "128058": {
469
+ "content": "<|reserved_special_token_50|>",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "128059": {
477
+ "content": "<|reserved_special_token_51|>",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "128060": {
485
+ "content": "<|reserved_special_token_52|>",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "128061": {
493
+ "content": "<|reserved_special_token_53|>",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "128062": {
501
+ "content": "<|reserved_special_token_54|>",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "128063": {
509
+ "content": "<|reserved_special_token_55|>",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "128064": {
517
+ "content": "<|reserved_special_token_56|>",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "128065": {
525
+ "content": "<|reserved_special_token_57|>",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "128066": {
533
+ "content": "<|reserved_special_token_58|>",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "128067": {
541
+ "content": "<|reserved_special_token_59|>",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "128068": {
549
+ "content": "<|reserved_special_token_60|>",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "128069": {
557
+ "content": "<|reserved_special_token_61|>",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "128070": {
565
+ "content": "<|reserved_special_token_62|>",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "128071": {
573
+ "content": "<|reserved_special_token_63|>",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "128072": {
581
+ "content": "<|reserved_special_token_64|>",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "128073": {
589
+ "content": "<|reserved_special_token_65|>",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "128074": {
597
+ "content": "<|reserved_special_token_66|>",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "128075": {
605
+ "content": "<|reserved_special_token_67|>",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "128076": {
613
+ "content": "<|reserved_special_token_68|>",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "128077": {
621
+ "content": "<|reserved_special_token_69|>",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "128078": {
629
+ "content": "<|reserved_special_token_70|>",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "128079": {
637
+ "content": "<|reserved_special_token_71|>",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "128080": {
645
+ "content": "<|reserved_special_token_72|>",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "128081": {
653
+ "content": "<|reserved_special_token_73|>",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "128082": {
661
+ "content": "<|reserved_special_token_74|>",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "128083": {
669
+ "content": "<|reserved_special_token_75|>",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "128084": {
677
+ "content": "<|reserved_special_token_76|>",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "128085": {
685
+ "content": "<|reserved_special_token_77|>",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "128086": {
693
+ "content": "<|reserved_special_token_78|>",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "128087": {
701
+ "content": "<|reserved_special_token_79|>",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "128088": {
709
+ "content": "<|reserved_special_token_80|>",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "128089": {
717
+ "content": "<|reserved_special_token_81|>",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "128090": {
725
+ "content": "<|reserved_special_token_82|>",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "128091": {
733
+ "content": "<|reserved_special_token_83|>",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "128092": {
741
+ "content": "<|reserved_special_token_84|>",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "128093": {
749
+ "content": "<|reserved_special_token_85|>",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "128094": {
757
+ "content": "<|reserved_special_token_86|>",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "128095": {
765
+ "content": "<|reserved_special_token_87|>",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "128096": {
773
+ "content": "<|reserved_special_token_88|>",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "128097": {
781
+ "content": "<|reserved_special_token_89|>",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "128098": {
789
+ "content": "<|reserved_special_token_90|>",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "128099": {
797
+ "content": "<|reserved_special_token_91|>",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "128100": {
805
+ "content": "<|reserved_special_token_92|>",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "128101": {
813
+ "content": "<|reserved_special_token_93|>",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "128102": {
821
+ "content": "<|reserved_special_token_94|>",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": true
827
+ },
828
+ "128103": {
829
+ "content": "<|reserved_special_token_95|>",
830
+ "lstrip": false,
831
+ "normalized": false,
832
+ "rstrip": false,
833
+ "single_word": false,
834
+ "special": true
835
+ },
836
+ "128104": {
837
+ "content": "<|reserved_special_token_96|>",
838
+ "lstrip": false,
839
+ "normalized": false,
840
+ "rstrip": false,
841
+ "single_word": false,
842
+ "special": true
843
+ },
844
+ "128105": {
845
+ "content": "<|reserved_special_token_97|>",
846
+ "lstrip": false,
847
+ "normalized": false,
848
+ "rstrip": false,
849
+ "single_word": false,
850
+ "special": true
851
+ },
852
+ "128106": {
853
+ "content": "<|reserved_special_token_98|>",
854
+ "lstrip": false,
855
+ "normalized": false,
856
+ "rstrip": false,
857
+ "single_word": false,
858
+ "special": true
859
+ },
860
+ "128107": {
861
+ "content": "<|reserved_special_token_99|>",
862
+ "lstrip": false,
863
+ "normalized": false,
864
+ "rstrip": false,
865
+ "single_word": false,
866
+ "special": true
867
+ },
868
+ "128108": {
869
+ "content": "<|reserved_special_token_100|>",
870
+ "lstrip": false,
871
+ "normalized": false,
872
+ "rstrip": false,
873
+ "single_word": false,
874
+ "special": true
875
+ },
876
+ "128109": {
877
+ "content": "<|reserved_special_token_101|>",
878
+ "lstrip": false,
879
+ "normalized": false,
880
+ "rstrip": false,
881
+ "single_word": false,
882
+ "special": true
883
+ },
884
+ "128110": {
885
+ "content": "<|reserved_special_token_102|>",
886
+ "lstrip": false,
887
+ "normalized": false,
888
+ "rstrip": false,
889
+ "single_word": false,
890
+ "special": true
891
+ },
892
+ "128111": {
893
+ "content": "<|reserved_special_token_103|>",
894
+ "lstrip": false,
895
+ "normalized": false,
896
+ "rstrip": false,
897
+ "single_word": false,
898
+ "special": true
899
+ },
900
+ "128112": {
901
+ "content": "<|reserved_special_token_104|>",
902
+ "lstrip": false,
903
+ "normalized": false,
904
+ "rstrip": false,
905
+ "single_word": false,
906
+ "special": true
907
+ },
908
+ "128113": {
909
+ "content": "<|reserved_special_token_105|>",
910
+ "lstrip": false,
911
+ "normalized": false,
912
+ "rstrip": false,
913
+ "single_word": false,
914
+ "special": true
915
+ },
916
+ "128114": {
917
+ "content": "<|reserved_special_token_106|>",
918
+ "lstrip": false,
919
+ "normalized": false,
920
+ "rstrip": false,
921
+ "single_word": false,
922
+ "special": true
923
+ },
924
+ "128115": {
925
+ "content": "<|reserved_special_token_107|>",
926
+ "lstrip": false,
927
+ "normalized": false,
928
+ "rstrip": false,
929
+ "single_word": false,
930
+ "special": true
931
+ },
932
+ "128116": {
933
+ "content": "<|reserved_special_token_108|>",
934
+ "lstrip": false,
935
+ "normalized": false,
936
+ "rstrip": false,
937
+ "single_word": false,
938
+ "special": true
939
+ },
940
+ "128117": {
941
+ "content": "<|reserved_special_token_109|>",
942
+ "lstrip": false,
943
+ "normalized": false,
944
+ "rstrip": false,
945
+ "single_word": false,
946
+ "special": true
947
+ },
948
+ "128118": {
949
+ "content": "<|reserved_special_token_110|>",
950
+ "lstrip": false,
951
+ "normalized": false,
952
+ "rstrip": false,
953
+ "single_word": false,
954
+ "special": true
955
+ },
956
+ "128119": {
957
+ "content": "<|reserved_special_token_111|>",
958
+ "lstrip": false,
959
+ "normalized": false,
960
+ "rstrip": false,
961
+ "single_word": false,
962
+ "special": true
963
+ },
964
+ "128120": {
965
+ "content": "<|reserved_special_token_112|>",
966
+ "lstrip": false,
967
+ "normalized": false,
968
+ "rstrip": false,
969
+ "single_word": false,
970
+ "special": true
971
+ },
972
+ "128121": {
973
+ "content": "<|reserved_special_token_113|>",
974
+ "lstrip": false,
975
+ "normalized": false,
976
+ "rstrip": false,
977
+ "single_word": false,
978
+ "special": true
979
+ },
980
+ "128122": {
981
+ "content": "<|reserved_special_token_114|>",
982
+ "lstrip": false,
983
+ "normalized": false,
984
+ "rstrip": false,
985
+ "single_word": false,
986
+ "special": true
987
+ },
988
+ "128123": {
989
+ "content": "<|reserved_special_token_115|>",
990
+ "lstrip": false,
991
+ "normalized": false,
992
+ "rstrip": false,
993
+ "single_word": false,
994
+ "special": true
995
+ },
996
+ "128124": {
997
+ "content": "<|reserved_special_token_116|>",
998
+ "lstrip": false,
999
+ "normalized": false,
1000
+ "rstrip": false,
1001
+ "single_word": false,
1002
+ "special": true
1003
+ },
1004
+ "128125": {
1005
+ "content": "<|reserved_special_token_117|>",
1006
+ "lstrip": false,
1007
+ "normalized": false,
1008
+ "rstrip": false,
1009
+ "single_word": false,
1010
+ "special": true
1011
+ },
1012
+ "128126": {
1013
+ "content": "<|reserved_special_token_118|>",
1014
+ "lstrip": false,
1015
+ "normalized": false,
1016
+ "rstrip": false,
1017
+ "single_word": false,
1018
+ "special": true
1019
+ },
1020
+ "128127": {
1021
+ "content": "<|reserved_special_token_119|>",
1022
+ "lstrip": false,
1023
+ "normalized": false,
1024
+ "rstrip": false,
1025
+ "single_word": false,
1026
+ "special": true
1027
+ },
1028
+ "128128": {
1029
+ "content": "<|reserved_special_token_120|>",
1030
+ "lstrip": false,
1031
+ "normalized": false,
1032
+ "rstrip": false,
1033
+ "single_word": false,
1034
+ "special": true
1035
+ },
1036
+ "128129": {
1037
+ "content": "<|reserved_special_token_121|>",
1038
+ "lstrip": false,
1039
+ "normalized": false,
1040
+ "rstrip": false,
1041
+ "single_word": false,
1042
+ "special": true
1043
+ },
1044
+ "128130": {
1045
+ "content": "<|reserved_special_token_122|>",
1046
+ "lstrip": false,
1047
+ "normalized": false,
1048
+ "rstrip": false,
1049
+ "single_word": false,
1050
+ "special": true
1051
+ },
1052
+ "128131": {
1053
+ "content": "<|reserved_special_token_123|>",
1054
+ "lstrip": false,
1055
+ "normalized": false,
1056
+ "rstrip": false,
1057
+ "single_word": false,
1058
+ "special": true
1059
+ },
1060
+ "128132": {
1061
+ "content": "<|reserved_special_token_124|>",
1062
+ "lstrip": false,
1063
+ "normalized": false,
1064
+ "rstrip": false,
1065
+ "single_word": false,
1066
+ "special": true
1067
+ },
1068
+ "128133": {
1069
+ "content": "<|reserved_special_token_125|>",
1070
+ "lstrip": false,
1071
+ "normalized": false,
1072
+ "rstrip": false,
1073
+ "single_word": false,
1074
+ "special": true
1075
+ },
1076
+ "128134": {
1077
+ "content": "<|reserved_special_token_126|>",
1078
+ "lstrip": false,
1079
+ "normalized": false,
1080
+ "rstrip": false,
1081
+ "single_word": false,
1082
+ "special": true
1083
+ },
1084
+ "128135": {
1085
+ "content": "<|reserved_special_token_127|>",
1086
+ "lstrip": false,
1087
+ "normalized": false,
1088
+ "rstrip": false,
1089
+ "single_word": false,
1090
+ "special": true
1091
+ },
1092
+ "128136": {
1093
+ "content": "<|reserved_special_token_128|>",
1094
+ "lstrip": false,
1095
+ "normalized": false,
1096
+ "rstrip": false,
1097
+ "single_word": false,
1098
+ "special": true
1099
+ },
1100
+ "128137": {
1101
+ "content": "<|reserved_special_token_129|>",
1102
+ "lstrip": false,
1103
+ "normalized": false,
1104
+ "rstrip": false,
1105
+ "single_word": false,
1106
+ "special": true
1107
+ },
1108
+ "128138": {
1109
+ "content": "<|reserved_special_token_130|>",
1110
+ "lstrip": false,
1111
+ "normalized": false,
1112
+ "rstrip": false,
1113
+ "single_word": false,
1114
+ "special": true
1115
+ },
1116
+ "128139": {
1117
+ "content": "<|reserved_special_token_131|>",
1118
+ "lstrip": false,
1119
+ "normalized": false,
1120
+ "rstrip": false,
1121
+ "single_word": false,
1122
+ "special": true
1123
+ },
1124
+ "128140": {
1125
+ "content": "<|reserved_special_token_132|>",
1126
+ "lstrip": false,
1127
+ "normalized": false,
1128
+ "rstrip": false,
1129
+ "single_word": false,
1130
+ "special": true
1131
+ },
1132
+ "128141": {
1133
+ "content": "<|reserved_special_token_133|>",
1134
+ "lstrip": false,
1135
+ "normalized": false,
1136
+ "rstrip": false,
1137
+ "single_word": false,
1138
+ "special": true
1139
+ },
1140
+ "128142": {
1141
+ "content": "<|reserved_special_token_134|>",
1142
+ "lstrip": false,
1143
+ "normalized": false,
1144
+ "rstrip": false,
1145
+ "single_word": false,
1146
+ "special": true
1147
+ },
1148
+ "128143": {
1149
+ "content": "<|reserved_special_token_135|>",
1150
+ "lstrip": false,
1151
+ "normalized": false,
1152
+ "rstrip": false,
1153
+ "single_word": false,
1154
+ "special": true
1155
+ },
1156
+ "128144": {
1157
+ "content": "<|reserved_special_token_136|>",
1158
+ "lstrip": false,
1159
+ "normalized": false,
1160
+ "rstrip": false,
1161
+ "single_word": false,
1162
+ "special": true
1163
+ },
1164
+ "128145": {
1165
+ "content": "<|reserved_special_token_137|>",
1166
+ "lstrip": false,
1167
+ "normalized": false,
1168
+ "rstrip": false,
1169
+ "single_word": false,
1170
+ "special": true
1171
+ },
1172
+ "128146": {
1173
+ "content": "<|reserved_special_token_138|>",
1174
+ "lstrip": false,
1175
+ "normalized": false,
1176
+ "rstrip": false,
1177
+ "single_word": false,
1178
+ "special": true
1179
+ },
1180
+ "128147": {
1181
+ "content": "<|reserved_special_token_139|>",
1182
+ "lstrip": false,
1183
+ "normalized": false,
1184
+ "rstrip": false,
1185
+ "single_word": false,
1186
+ "special": true
1187
+ },
1188
+ "128148": {
1189
+ "content": "<|reserved_special_token_140|>",
1190
+ "lstrip": false,
1191
+ "normalized": false,
1192
+ "rstrip": false,
1193
+ "single_word": false,
1194
+ "special": true
1195
+ },
1196
+ "128149": {
1197
+ "content": "<|reserved_special_token_141|>",
1198
+ "lstrip": false,
1199
+ "normalized": false,
1200
+ "rstrip": false,
1201
+ "single_word": false,
1202
+ "special": true
1203
+ },
1204
+ "128150": {
1205
+ "content": "<|reserved_special_token_142|>",
1206
+ "lstrip": false,
1207
+ "normalized": false,
1208
+ "rstrip": false,
1209
+ "single_word": false,
1210
+ "special": true
1211
+ },
1212
+ "128151": {
1213
+ "content": "<|reserved_special_token_143|>",
1214
+ "lstrip": false,
1215
+ "normalized": false,
1216
+ "rstrip": false,
1217
+ "single_word": false,
1218
+ "special": true
1219
+ },
1220
+ "128152": {
1221
+ "content": "<|reserved_special_token_144|>",
1222
+ "lstrip": false,
1223
+ "normalized": false,
1224
+ "rstrip": false,
1225
+ "single_word": false,
1226
+ "special": true
1227
+ },
1228
+ "128153": {
1229
+ "content": "<|reserved_special_token_145|>",
1230
+ "lstrip": false,
1231
+ "normalized": false,
1232
+ "rstrip": false,
1233
+ "single_word": false,
1234
+ "special": true
1235
+ },
1236
+ "128154": {
1237
+ "content": "<|reserved_special_token_146|>",
1238
+ "lstrip": false,
1239
+ "normalized": false,
1240
+ "rstrip": false,
1241
+ "single_word": false,
1242
+ "special": true
1243
+ },
1244
+ "128155": {
1245
+ "content": "<|reserved_special_token_147|>",
1246
+ "lstrip": false,
1247
+ "normalized": false,
1248
+ "rstrip": false,
1249
+ "single_word": false,
1250
+ "special": true
1251
+ },
1252
+ "128156": {
1253
+ "content": "<|reserved_special_token_148|>",
1254
+ "lstrip": false,
1255
+ "normalized": false,
1256
+ "rstrip": false,
1257
+ "single_word": false,
1258
+ "special": true
1259
+ },
1260
+ "128157": {
1261
+ "content": "<|reserved_special_token_149|>",
1262
+ "lstrip": false,
1263
+ "normalized": false,
1264
+ "rstrip": false,
1265
+ "single_word": false,
1266
+ "special": true
1267
+ },
1268
+ "128158": {
1269
+ "content": "<|reserved_special_token_150|>",
1270
+ "lstrip": false,
1271
+ "normalized": false,
1272
+ "rstrip": false,
1273
+ "single_word": false,
1274
+ "special": true
1275
+ },
1276
+ "128159": {
1277
+ "content": "<|reserved_special_token_151|>",
1278
+ "lstrip": false,
1279
+ "normalized": false,
1280
+ "rstrip": false,
1281
+ "single_word": false,
1282
+ "special": true
1283
+ },
1284
+ "128160": {
1285
+ "content": "<|reserved_special_token_152|>",
1286
+ "lstrip": false,
1287
+ "normalized": false,
1288
+ "rstrip": false,
1289
+ "single_word": false,
1290
+ "special": true
1291
+ },
1292
+ "128161": {
1293
+ "content": "<|reserved_special_token_153|>",
1294
+ "lstrip": false,
1295
+ "normalized": false,
1296
+ "rstrip": false,
1297
+ "single_word": false,
1298
+ "special": true
1299
+ },
1300
+ "128162": {
1301
+ "content": "<|reserved_special_token_154|>",
1302
+ "lstrip": false,
1303
+ "normalized": false,
1304
+ "rstrip": false,
1305
+ "single_word": false,
1306
+ "special": true
1307
+ },
1308
+ "128163": {
1309
+ "content": "<|reserved_special_token_155|>",
1310
+ "lstrip": false,
1311
+ "normalized": false,
1312
+ "rstrip": false,
1313
+ "single_word": false,
1314
+ "special": true
1315
+ },
1316
+ "128164": {
1317
+ "content": "<|reserved_special_token_156|>",
1318
+ "lstrip": false,
1319
+ "normalized": false,
1320
+ "rstrip": false,
1321
+ "single_word": false,
1322
+ "special": true
1323
+ },
1324
+ "128165": {
1325
+ "content": "<|reserved_special_token_157|>",
1326
+ "lstrip": false,
1327
+ "normalized": false,
1328
+ "rstrip": false,
1329
+ "single_word": false,
1330
+ "special": true
1331
+ },
1332
+ "128166": {
1333
+ "content": "<|reserved_special_token_158|>",
1334
+ "lstrip": false,
1335
+ "normalized": false,
1336
+ "rstrip": false,
1337
+ "single_word": false,
1338
+ "special": true
1339
+ },
1340
+ "128167": {
1341
+ "content": "<|reserved_special_token_159|>",
1342
+ "lstrip": false,
1343
+ "normalized": false,
1344
+ "rstrip": false,
1345
+ "single_word": false,
1346
+ "special": true
1347
+ },
1348
+ "128168": {
1349
+ "content": "<|reserved_special_token_160|>",
1350
+ "lstrip": false,
1351
+ "normalized": false,
1352
+ "rstrip": false,
1353
+ "single_word": false,
1354
+ "special": true
1355
+ },
1356
+ "128169": {
1357
+ "content": "<|reserved_special_token_161|>",
1358
+ "lstrip": false,
1359
+ "normalized": false,
1360
+ "rstrip": false,
1361
+ "single_word": false,
1362
+ "special": true
1363
+ },
1364
+ "128170": {
1365
+ "content": "<|reserved_special_token_162|>",
1366
+ "lstrip": false,
1367
+ "normalized": false,
1368
+ "rstrip": false,
1369
+ "single_word": false,
1370
+ "special": true
1371
+ },
1372
+ "128171": {
1373
+ "content": "<|reserved_special_token_163|>",
1374
+ "lstrip": false,
1375
+ "normalized": false,
1376
+ "rstrip": false,
1377
+ "single_word": false,
1378
+ "special": true
1379
+ },
1380
+ "128172": {
1381
+ "content": "<|reserved_special_token_164|>",
1382
+ "lstrip": false,
1383
+ "normalized": false,
1384
+ "rstrip": false,
1385
+ "single_word": false,
1386
+ "special": true
1387
+ },
1388
+ "128173": {
1389
+ "content": "<|reserved_special_token_165|>",
1390
+ "lstrip": false,
1391
+ "normalized": false,
1392
+ "rstrip": false,
1393
+ "single_word": false,
1394
+ "special": true
1395
+ },
1396
+ "128174": {
1397
+ "content": "<|reserved_special_token_166|>",
1398
+ "lstrip": false,
1399
+ "normalized": false,
1400
+ "rstrip": false,
1401
+ "single_word": false,
1402
+ "special": true
1403
+ },
1404
+ "128175": {
1405
+ "content": "<|reserved_special_token_167|>",
1406
+ "lstrip": false,
1407
+ "normalized": false,
1408
+ "rstrip": false,
1409
+ "single_word": false,
1410
+ "special": true
1411
+ },
1412
+ "128176": {
1413
+ "content": "<|reserved_special_token_168|>",
1414
+ "lstrip": false,
1415
+ "normalized": false,
1416
+ "rstrip": false,
1417
+ "single_word": false,
1418
+ "special": true
1419
+ },
1420
+ "128177": {
1421
+ "content": "<|reserved_special_token_169|>",
1422
+ "lstrip": false,
1423
+ "normalized": false,
1424
+ "rstrip": false,
1425
+ "single_word": false,
1426
+ "special": true
1427
+ },
1428
+ "128178": {
1429
+ "content": "<|reserved_special_token_170|>",
1430
+ "lstrip": false,
1431
+ "normalized": false,
1432
+ "rstrip": false,
1433
+ "single_word": false,
1434
+ "special": true
1435
+ },
1436
+ "128179": {
1437
+ "content": "<|reserved_special_token_171|>",
1438
+ "lstrip": false,
1439
+ "normalized": false,
1440
+ "rstrip": false,
1441
+ "single_word": false,
1442
+ "special": true
1443
+ },
1444
+ "128180": {
1445
+ "content": "<|reserved_special_token_172|>",
1446
+ "lstrip": false,
1447
+ "normalized": false,
1448
+ "rstrip": false,
1449
+ "single_word": false,
1450
+ "special": true
1451
+ },
1452
+ "128181": {
1453
+ "content": "<|reserved_special_token_173|>",
1454
+ "lstrip": false,
1455
+ "normalized": false,
1456
+ "rstrip": false,
1457
+ "single_word": false,
1458
+ "special": true
1459
+ },
1460
+ "128182": {
1461
+ "content": "<|reserved_special_token_174|>",
1462
+ "lstrip": false,
1463
+ "normalized": false,
1464
+ "rstrip": false,
1465
+ "single_word": false,
1466
+ "special": true
1467
+ },
1468
+ "128183": {
1469
+ "content": "<|reserved_special_token_175|>",
1470
+ "lstrip": false,
1471
+ "normalized": false,
1472
+ "rstrip": false,
1473
+ "single_word": false,
1474
+ "special": true
1475
+ },
1476
+ "128184": {
1477
+ "content": "<|reserved_special_token_176|>",
1478
+ "lstrip": false,
1479
+ "normalized": false,
1480
+ "rstrip": false,
1481
+ "single_word": false,
1482
+ "special": true
1483
+ },
1484
+ "128185": {
1485
+ "content": "<|reserved_special_token_177|>",
1486
+ "lstrip": false,
1487
+ "normalized": false,
1488
+ "rstrip": false,
1489
+ "single_word": false,
1490
+ "special": true
1491
+ },
1492
+ "128186": {
1493
+ "content": "<|reserved_special_token_178|>",
1494
+ "lstrip": false,
1495
+ "normalized": false,
1496
+ "rstrip": false,
1497
+ "single_word": false,
1498
+ "special": true
1499
+ },
1500
+ "128187": {
1501
+ "content": "<|reserved_special_token_179|>",
1502
+ "lstrip": false,
1503
+ "normalized": false,
1504
+ "rstrip": false,
1505
+ "single_word": false,
1506
+ "special": true
1507
+ },
1508
+ "128188": {
1509
+ "content": "<|reserved_special_token_180|>",
1510
+ "lstrip": false,
1511
+ "normalized": false,
1512
+ "rstrip": false,
1513
+ "single_word": false,
1514
+ "special": true
1515
+ },
1516
+ "128189": {
1517
+ "content": "<|reserved_special_token_181|>",
1518
+ "lstrip": false,
1519
+ "normalized": false,
1520
+ "rstrip": false,
1521
+ "single_word": false,
1522
+ "special": true
1523
+ },
1524
+ "128190": {
1525
+ "content": "<|reserved_special_token_182|>",
1526
+ "lstrip": false,
1527
+ "normalized": false,
1528
+ "rstrip": false,
1529
+ "single_word": false,
1530
+ "special": true
1531
+ },
1532
+ "128191": {
1533
+ "content": "<|reserved_special_token_183|>",
1534
+ "lstrip": false,
1535
+ "normalized": false,
1536
+ "rstrip": false,
1537
+ "single_word": false,
1538
+ "special": true
1539
+ },
1540
+ "128192": {
1541
+ "content": "<|reserved_special_token_184|>",
1542
+ "lstrip": false,
1543
+ "normalized": false,
1544
+ "rstrip": false,
1545
+ "single_word": false,
1546
+ "special": true
1547
+ },
1548
+ "128193": {
1549
+ "content": "<|reserved_special_token_185|>",
1550
+ "lstrip": false,
1551
+ "normalized": false,
1552
+ "rstrip": false,
1553
+ "single_word": false,
1554
+ "special": true
1555
+ },
1556
+ "128194": {
1557
+ "content": "<|reserved_special_token_186|>",
1558
+ "lstrip": false,
1559
+ "normalized": false,
1560
+ "rstrip": false,
1561
+ "single_word": false,
1562
+ "special": true
1563
+ },
1564
+ "128195": {
1565
+ "content": "<|reserved_special_token_187|>",
1566
+ "lstrip": false,
1567
+ "normalized": false,
1568
+ "rstrip": false,
1569
+ "single_word": false,
1570
+ "special": true
1571
+ },
1572
+ "128196": {
1573
+ "content": "<|reserved_special_token_188|>",
1574
+ "lstrip": false,
1575
+ "normalized": false,
1576
+ "rstrip": false,
1577
+ "single_word": false,
1578
+ "special": true
1579
+ },
1580
+ "128197": {
1581
+ "content": "<|reserved_special_token_189|>",
1582
+ "lstrip": false,
1583
+ "normalized": false,
1584
+ "rstrip": false,
1585
+ "single_word": false,
1586
+ "special": true
1587
+ },
1588
+ "128198": {
1589
+ "content": "<|reserved_special_token_190|>",
1590
+ "lstrip": false,
1591
+ "normalized": false,
1592
+ "rstrip": false,
1593
+ "single_word": false,
1594
+ "special": true
1595
+ },
1596
+ "128199": {
1597
+ "content": "<|reserved_special_token_191|>",
1598
+ "lstrip": false,
1599
+ "normalized": false,
1600
+ "rstrip": false,
1601
+ "single_word": false,
1602
+ "special": true
1603
+ },
1604
+ "128200": {
1605
+ "content": "<|reserved_special_token_192|>",
1606
+ "lstrip": false,
1607
+ "normalized": false,
1608
+ "rstrip": false,
1609
+ "single_word": false,
1610
+ "special": true
1611
+ },
1612
+ "128201": {
1613
+ "content": "<|reserved_special_token_193|>",
1614
+ "lstrip": false,
1615
+ "normalized": false,
1616
+ "rstrip": false,
1617
+ "single_word": false,
1618
+ "special": true
1619
+ },
1620
+ "128202": {
1621
+ "content": "<|reserved_special_token_194|>",
1622
+ "lstrip": false,
1623
+ "normalized": false,
1624
+ "rstrip": false,
1625
+ "single_word": false,
1626
+ "special": true
1627
+ },
1628
+ "128203": {
1629
+ "content": "<|reserved_special_token_195|>",
1630
+ "lstrip": false,
1631
+ "normalized": false,
1632
+ "rstrip": false,
1633
+ "single_word": false,
1634
+ "special": true
1635
+ },
1636
+ "128204": {
1637
+ "content": "<|reserved_special_token_196|>",
1638
+ "lstrip": false,
1639
+ "normalized": false,
1640
+ "rstrip": false,
1641
+ "single_word": false,
1642
+ "special": true
1643
+ },
1644
+ "128205": {
1645
+ "content": "<|reserved_special_token_197|>",
1646
+ "lstrip": false,
1647
+ "normalized": false,
1648
+ "rstrip": false,
1649
+ "single_word": false,
1650
+ "special": true
1651
+ },
1652
+ "128206": {
1653
+ "content": "<|reserved_special_token_198|>",
1654
+ "lstrip": false,
1655
+ "normalized": false,
1656
+ "rstrip": false,
1657
+ "single_word": false,
1658
+ "special": true
1659
+ },
1660
+ "128207": {
1661
+ "content": "<|reserved_special_token_199|>",
1662
+ "lstrip": false,
1663
+ "normalized": false,
1664
+ "rstrip": false,
1665
+ "single_word": false,
1666
+ "special": true
1667
+ },
1668
+ "128208": {
1669
+ "content": "<|reserved_special_token_200|>",
1670
+ "lstrip": false,
1671
+ "normalized": false,
1672
+ "rstrip": false,
1673
+ "single_word": false,
1674
+ "special": true
1675
+ },
1676
+ "128209": {
1677
+ "content": "<|reserved_special_token_201|>",
1678
+ "lstrip": false,
1679
+ "normalized": false,
1680
+ "rstrip": false,
1681
+ "single_word": false,
1682
+ "special": true
1683
+ },
1684
+ "128210": {
1685
+ "content": "<|reserved_special_token_202|>",
1686
+ "lstrip": false,
1687
+ "normalized": false,
1688
+ "rstrip": false,
1689
+ "single_word": false,
1690
+ "special": true
1691
+ },
1692
+ "128211": {
1693
+ "content": "<|reserved_special_token_203|>",
1694
+ "lstrip": false,
1695
+ "normalized": false,
1696
+ "rstrip": false,
1697
+ "single_word": false,
1698
+ "special": true
1699
+ },
1700
+ "128212": {
1701
+ "content": "<|reserved_special_token_204|>",
1702
+ "lstrip": false,
1703
+ "normalized": false,
1704
+ "rstrip": false,
1705
+ "single_word": false,
1706
+ "special": true
1707
+ },
1708
+ "128213": {
1709
+ "content": "<|reserved_special_token_205|>",
1710
+ "lstrip": false,
1711
+ "normalized": false,
1712
+ "rstrip": false,
1713
+ "single_word": false,
1714
+ "special": true
1715
+ },
1716
+ "128214": {
1717
+ "content": "<|reserved_special_token_206|>",
1718
+ "lstrip": false,
1719
+ "normalized": false,
1720
+ "rstrip": false,
1721
+ "single_word": false,
1722
+ "special": true
1723
+ },
1724
+ "128215": {
1725
+ "content": "<|reserved_special_token_207|>",
1726
+ "lstrip": false,
1727
+ "normalized": false,
1728
+ "rstrip": false,
1729
+ "single_word": false,
1730
+ "special": true
1731
+ },
1732
+ "128216": {
1733
+ "content": "<|reserved_special_token_208|>",
1734
+ "lstrip": false,
1735
+ "normalized": false,
1736
+ "rstrip": false,
1737
+ "single_word": false,
1738
+ "special": true
1739
+ },
1740
+ "128217": {
1741
+ "content": "<|reserved_special_token_209|>",
1742
+ "lstrip": false,
1743
+ "normalized": false,
1744
+ "rstrip": false,
1745
+ "single_word": false,
1746
+ "special": true
1747
+ },
1748
+ "128218": {
1749
+ "content": "<|reserved_special_token_210|>",
1750
+ "lstrip": false,
1751
+ "normalized": false,
1752
+ "rstrip": false,
1753
+ "single_word": false,
1754
+ "special": true
1755
+ },
1756
+ "128219": {
1757
+ "content": "<|reserved_special_token_211|>",
1758
+ "lstrip": false,
1759
+ "normalized": false,
1760
+ "rstrip": false,
1761
+ "single_word": false,
1762
+ "special": true
1763
+ },
1764
+ "128220": {
1765
+ "content": "<|reserved_special_token_212|>",
1766
+ "lstrip": false,
1767
+ "normalized": false,
1768
+ "rstrip": false,
1769
+ "single_word": false,
1770
+ "special": true
1771
+ },
1772
+ "128221": {
1773
+ "content": "<|reserved_special_token_213|>",
1774
+ "lstrip": false,
1775
+ "normalized": false,
1776
+ "rstrip": false,
1777
+ "single_word": false,
1778
+ "special": true
1779
+ },
1780
+ "128222": {
1781
+ "content": "<|reserved_special_token_214|>",
1782
+ "lstrip": false,
1783
+ "normalized": false,
1784
+ "rstrip": false,
1785
+ "single_word": false,
1786
+ "special": true
1787
+ },
1788
+ "128223": {
1789
+ "content": "<|reserved_special_token_215|>",
1790
+ "lstrip": false,
1791
+ "normalized": false,
1792
+ "rstrip": false,
1793
+ "single_word": false,
1794
+ "special": true
1795
+ },
1796
+ "128224": {
1797
+ "content": "<|reserved_special_token_216|>",
1798
+ "lstrip": false,
1799
+ "normalized": false,
1800
+ "rstrip": false,
1801
+ "single_word": false,
1802
+ "special": true
1803
+ },
1804
+ "128225": {
1805
+ "content": "<|reserved_special_token_217|>",
1806
+ "lstrip": false,
1807
+ "normalized": false,
1808
+ "rstrip": false,
1809
+ "single_word": false,
1810
+ "special": true
1811
+ },
1812
+ "128226": {
1813
+ "content": "<|reserved_special_token_218|>",
1814
+ "lstrip": false,
1815
+ "normalized": false,
1816
+ "rstrip": false,
1817
+ "single_word": false,
1818
+ "special": true
1819
+ },
1820
+ "128227": {
1821
+ "content": "<|reserved_special_token_219|>",
1822
+ "lstrip": false,
1823
+ "normalized": false,
1824
+ "rstrip": false,
1825
+ "single_word": false,
1826
+ "special": true
1827
+ },
1828
+ "128228": {
1829
+ "content": "<|reserved_special_token_220|>",
1830
+ "lstrip": false,
1831
+ "normalized": false,
1832
+ "rstrip": false,
1833
+ "single_word": false,
1834
+ "special": true
1835
+ },
1836
+ "128229": {
1837
+ "content": "<|reserved_special_token_221|>",
1838
+ "lstrip": false,
1839
+ "normalized": false,
1840
+ "rstrip": false,
1841
+ "single_word": false,
1842
+ "special": true
1843
+ },
1844
+ "128230": {
1845
+ "content": "<|reserved_special_token_222|>",
1846
+ "lstrip": false,
1847
+ "normalized": false,
1848
+ "rstrip": false,
1849
+ "single_word": false,
1850
+ "special": true
1851
+ },
1852
+ "128231": {
1853
+ "content": "<|reserved_special_token_223|>",
1854
+ "lstrip": false,
1855
+ "normalized": false,
1856
+ "rstrip": false,
1857
+ "single_word": false,
1858
+ "special": true
1859
+ },
1860
+ "128232": {
1861
+ "content": "<|reserved_special_token_224|>",
1862
+ "lstrip": false,
1863
+ "normalized": false,
1864
+ "rstrip": false,
1865
+ "single_word": false,
1866
+ "special": true
1867
+ },
1868
+ "128233": {
1869
+ "content": "<|reserved_special_token_225|>",
1870
+ "lstrip": false,
1871
+ "normalized": false,
1872
+ "rstrip": false,
1873
+ "single_word": false,
1874
+ "special": true
1875
+ },
1876
+ "128234": {
1877
+ "content": "<|reserved_special_token_226|>",
1878
+ "lstrip": false,
1879
+ "normalized": false,
1880
+ "rstrip": false,
1881
+ "single_word": false,
1882
+ "special": true
1883
+ },
1884
+ "128235": {
1885
+ "content": "<|reserved_special_token_227|>",
1886
+ "lstrip": false,
1887
+ "normalized": false,
1888
+ "rstrip": false,
1889
+ "single_word": false,
1890
+ "special": true
1891
+ },
1892
+ "128236": {
1893
+ "content": "<|reserved_special_token_228|>",
1894
+ "lstrip": false,
1895
+ "normalized": false,
1896
+ "rstrip": false,
1897
+ "single_word": false,
1898
+ "special": true
1899
+ },
1900
+ "128237": {
1901
+ "content": "<|reserved_special_token_229|>",
1902
+ "lstrip": false,
1903
+ "normalized": false,
1904
+ "rstrip": false,
1905
+ "single_word": false,
1906
+ "special": true
1907
+ },
1908
+ "128238": {
1909
+ "content": "<|reserved_special_token_230|>",
1910
+ "lstrip": false,
1911
+ "normalized": false,
1912
+ "rstrip": false,
1913
+ "single_word": false,
1914
+ "special": true
1915
+ },
1916
+ "128239": {
1917
+ "content": "<|reserved_special_token_231|>",
1918
+ "lstrip": false,
1919
+ "normalized": false,
1920
+ "rstrip": false,
1921
+ "single_word": false,
1922
+ "special": true
1923
+ },
1924
+ "128240": {
1925
+ "content": "<|reserved_special_token_232|>",
1926
+ "lstrip": false,
1927
+ "normalized": false,
1928
+ "rstrip": false,
1929
+ "single_word": false,
1930
+ "special": true
1931
+ },
1932
+ "128241": {
1933
+ "content": "<|reserved_special_token_233|>",
1934
+ "lstrip": false,
1935
+ "normalized": false,
1936
+ "rstrip": false,
1937
+ "single_word": false,
1938
+ "special": true
1939
+ },
1940
+ "128242": {
1941
+ "content": "<|reserved_special_token_234|>",
1942
+ "lstrip": false,
1943
+ "normalized": false,
1944
+ "rstrip": false,
1945
+ "single_word": false,
1946
+ "special": true
1947
+ },
1948
+ "128243": {
1949
+ "content": "<|reserved_special_token_235|>",
1950
+ "lstrip": false,
1951
+ "normalized": false,
1952
+ "rstrip": false,
1953
+ "single_word": false,
1954
+ "special": true
1955
+ },
1956
+ "128244": {
1957
+ "content": "<|reserved_special_token_236|>",
1958
+ "lstrip": false,
1959
+ "normalized": false,
1960
+ "rstrip": false,
1961
+ "single_word": false,
1962
+ "special": true
1963
+ },
1964
+ "128245": {
1965
+ "content": "<|reserved_special_token_237|>",
1966
+ "lstrip": false,
1967
+ "normalized": false,
1968
+ "rstrip": false,
1969
+ "single_word": false,
1970
+ "special": true
1971
+ },
1972
+ "128246": {
1973
+ "content": "<|reserved_special_token_238|>",
1974
+ "lstrip": false,
1975
+ "normalized": false,
1976
+ "rstrip": false,
1977
+ "single_word": false,
1978
+ "special": true
1979
+ },
1980
+ "128247": {
1981
+ "content": "<|reserved_special_token_239|>",
1982
+ "lstrip": false,
1983
+ "normalized": false,
1984
+ "rstrip": false,
1985
+ "single_word": false,
1986
+ "special": true
1987
+ },
1988
+ "128248": {
1989
+ "content": "<|reserved_special_token_240|>",
1990
+ "lstrip": false,
1991
+ "normalized": false,
1992
+ "rstrip": false,
1993
+ "single_word": false,
1994
+ "special": true
1995
+ },
1996
+ "128249": {
1997
+ "content": "<|reserved_special_token_241|>",
1998
+ "lstrip": false,
1999
+ "normalized": false,
2000
+ "rstrip": false,
2001
+ "single_word": false,
2002
+ "special": true
2003
+ },
2004
+ "128250": {
2005
+ "content": "<|reserved_special_token_242|>",
2006
+ "lstrip": false,
2007
+ "normalized": false,
2008
+ "rstrip": false,
2009
+ "single_word": false,
2010
+ "special": true
2011
+ },
2012
+ "128251": {
2013
+ "content": "<|reserved_special_token_243|>",
2014
+ "lstrip": false,
2015
+ "normalized": false,
2016
+ "rstrip": false,
2017
+ "single_word": false,
2018
+ "special": true
2019
+ },
2020
+ "128252": {
2021
+ "content": "<|reserved_special_token_244|>",
2022
+ "lstrip": false,
2023
+ "normalized": false,
2024
+ "rstrip": false,
2025
+ "single_word": false,
2026
+ "special": true
2027
+ },
2028
+ "128253": {
2029
+ "content": "<|reserved_special_token_245|>",
2030
+ "lstrip": false,
2031
+ "normalized": false,
2032
+ "rstrip": false,
2033
+ "single_word": false,
2034
+ "special": true
2035
+ },
2036
+ "128254": {
2037
+ "content": "<|reserved_special_token_246|>",
2038
+ "lstrip": false,
2039
+ "normalized": false,
2040
+ "rstrip": false,
2041
+ "single_word": false,
2042
+ "special": true
2043
+ },
2044
+ "128255": {
2045
+ "content": "<|reserved_special_token_247|>",
2046
  "lstrip": false,
2047
  "normalized": false,
2048
  "rstrip": false,
 
2050
  "special": true
2051
  }
2052
  },
2053
+ "bos_token": "<|begin_of_text|>",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|eot_id|>",
 
 
 
 
 
2056
  "extra_special_tokens": {},
2057
+ "model_input_names": [
2058
+ "input_ids",
2059
+ "attention_mask"
2060
+ ],
2061
  "model_max_length": 131072,
2062
+ "pad_token": "<|finetune_right_pad_id|>",
2063
+ "padding_side": "left",
2064
+ "tokenizer_class": "PreTrainedTokenizerFast",
2065
  "unk_token": null
2066
  }
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.00124,
6
  "eval_steps": 500,
7
- "global_step": 31,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -16,31 +16,31 @@
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
  "completions/clipped_ratio": 0.0,
19
- "completions/max_length": 114.0,
20
- "completions/max_terminated_length": 114.0,
21
- "completions/mean_length": 98.921875,
22
- "completions/mean_terminated_length": 98.921875,
23
- "completions/min_length": 58.0,
24
- "completions/min_terminated_length": 58.0,
25
- "entropy": 0.19646543450653553,
26
  "epoch": 4e-05,
27
- "frac_reward_zero_std": 0.125,
28
- "grad_norm": 0.9278627038002014,
29
  "kl": 0.0,
30
  "learning_rate": 0.0,
31
- "loss": -0.0037,
32
- "num_tokens": 34199.0,
33
- "reward": 10.018266677856445,
34
- "reward_std": 4.776409149169922,
35
- "rewards/rollout_reward_func/mean": 10.018266677856445,
36
- "rewards/rollout_reward_func/std": 7.215184688568115,
37
- "sampling/importance_sampling_ratio/max": 1.2804653644561768,
38
- "sampling/importance_sampling_ratio/mean": 0.9914791584014893,
39
- "sampling/importance_sampling_ratio/min": 0.6299315094947815,
40
- "sampling/sampling_logp_difference/max": 0.30363547801971436,
41
- "sampling/sampling_logp_difference/mean": 0.01853932812809944,
42
  "step": 1,
43
- "step_time": 8.847447882999631
44
  },
45
  {
46
  "clip_ratio/high_max": 0.0,
@@ -49,31 +49,31 @@
49
  "clip_ratio/low_min": 0.0,
50
  "clip_ratio/region_mean": 0.0,
51
  "completions/clipped_ratio": 0.0,
52
- "completions/max_length": 117.0,
53
- "completions/max_terminated_length": 117.0,
54
- "completions/mean_length": 82.109375,
55
- "completions/mean_terminated_length": 82.109375,
56
- "completions/min_length": 2.0,
57
- "completions/min_terminated_length": 2.0,
58
- "entropy": 0.13141211355105042,
59
  "epoch": 8e-05,
60
- "frac_reward_zero_std": 0.125,
61
- "grad_norm": 0.6150323748588562,
62
  "kl": 0.0,
63
  "learning_rate": 2.8571428571428575e-07,
64
- "loss": 0.0052,
65
- "num_tokens": 67966.0,
66
- "reward": 13.53847885131836,
67
- "reward_std": 4.694334030151367,
68
- "rewards/rollout_reward_func/mean": 13.538479804992676,
69
- "rewards/rollout_reward_func/std": 7.1933274269104,
70
- "sampling/importance_sampling_ratio/max": 1.1734346151351929,
71
- "sampling/importance_sampling_ratio/mean": 0.9927313923835754,
72
- "sampling/importance_sampling_ratio/min": 0.4750845432281494,
73
- "sampling/sampling_logp_difference/max": 0.5278338193893433,
74
- "sampling/sampling_logp_difference/mean": 0.0108323460444808,
75
  "step": 2,
76
- "step_time": 7.127139926000382
77
  },
78
  {
79
  "clip_ratio/high_max": 0.0,
@@ -82,130 +82,130 @@
82
  "clip_ratio/low_min": 0.0,
83
  "clip_ratio/region_mean": 0.0,
84
  "completions/clipped_ratio": 0.0,
85
- "completions/max_length": 116.0,
86
- "completions/max_terminated_length": 116.0,
87
- "completions/mean_length": 86.8125,
88
- "completions/mean_terminated_length": 86.8125,
89
- "completions/min_length": 2.0,
90
- "completions/min_terminated_length": 2.0,
91
- "entropy": 0.12198319449089468,
92
  "epoch": 0.00012,
93
- "frac_reward_zero_std": 0.375,
94
- "grad_norm": 1.4374709129333496,
95
- "kl": 0.0009402413852512836,
96
  "learning_rate": 5.714285714285715e-07,
97
- "loss": 0.0066,
98
- "num_tokens": 100354.0,
99
- "reward": 10.038440704345703,
100
- "reward_std": 2.5273919105529785,
101
- "rewards/rollout_reward_func/mean": 10.038440704345703,
102
- "rewards/rollout_reward_func/std": 3.4852232933044434,
103
- "sampling/importance_sampling_ratio/max": 1.9403773546218872,
104
- "sampling/importance_sampling_ratio/mean": 1.0181429386138916,
105
- "sampling/importance_sampling_ratio/min": 0.7186898589134216,
106
- "sampling/sampling_logp_difference/max": 0.6623420715332031,
107
- "sampling/sampling_logp_difference/mean": 0.012397471815347672,
108
  "step": 3,
109
- "step_time": 9.22867403300097
110
  },
111
  {
112
- "clip_ratio/high_max": 0.0,
113
- "clip_ratio/high_mean": 0.0,
114
- "clip_ratio/low_mean": 0.0,
115
  "clip_ratio/low_min": 0.0,
116
- "clip_ratio/region_mean": 0.0,
117
  "completions/clipped_ratio": 0.0,
118
- "completions/max_length": 114.0,
119
- "completions/max_terminated_length": 114.0,
120
- "completions/mean_length": 98.03125,
121
- "completions/mean_terminated_length": 98.03125,
122
- "completions/min_length": 2.0,
123
- "completions/min_terminated_length": 2.0,
124
- "entropy": 0.19211739487946033,
125
  "epoch": 0.00016,
126
- "frac_reward_zero_std": 0.25,
127
- "grad_norm": 0.9476256370544434,
128
- "kl": 0.0006379662081599236,
129
  "learning_rate": 8.571428571428572e-07,
130
- "loss": -0.0098,
131
- "num_tokens": 129880.0,
132
- "reward": 10.634136199951172,
133
- "reward_std": 2.9668378829956055,
134
- "rewards/rollout_reward_func/mean": 10.634136199951172,
135
- "rewards/rollout_reward_func/std": 6.936125755310059,
136
- "sampling/importance_sampling_ratio/max": 1.2051615715026855,
137
- "sampling/importance_sampling_ratio/mean": 0.9807850122451782,
138
- "sampling/importance_sampling_ratio/min": 0.2036220282316208,
139
- "sampling/sampling_logp_difference/max": 1.0556471347808838,
140
- "sampling/sampling_logp_difference/mean": 0.01921888440847397,
141
  "step": 4,
142
- "step_time": 7.034282748000123
143
  },
144
  {
145
- "clip_ratio/high_max": 0.03125,
146
- "clip_ratio/high_mean": 0.0078125,
147
  "clip_ratio/low_mean": 0.0,
148
  "clip_ratio/low_min": 0.0,
149
- "clip_ratio/region_mean": 0.0078125,
150
  "completions/clipped_ratio": 0.0,
151
- "completions/max_length": 117.0,
152
- "completions/max_terminated_length": 117.0,
153
- "completions/mean_length": 100.96875,
154
- "completions/mean_terminated_length": 100.96875,
155
- "completions/min_length": 58.0,
156
- "completions/min_terminated_length": 58.0,
157
- "entropy": 0.17289281217381358,
158
  "epoch": 0.0002,
159
- "frac_reward_zero_std": 0.25,
160
- "grad_norm": 0.8713013529777527,
161
- "kl": 0.0023218162823468447,
162
  "learning_rate": 1.142857142857143e-06,
163
- "loss": 0.003,
164
- "num_tokens": 163618.0,
165
- "reward": 10.66733169555664,
166
- "reward_std": 2.2034752368927,
167
- "rewards/rollout_reward_func/mean": 10.66733169555664,
168
- "rewards/rollout_reward_func/std": 7.9976725578308105,
169
- "sampling/importance_sampling_ratio/max": 1.3269492387771606,
170
- "sampling/importance_sampling_ratio/mean": 1.0104732513427734,
171
- "sampling/importance_sampling_ratio/min": 0.8934441208839417,
172
- "sampling/sampling_logp_difference/max": 0.23215103149414062,
173
- "sampling/sampling_logp_difference/mean": 0.011650541797280312,
174
  "step": 5,
175
- "step_time": 7.114355061999959
176
  },
177
  {
178
- "clip_ratio/high_max": 0.0,
179
- "clip_ratio/high_mean": 0.0,
180
- "clip_ratio/low_mean": 0.00390625,
181
  "clip_ratio/low_min": 0.0,
182
- "clip_ratio/region_mean": 0.00390625,
183
  "completions/clipped_ratio": 0.0,
184
- "completions/max_length": 110.0,
185
- "completions/max_terminated_length": 110.0,
186
- "completions/mean_length": 97.421875,
187
- "completions/mean_terminated_length": 97.421875,
188
- "completions/min_length": 2.0,
189
- "completions/min_terminated_length": 2.0,
190
- "entropy": 0.1717861178331077,
191
  "epoch": 0.00024,
192
- "frac_reward_zero_std": 0.125,
193
- "grad_norm": 0.8476853966712952,
194
- "kl": 0.0003589589614421129,
195
  "learning_rate": 1.4285714285714286e-06,
196
- "loss": 0.0079,
197
- "num_tokens": 194293.0,
198
- "reward": 11.867555618286133,
199
- "reward_std": 3.1243598461151123,
200
- "rewards/rollout_reward_func/mean": 11.867554664611816,
201
- "rewards/rollout_reward_func/std": 6.9736504554748535,
202
- "sampling/importance_sampling_ratio/max": 1.1688475608825684,
203
- "sampling/importance_sampling_ratio/mean": 1.007383108139038,
204
- "sampling/importance_sampling_ratio/min": 0.8644587397575378,
205
- "sampling/sampling_logp_difference/max": 0.15558338165283203,
206
- "sampling/sampling_logp_difference/mean": 0.009581982158124447,
207
  "step": 6,
208
- "step_time": 8.641884654999558
209
  },
210
  {
211
  "clip_ratio/high_max": 0.0,
@@ -213,829 +213,103 @@
213
  "clip_ratio/low_mean": 0.0,
214
  "clip_ratio/low_min": 0.0,
215
  "clip_ratio/region_mean": 0.0,
216
- "completions/clipped_ratio": 0.0,
217
- "completions/max_length": 116.0,
218
- "completions/max_terminated_length": 116.0,
219
- "completions/mean_length": 102.59375,
220
- "completions/mean_terminated_length": 102.59375,
221
- "completions/min_length": 58.0,
222
- "completions/min_terminated_length": 58.0,
223
- "entropy": 0.1727504450827837,
224
  "epoch": 0.00028,
225
- "frac_reward_zero_std": 0.25,
226
- "grad_norm": 0.8516405820846558,
227
- "kl": 0.001040005125105381,
228
  "learning_rate": 1.7142857142857145e-06,
229
- "loss": 0.0032,
230
- "num_tokens": 228451.0,
231
- "reward": 10.84666919708252,
232
- "reward_std": 2.81697416305542,
233
- "rewards/rollout_reward_func/mean": 10.84666919708252,
234
- "rewards/rollout_reward_func/std": 3.9264276027679443,
235
- "sampling/importance_sampling_ratio/max": 1.192152738571167,
236
- "sampling/importance_sampling_ratio/mean": 0.9886180758476257,
237
- "sampling/importance_sampling_ratio/min": 0.7800420522689819,
238
- "sampling/sampling_logp_difference/max": 0.24933236837387085,
239
- "sampling/sampling_logp_difference/mean": 0.015093531459569931,
240
  "step": 7,
241
- "step_time": 7.0255837680001605
242
  },
243
  {
244
- "clip_ratio/high_max": 0.0,
245
- "clip_ratio/high_mean": 0.0,
246
  "clip_ratio/low_mean": 0.0,
247
  "clip_ratio/low_min": 0.0,
248
- "clip_ratio/region_mean": 0.0,
249
  "completions/clipped_ratio": 0.0,
250
- "completions/max_length": 116.0,
251
- "completions/max_terminated_length": 116.0,
252
- "completions/mean_length": 103.84375,
253
- "completions/mean_terminated_length": 103.84375,
254
- "completions/min_length": 94.0,
255
- "completions/min_terminated_length": 94.0,
256
- "entropy": 0.16474777180701494,
257
  "epoch": 0.00032,
258
- "frac_reward_zero_std": 0.5,
259
- "grad_norm": 0.7940111756324768,
260
- "kl": 0.0005032288372603944,
261
  "learning_rate": 2.0000000000000003e-06,
262
- "loss": 0.0009,
263
- "num_tokens": 263877.0,
264
- "reward": 10.964729309082031,
265
- "reward_std": 1.7662436962127686,
266
- "rewards/rollout_reward_func/mean": 10.964729309082031,
267
- "rewards/rollout_reward_func/std": 7.242088317871094,
268
- "sampling/importance_sampling_ratio/max": 1.2055360078811646,
269
- "sampling/importance_sampling_ratio/mean": 1.004713773727417,
270
- "sampling/importance_sampling_ratio/min": 0.7339034676551819,
271
- "sampling/sampling_logp_difference/max": 0.3093966245651245,
272
- "sampling/sampling_logp_difference/mean": 0.012640302069485188,
273
  "step": 8,
274
- "step_time": 7.154970041000297
275
  },
276
  {
277
- "clip_ratio/high_max": 0.0,
278
- "clip_ratio/high_mean": 0.0,
279
- "clip_ratio/low_mean": 0.00390625,
280
  "clip_ratio/low_min": 0.0,
281
- "clip_ratio/region_mean": 0.00390625,
282
  "completions/clipped_ratio": 0.0,
283
- "completions/max_length": 116.0,
284
- "completions/max_terminated_length": 116.0,
285
- "completions/mean_length": 105.921875,
286
- "completions/mean_terminated_length": 105.921875,
287
- "completions/min_length": 94.0,
288
- "completions/min_terminated_length": 94.0,
289
- "entropy": 0.1817057733424008,
290
  "epoch": 0.00036,
291
- "frac_reward_zero_std": 0.125,
292
- "grad_norm": 0.7268296480178833,
293
- "kl": 0.0013755811378359795,
294
  "learning_rate": 2.285714285714286e-06,
295
- "loss": -0.0026,
296
- "num_tokens": 302532.0,
297
- "reward": 12.797033309936523,
298
- "reward_std": 3.91664457321167,
299
- "rewards/rollout_reward_func/mean": 12.797033309936523,
300
- "rewards/rollout_reward_func/std": 6.278445720672607,
301
- "sampling/importance_sampling_ratio/max": 1.328028917312622,
302
- "sampling/importance_sampling_ratio/mean": 0.9793672561645508,
303
- "sampling/importance_sampling_ratio/min": 0.6404329538345337,
304
- "sampling/sampling_logp_difference/max": 0.5215651988983154,
305
- "sampling/sampling_logp_difference/mean": 0.020225321874022484,
306
  "step": 9,
307
- "step_time": 8.5138989149998
308
- },
309
- {
310
- "clip_ratio/high_max": 0.03125,
311
- "clip_ratio/high_mean": 0.0078125,
312
- "clip_ratio/low_mean": 0.00390625,
313
- "clip_ratio/low_min": 0.0,
314
- "clip_ratio/region_mean": 0.01171875,
315
- "completions/clipped_ratio": 0.0,
316
- "completions/max_length": 116.0,
317
- "completions/max_terminated_length": 116.0,
318
- "completions/mean_length": 87.90625,
319
- "completions/mean_terminated_length": 87.90625,
320
- "completions/min_length": 2.0,
321
- "completions/min_terminated_length": 2.0,
322
- "entropy": 0.1744129522703588,
323
- "epoch": 0.0004,
324
- "frac_reward_zero_std": 0.125,
325
- "grad_norm": 0.6529544591903687,
326
- "kl": 0.0011357483454048634,
327
- "learning_rate": 2.571428571428571e-06,
328
- "loss": 0.0154,
329
- "num_tokens": 337382.0,
330
- "reward": 9.921274185180664,
331
- "reward_std": 3.3844058513641357,
332
- "rewards/rollout_reward_func/mean": 9.921274185180664,
333
- "rewards/rollout_reward_func/std": 6.207524299621582,
334
- "sampling/importance_sampling_ratio/max": 1.2752257585525513,
335
- "sampling/importance_sampling_ratio/mean": 1.0078442096710205,
336
- "sampling/importance_sampling_ratio/min": 0.8506130576133728,
337
- "sampling/sampling_logp_difference/max": 0.23299765586853027,
338
- "sampling/sampling_logp_difference/mean": 0.010802164673805237,
339
- "step": 10,
340
- "step_time": 7.074561795000363
341
- },
342
- {
343
- "clip_ratio/high_max": 0.0,
344
- "clip_ratio/high_mean": 0.0,
345
- "clip_ratio/low_mean": 0.0,
346
- "clip_ratio/low_min": 0.0,
347
- "clip_ratio/region_mean": 0.0,
348
- "completions/clipped_ratio": 0.0,
349
- "completions/max_length": 113.0,
350
- "completions/max_terminated_length": 113.0,
351
- "completions/mean_length": 102.5,
352
- "completions/mean_terminated_length": 102.5,
353
- "completions/min_length": 58.0,
354
- "completions/min_terminated_length": 58.0,
355
- "entropy": 0.1777965882793069,
356
- "epoch": 0.00044,
357
- "frac_reward_zero_std": 0.25,
358
- "grad_norm": 0.9877777099609375,
359
- "kl": 0.0009214265737682581,
360
- "learning_rate": 2.8571428571428573e-06,
361
- "loss": -0.0011,
362
- "num_tokens": 373766.0,
363
- "reward": 13.959955215454102,
364
- "reward_std": 3.020989418029785,
365
- "rewards/rollout_reward_func/mean": 13.959955215454102,
366
- "rewards/rollout_reward_func/std": 8.485596656799316,
367
- "sampling/importance_sampling_ratio/max": 1.161659836769104,
368
- "sampling/importance_sampling_ratio/mean": 0.9922082424163818,
369
- "sampling/importance_sampling_ratio/min": 0.8054305911064148,
370
- "sampling/sampling_logp_difference/max": 0.21640020608901978,
371
- "sampling/sampling_logp_difference/mean": 0.01080007292330265,
372
- "step": 11,
373
- "step_time": 7.412672027999633
374
- },
375
- {
376
- "clip_ratio/high_max": 0.046875,
377
- "clip_ratio/high_mean": 0.01171875,
378
- "clip_ratio/low_mean": 0.0078125,
379
- "clip_ratio/low_min": 0.0,
380
- "clip_ratio/region_mean": 0.01953125,
381
- "completions/clipped_ratio": 0.0,
382
- "completions/max_length": 116.0,
383
- "completions/max_terminated_length": 116.0,
384
- "completions/mean_length": 99.0,
385
- "completions/mean_terminated_length": 99.0,
386
- "completions/min_length": 2.0,
387
- "completions/min_terminated_length": 2.0,
388
- "entropy": 0.18321187514811754,
389
- "epoch": 0.00048,
390
- "frac_reward_zero_std": 0.25,
391
- "grad_norm": 0.7719992995262146,
392
- "kl": 0.0012023542076349258,
393
- "learning_rate": 3.142857142857143e-06,
394
- "loss": 0.0098,
395
- "num_tokens": 409802.0,
396
- "reward": 13.237505912780762,
397
- "reward_std": 3.283658027648926,
398
- "rewards/rollout_reward_func/mean": 13.237504959106445,
399
- "rewards/rollout_reward_func/std": 8.317092895507812,
400
- "sampling/importance_sampling_ratio/max": 1.3050973415374756,
401
- "sampling/importance_sampling_ratio/mean": 0.9854879379272461,
402
- "sampling/importance_sampling_ratio/min": 0.6236510872840881,
403
- "sampling/sampling_logp_difference/max": 0.47287511825561523,
404
- "sampling/sampling_logp_difference/mean": 0.018457502126693726,
405
- "step": 12,
406
- "step_time": 7.962151656000515
407
- },
408
- {
409
- "clip_ratio/high_max": 0.0,
410
- "clip_ratio/high_mean": 0.0,
411
- "clip_ratio/low_mean": 0.0,
412
- "clip_ratio/low_min": 0.0,
413
- "clip_ratio/region_mean": 0.0,
414
- "completions/clipped_ratio": 0.0,
415
- "completions/max_length": 113.0,
416
- "completions/max_terminated_length": 113.0,
417
- "completions/mean_length": 98.671875,
418
- "completions/mean_terminated_length": 98.671875,
419
- "completions/min_length": 58.0,
420
- "completions/min_terminated_length": 58.0,
421
- "entropy": 0.1837792107835412,
422
- "epoch": 0.00052,
423
- "frac_reward_zero_std": 0.0,
424
- "grad_norm": 1.1921685934066772,
425
- "kl": 0.0006314956117421389,
426
- "learning_rate": 3.428571428571429e-06,
427
- "loss": -0.0016,
428
- "num_tokens": 443993.0,
429
- "reward": 8.716323852539062,
430
- "reward_std": 3.7696497440338135,
431
- "rewards/rollout_reward_func/mean": 8.716324806213379,
432
- "rewards/rollout_reward_func/std": 4.9213151931762695,
433
- "sampling/importance_sampling_ratio/max": 1.2224400043487549,
434
- "sampling/importance_sampling_ratio/mean": 1.0003582239151,
435
- "sampling/importance_sampling_ratio/min": 0.703804075717926,
436
- "sampling/sampling_logp_difference/max": 0.3478405475616455,
437
- "sampling/sampling_logp_difference/mean": 0.013392799533903599,
438
- "step": 13,
439
- "step_time": 6.872026027999937
440
- },
441
- {
442
- "clip_ratio/high_max": 0.0,
443
- "clip_ratio/high_mean": 0.0,
444
- "clip_ratio/low_mean": 0.0,
445
- "clip_ratio/low_min": 0.0,
446
- "clip_ratio/region_mean": 0.0,
447
- "completions/clipped_ratio": 0.0,
448
- "completions/max_length": 117.0,
449
- "completions/max_terminated_length": 117.0,
450
- "completions/mean_length": 87.078125,
451
- "completions/mean_terminated_length": 87.078125,
452
- "completions/min_length": 2.0,
453
- "completions/min_terminated_length": 2.0,
454
- "entropy": 0.1762648681178689,
455
- "epoch": 0.00056,
456
- "frac_reward_zero_std": 0.0,
457
- "grad_norm": 0.8949439525604248,
458
- "kl": 0.0012336352374404669,
459
- "learning_rate": 3.7142857142857146e-06,
460
- "loss": 0.0215,
461
- "num_tokens": 477034.0,
462
- "reward": 13.550655364990234,
463
- "reward_std": 4.669343948364258,
464
- "rewards/rollout_reward_func/mean": 13.550655364990234,
465
- "rewards/rollout_reward_func/std": 7.081562042236328,
466
- "sampling/importance_sampling_ratio/max": 1.2484861612319946,
467
- "sampling/importance_sampling_ratio/mean": 0.9962727427482605,
468
- "sampling/importance_sampling_ratio/min": 0.7459995746612549,
469
- "sampling/sampling_logp_difference/max": 0.29502665996551514,
470
- "sampling/sampling_logp_difference/mean": 0.011807022616267204,
471
- "step": 14,
472
- "step_time": 7.88232419100018
473
- },
474
- {
475
- "clip_ratio/high_max": 0.0,
476
- "clip_ratio/high_mean": 0.0,
477
- "clip_ratio/low_mean": 0.00390625,
478
- "clip_ratio/low_min": 0.0,
479
- "clip_ratio/region_mean": 0.00390625,
480
- "completions/clipped_ratio": 0.0,
481
- "completions/max_length": 116.0,
482
- "completions/max_terminated_length": 116.0,
483
- "completions/mean_length": 98.84375,
484
- "completions/mean_terminated_length": 98.84375,
485
- "completions/min_length": 2.0,
486
- "completions/min_terminated_length": 2.0,
487
- "entropy": 0.14872624445706606,
488
- "epoch": 0.0006,
489
- "frac_reward_zero_std": 0.375,
490
- "grad_norm": 0.5905139446258545,
491
- "kl": 0.0012812165077775717,
492
- "learning_rate": 4.000000000000001e-06,
493
- "loss": 0.0049,
494
- "num_tokens": 511160.0,
495
- "reward": 10.958195686340332,
496
- "reward_std": 3.023810386657715,
497
- "rewards/rollout_reward_func/mean": 10.958196640014648,
498
- "rewards/rollout_reward_func/std": 4.840097427368164,
499
- "sampling/importance_sampling_ratio/max": 1.139298915863037,
500
- "sampling/importance_sampling_ratio/mean": 0.9891129732131958,
501
- "sampling/importance_sampling_ratio/min": 0.7860896587371826,
502
- "sampling/sampling_logp_difference/max": 0.20429694652557373,
503
- "sampling/sampling_logp_difference/mean": 0.009246795438230038,
504
- "step": 15,
505
- "step_time": 8.032791925999845
506
- },
507
- {
508
- "clip_ratio/high_max": 0.0,
509
- "clip_ratio/high_mean": 0.0,
510
- "clip_ratio/low_mean": 0.0,
511
- "clip_ratio/low_min": 0.0,
512
- "clip_ratio/region_mean": 0.0,
513
- "completions/clipped_ratio": 0.0,
514
- "completions/max_length": 116.0,
515
- "completions/max_terminated_length": 116.0,
516
- "completions/mean_length": 99.53125,
517
- "completions/mean_terminated_length": 99.53125,
518
- "completions/min_length": 70.0,
519
- "completions/min_terminated_length": 70.0,
520
- "entropy": 0.1544574573636055,
521
- "epoch": 0.00064,
522
- "frac_reward_zero_std": 0.375,
523
- "grad_norm": 1.1432291269302368,
524
- "kl": 0.003647498415375594,
525
- "learning_rate": 4.2857142857142855e-06,
526
- "loss": 0.0286,
527
- "num_tokens": 545066.0,
528
- "reward": 9.331792831420898,
529
- "reward_std": 1.93760085105896,
530
- "rewards/rollout_reward_func/mean": 9.331792831420898,
531
- "rewards/rollout_reward_func/std": 5.057403087615967,
532
- "sampling/importance_sampling_ratio/max": 1.3562160730361938,
533
- "sampling/importance_sampling_ratio/mean": 0.9893874526023865,
534
- "sampling/importance_sampling_ratio/min": 0.5071713328361511,
535
- "sampling/sampling_logp_difference/max": 0.40043067932128906,
536
- "sampling/sampling_logp_difference/mean": 0.017478572204709053,
537
- "step": 16,
538
- "step_time": 7.066636556999811
539
- },
540
- {
541
- "clip_ratio/high_max": 0.03125,
542
- "clip_ratio/high_mean": 0.0078125,
543
- "clip_ratio/low_mean": 0.00390625,
544
- "clip_ratio/low_min": 0.0,
545
- "clip_ratio/region_mean": 0.01171875,
546
- "completions/clipped_ratio": 0.0,
547
- "completions/max_length": 114.0,
548
- "completions/max_terminated_length": 114.0,
549
- "completions/mean_length": 99.1875,
550
- "completions/mean_terminated_length": 99.1875,
551
- "completions/min_length": 57.0,
552
- "completions/min_terminated_length": 57.0,
553
- "entropy": 0.21858551260083914,
554
- "epoch": 0.00068,
555
- "frac_reward_zero_std": 0.125,
556
- "grad_norm": 1.1050941944122314,
557
- "kl": 0.0031870862003415823,
558
- "learning_rate": 4.571428571428572e-06,
559
- "loss": 0.024,
560
- "num_tokens": 580310.0,
561
- "reward": 10.79472541809082,
562
- "reward_std": 2.3240370750427246,
563
- "rewards/rollout_reward_func/mean": 10.79472541809082,
564
- "rewards/rollout_reward_func/std": 7.201944828033447,
565
- "sampling/importance_sampling_ratio/max": 1.270713210105896,
566
- "sampling/importance_sampling_ratio/mean": 1.0256067514419556,
567
- "sampling/importance_sampling_ratio/min": 0.751465916633606,
568
- "sampling/sampling_logp_difference/max": 0.30277013778686523,
569
- "sampling/sampling_logp_difference/mean": 0.022086970508098602,
570
- "step": 17,
571
- "step_time": 8.46836156100062
572
- },
573
- {
574
- "clip_ratio/high_max": 0.0,
575
- "clip_ratio/high_mean": 0.0,
576
- "clip_ratio/low_mean": 0.0,
577
- "clip_ratio/low_min": 0.0,
578
- "clip_ratio/region_mean": 0.0,
579
- "completions/clipped_ratio": 0.0,
580
- "completions/max_length": 116.0,
581
- "completions/max_terminated_length": 116.0,
582
- "completions/mean_length": 101.03125,
583
- "completions/mean_terminated_length": 101.03125,
584
- "completions/min_length": 2.0,
585
- "completions/min_terminated_length": 2.0,
586
- "entropy": 0.20830629393458366,
587
- "epoch": 0.00072,
588
- "frac_reward_zero_std": 0.125,
589
- "grad_norm": 0.6307684183120728,
590
- "kl": 0.0033635632134974003,
591
- "learning_rate": 4.857142857142858e-06,
592
- "loss": 0.0016,
593
- "num_tokens": 616096.0,
594
- "reward": 10.57010269165039,
595
- "reward_std": 3.070500373840332,
596
- "rewards/rollout_reward_func/mean": 10.570101737976074,
597
- "rewards/rollout_reward_func/std": 6.5729169845581055,
598
- "sampling/importance_sampling_ratio/max": 1.3546802997589111,
599
- "sampling/importance_sampling_ratio/mean": 1.0241458415985107,
600
- "sampling/importance_sampling_ratio/min": 0.6485600471496582,
601
- "sampling/sampling_logp_difference/max": 0.388120174407959,
602
- "sampling/sampling_logp_difference/mean": 0.028211820870637894,
603
- "step": 18,
604
- "step_time": 7.452459238999609
605
- },
606
- {
607
- "clip_ratio/high_max": 0.0,
608
- "clip_ratio/high_mean": 0.0,
609
- "clip_ratio/low_mean": 0.0,
610
- "clip_ratio/low_min": 0.0,
611
- "clip_ratio/region_mean": 0.0,
612
- "completions/clipped_ratio": 0.0,
613
- "completions/max_length": 116.0,
614
- "completions/max_terminated_length": 116.0,
615
- "completions/mean_length": 99.75,
616
- "completions/mean_terminated_length": 99.75,
617
- "completions/min_length": 2.0,
618
- "completions/min_terminated_length": 2.0,
619
- "entropy": 0.175787306856364,
620
- "epoch": 0.00076,
621
- "frac_reward_zero_std": 0.0,
622
- "grad_norm": 0.8829342722892761,
623
- "kl": 0.01204019202850759,
624
- "learning_rate": 5.142857142857142e-06,
625
- "loss": -0.0198,
626
- "num_tokens": 647564.0,
627
- "reward": 10.796049118041992,
628
- "reward_std": 3.542346477508545,
629
- "rewards/rollout_reward_func/mean": 10.79604721069336,
630
- "rewards/rollout_reward_func/std": 4.0647735595703125,
631
- "sampling/importance_sampling_ratio/max": 1.428004503250122,
632
- "sampling/importance_sampling_ratio/mean": 1.0033756494522095,
633
- "sampling/importance_sampling_ratio/min": 0.6384609341621399,
634
- "sampling/sampling_logp_difference/max": 0.39859604835510254,
635
- "sampling/sampling_logp_difference/mean": 0.021462757140398026,
636
- "step": 19,
637
- "step_time": 6.987104802000204
638
- },
639
- {
640
- "clip_ratio/high_max": 0.0,
641
- "clip_ratio/high_mean": 0.0,
642
- "clip_ratio/low_mean": 0.0,
643
- "clip_ratio/low_min": 0.0,
644
- "clip_ratio/region_mean": 0.0,
645
- "completions/clipped_ratio": 0.0,
646
- "completions/max_length": 116.0,
647
- "completions/max_terminated_length": 116.0,
648
- "completions/mean_length": 107.59375,
649
- "completions/mean_terminated_length": 107.59375,
650
- "completions/min_length": 94.0,
651
- "completions/min_terminated_length": 94.0,
652
- "entropy": 0.16672849422320724,
653
- "epoch": 0.0008,
654
- "frac_reward_zero_std": 0.25,
655
- "grad_norm": 0.5178841352462769,
656
- "kl": 0.008307450218126178,
657
- "learning_rate": 5.428571428571429e-06,
658
- "loss": 0.0045,
659
- "num_tokens": 679866.0,
660
- "reward": 14.327037811279297,
661
- "reward_std": 2.3548567295074463,
662
- "rewards/rollout_reward_func/mean": 14.327038764953613,
663
- "rewards/rollout_reward_func/std": 6.473320007324219,
664
- "sampling/importance_sampling_ratio/max": 1.1844276189804077,
665
- "sampling/importance_sampling_ratio/mean": 1.0173743963241577,
666
- "sampling/importance_sampling_ratio/min": 0.7388046979904175,
667
- "sampling/sampling_logp_difference/max": 0.30275261402130127,
668
- "sampling/sampling_logp_difference/mean": 0.01609945483505726,
669
- "step": 20,
670
- "step_time": 8.920992404999652
671
- },
672
- {
673
- "clip_ratio/high_max": 0.015625,
674
- "clip_ratio/high_mean": 0.00390625,
675
- "clip_ratio/low_mean": 0.0,
676
- "clip_ratio/low_min": 0.0,
677
- "clip_ratio/region_mean": 0.00390625,
678
- "completions/clipped_ratio": 0.0,
679
- "completions/max_length": 116.0,
680
- "completions/max_terminated_length": 116.0,
681
- "completions/mean_length": 94.15625,
682
- "completions/mean_terminated_length": 94.15625,
683
- "completions/min_length": 2.0,
684
- "completions/min_terminated_length": 2.0,
685
- "entropy": 0.1697295242920518,
686
- "epoch": 0.00084,
687
- "frac_reward_zero_std": 0.25,
688
- "grad_norm": 0.5032536387443542,
689
- "kl": 0.005904986290261149,
690
- "learning_rate": 5.7142857142857145e-06,
691
- "loss": 0.0063,
692
- "num_tokens": 712944.0,
693
- "reward": 9.954044342041016,
694
- "reward_std": 2.929586887359619,
695
- "rewards/rollout_reward_func/mean": 9.954044342041016,
696
- "rewards/rollout_reward_func/std": 3.8391432762145996,
697
- "sampling/importance_sampling_ratio/max": 1.1809278726577759,
698
- "sampling/importance_sampling_ratio/mean": 0.9926953911781311,
699
- "sampling/importance_sampling_ratio/min": 0.671869158744812,
700
- "sampling/sampling_logp_difference/max": 0.3325324058532715,
701
- "sampling/sampling_logp_difference/mean": 0.019740980118513107,
702
- "step": 21,
703
- "step_time": 7.055920110000216
704
- },
705
- {
706
- "clip_ratio/high_max": 0.015625,
707
- "clip_ratio/high_mean": 0.00390625,
708
- "clip_ratio/low_mean": 0.0,
709
- "clip_ratio/low_min": 0.0,
710
- "clip_ratio/region_mean": 0.00390625,
711
- "completions/clipped_ratio": 0.0,
712
- "completions/max_length": 120.0,
713
- "completions/max_terminated_length": 120.0,
714
- "completions/mean_length": 104.90625,
715
- "completions/mean_terminated_length": 104.90625,
716
- "completions/min_length": 94.0,
717
- "completions/min_terminated_length": 94.0,
718
- "entropy": 0.14952043676748872,
719
- "epoch": 0.00088,
720
- "frac_reward_zero_std": 0.5,
721
- "grad_norm": 0.41113221645355225,
722
- "kl": 0.020291190361604095,
723
- "learning_rate": 6e-06,
724
- "loss": -0.0074,
725
- "num_tokens": 745842.0,
726
- "reward": 15.216217041015625,
727
- "reward_std": 2.0014686584472656,
728
- "rewards/rollout_reward_func/mean": 15.216217041015625,
729
- "rewards/rollout_reward_func/std": 7.341615676879883,
730
- "sampling/importance_sampling_ratio/max": 1.4198538064956665,
731
- "sampling/importance_sampling_ratio/mean": 1.0097235441207886,
732
- "sampling/importance_sampling_ratio/min": 0.4881555140018463,
733
- "sampling/sampling_logp_difference/max": 0.7177610397338867,
734
- "sampling/sampling_logp_difference/mean": 0.0277442317456007,
735
- "step": 22,
736
- "step_time": 7.018612760999531
737
- },
738
- {
739
- "clip_ratio/high_max": 0.0,
740
- "clip_ratio/high_mean": 0.0,
741
- "clip_ratio/low_mean": 0.0,
742
- "clip_ratio/low_min": 0.0,
743
- "clip_ratio/region_mean": 0.0,
744
- "completions/clipped_ratio": 0.0,
745
- "completions/max_length": 109.0,
746
- "completions/max_terminated_length": 109.0,
747
- "completions/mean_length": 97.75,
748
- "completions/mean_terminated_length": 97.75,
749
- "completions/min_length": 58.0,
750
- "completions/min_terminated_length": 58.0,
751
- "entropy": 0.14867904456332326,
752
- "epoch": 0.00092,
753
- "frac_reward_zero_std": 0.625,
754
- "grad_norm": 0.4940281808376312,
755
- "kl": 0.011669340077787638,
756
- "learning_rate": 6.285714285714286e-06,
757
- "loss": -0.0072,
758
- "num_tokens": 777962.0,
759
- "reward": 9.629616737365723,
760
- "reward_std": 1.4242491722106934,
761
- "rewards/rollout_reward_func/mean": 9.629616737365723,
762
- "rewards/rollout_reward_func/std": 4.168231964111328,
763
- "sampling/importance_sampling_ratio/max": 1.235112190246582,
764
- "sampling/importance_sampling_ratio/mean": 0.9895117282867432,
765
- "sampling/importance_sampling_ratio/min": 0.5506332516670227,
766
- "sampling/sampling_logp_difference/max": 0.5967001914978027,
767
- "sampling/sampling_logp_difference/mean": 0.02138374000787735,
768
- "step": 23,
769
- "step_time": 8.369276020999905
770
- },
771
- {
772
- "clip_ratio/high_max": 0.0,
773
- "clip_ratio/high_mean": 0.0,
774
- "clip_ratio/low_mean": 0.0,
775
- "clip_ratio/low_min": 0.0,
776
- "clip_ratio/region_mean": 0.0,
777
- "completions/clipped_ratio": 0.0,
778
- "completions/max_length": 116.0,
779
- "completions/max_terminated_length": 116.0,
780
- "completions/mean_length": 105.8125,
781
- "completions/mean_terminated_length": 105.8125,
782
- "completions/min_length": 94.0,
783
- "completions/min_terminated_length": 94.0,
784
- "entropy": 0.153433071449399,
785
- "epoch": 0.00096,
786
- "frac_reward_zero_std": 0.625,
787
- "grad_norm": 0.6142421960830688,
788
- "kl": 0.036034643882885575,
789
- "learning_rate": 6.571428571428572e-06,
790
- "loss": -0.0105,
791
- "num_tokens": 814758.0,
792
- "reward": 14.580177307128906,
793
- "reward_std": 1.8490748405456543,
794
- "rewards/rollout_reward_func/mean": 14.580177307128906,
795
- "rewards/rollout_reward_func/std": 7.987873554229736,
796
- "sampling/importance_sampling_ratio/max": 1.822190284729004,
797
- "sampling/importance_sampling_ratio/mean": 0.9637724161148071,
798
- "sampling/importance_sampling_ratio/min": 0.504238486289978,
799
- "sampling/sampling_logp_difference/max": 0.6850337982177734,
800
- "sampling/sampling_logp_difference/mean": 0.02902819588780403,
801
- "step": 24,
802
- "step_time": 6.940408582999908
803
- },
804
- {
805
- "clip_ratio/high_max": 0.0,
806
- "clip_ratio/high_mean": 0.0,
807
- "clip_ratio/low_mean": 0.0,
808
- "clip_ratio/low_min": 0.0,
809
- "clip_ratio/region_mean": 0.0,
810
- "completions/clipped_ratio": 0.0,
811
- "completions/max_length": 222.0,
812
- "completions/max_terminated_length": 222.0,
813
- "completions/mean_length": 164.84375,
814
- "completions/mean_terminated_length": 164.84375,
815
- "completions/min_length": 94.0,
816
- "completions/min_terminated_length": 94.0,
817
- "entropy": 0.16991846077144146,
818
- "epoch": 0.001,
819
- "frac_reward_zero_std": 0.125,
820
- "grad_norm": 0.6987430453300476,
821
- "kl": 0.07557606545742601,
822
- "learning_rate": 6.857142857142858e-06,
823
- "loss": -0.0195,
824
- "num_tokens": 848064.0,
825
- "reward": 15.993512153625488,
826
- "reward_std": 3.809764862060547,
827
- "rewards/rollout_reward_func/mean": 15.993511199951172,
828
- "rewards/rollout_reward_func/std": 6.686747074127197,
829
- "sampling/importance_sampling_ratio/max": 2.5669615268707275,
830
- "sampling/importance_sampling_ratio/mean": 1.038097620010376,
831
- "sampling/importance_sampling_ratio/min": 0.36189621686935425,
832
- "sampling/sampling_logp_difference/max": 0.9301660060882568,
833
- "sampling/sampling_logp_difference/mean": 0.04619593545794487,
834
- "step": 25,
835
- "step_time": 7.486579578000146
836
- },
837
- {
838
- "clip_ratio/high_max": 0.057291666977107525,
839
- "clip_ratio/high_mean": 0.014322916744276881,
840
- "clip_ratio/low_mean": 0.0026041667442768812,
841
- "clip_ratio/low_min": 0.0,
842
- "clip_ratio/region_mean": 0.016927083488553762,
843
- "completions/clipped_ratio": 0.0,
844
- "completions/max_length": 222.0,
845
- "completions/max_terminated_length": 222.0,
846
- "completions/mean_length": 164.546875,
847
- "completions/mean_terminated_length": 164.546875,
848
- "completions/min_length": 94.0,
849
- "completions/min_terminated_length": 94.0,
850
- "entropy": 0.1926758922636509,
851
- "epoch": 0.00104,
852
- "frac_reward_zero_std": 0.125,
853
- "grad_norm": 1.1327283382415771,
854
- "kl": 0.07225027051754296,
855
- "learning_rate": 7.1428571428571436e-06,
856
- "loss": -0.002,
857
- "num_tokens": 886623.0,
858
- "reward": 16.11737060546875,
859
- "reward_std": 4.506648540496826,
860
- "rewards/rollout_reward_func/mean": 16.11737060546875,
861
- "rewards/rollout_reward_func/std": 10.432522773742676,
862
- "sampling/importance_sampling_ratio/max": 2.0328967571258545,
863
- "sampling/importance_sampling_ratio/mean": 0.9802088737487793,
864
- "sampling/importance_sampling_ratio/min": 0.3310491144657135,
865
- "sampling/sampling_logp_difference/max": 0.9447128772735596,
866
- "sampling/sampling_logp_difference/mean": 0.046886004507541656,
867
- "step": 26,
868
- "step_time": 8.893368583999745
869
- },
870
- {
871
- "clip_ratio/high_max": 0.03125,
872
- "clip_ratio/high_mean": 0.0078125,
873
- "clip_ratio/low_mean": 0.0007812500116415322,
874
- "clip_ratio/low_min": 0.0,
875
- "clip_ratio/region_mean": 0.008593750011641532,
876
- "completions/clipped_ratio": 0.0,
877
- "completions/max_length": 222.0,
878
- "completions/max_terminated_length": 222.0,
879
- "completions/mean_length": 172.734375,
880
- "completions/mean_terminated_length": 172.734375,
881
- "completions/min_length": 94.0,
882
- "completions/min_terminated_length": 94.0,
883
- "entropy": 0.22064625099301338,
884
- "epoch": 0.00108,
885
- "frac_reward_zero_std": 0.0,
886
- "grad_norm": 1.0944156646728516,
887
- "kl": 0.10417186049744487,
888
- "learning_rate": 7.428571428571429e-06,
889
- "loss": -0.0587,
890
- "num_tokens": 923914.0,
891
- "reward": 18.09270477294922,
892
- "reward_std": 4.988367557525635,
893
- "rewards/rollout_reward_func/mean": 18.09270477294922,
894
- "rewards/rollout_reward_func/std": 10.039715766906738,
895
- "sampling/importance_sampling_ratio/max": 2.6798079013824463,
896
- "sampling/importance_sampling_ratio/mean": 0.9714202284812927,
897
- "sampling/importance_sampling_ratio/min": 1.6848749771671695e-13,
898
- "sampling/sampling_logp_difference/max": 27.938308715820312,
899
- "sampling/sampling_logp_difference/mean": 0.14040334522724152,
900
- "step": 27,
901
- "step_time": 7.770603708000635
902
- },
903
- {
904
- "clip_ratio/high_max": 0.03125,
905
- "clip_ratio/high_mean": 0.0078125,
906
- "clip_ratio/low_mean": 0.0,
907
- "clip_ratio/low_min": 0.0,
908
- "clip_ratio/region_mean": 0.0078125,
909
- "completions/clipped_ratio": 0.0,
910
- "completions/max_length": 222.0,
911
- "completions/max_terminated_length": 222.0,
912
- "completions/mean_length": 153.140625,
913
- "completions/mean_terminated_length": 153.140625,
914
- "completions/min_length": 94.0,
915
- "completions/min_terminated_length": 94.0,
916
- "entropy": 0.1478887596167624,
917
- "epoch": 0.00112,
918
- "frac_reward_zero_std": 0.0,
919
- "grad_norm": 0.6470702886581421,
920
- "kl": 0.2168107850011438,
921
- "learning_rate": 7.714285714285716e-06,
922
- "loss": -0.0223,
923
- "num_tokens": 957287.0,
924
- "reward": 18.42925262451172,
925
- "reward_std": 4.345149040222168,
926
- "rewards/rollout_reward_func/mean": 18.42925262451172,
927
- "rewards/rollout_reward_func/std": 9.097280502319336,
928
- "sampling/importance_sampling_ratio/max": 2.399390697479248,
929
- "sampling/importance_sampling_ratio/mean": 1.0000016689300537,
930
- "sampling/importance_sampling_ratio/min": 0.18764659762382507,
931
- "sampling/sampling_logp_difference/max": 1.446092128753662,
932
- "sampling/sampling_logp_difference/mean": 0.07286648452281952,
933
- "step": 28,
934
- "step_time": 7.452310326000543
935
- },
936
- {
937
- "clip_ratio/high_max": 0.015625,
938
- "clip_ratio/high_mean": 0.00390625,
939
- "clip_ratio/low_mean": 0.0026041667442768812,
940
- "clip_ratio/low_min": 0.0,
941
- "clip_ratio/region_mean": 0.006510416744276881,
942
- "completions/clipped_ratio": 0.0,
943
- "completions/max_length": 216.0,
944
- "completions/max_terminated_length": 216.0,
945
- "completions/mean_length": 160.234375,
946
- "completions/mean_terminated_length": 160.234375,
947
- "completions/min_length": 2.0,
948
- "completions/min_terminated_length": 2.0,
949
- "entropy": 0.18869919329881668,
950
- "epoch": 0.00116,
951
- "frac_reward_zero_std": 0.0,
952
- "grad_norm": 0.7812609672546387,
953
- "kl": 0.12577429198427126,
954
- "learning_rate": 8.000000000000001e-06,
955
- "loss": 0.0295,
956
- "num_tokens": 992314.0,
957
- "reward": 17.83092498779297,
958
- "reward_std": 4.7564473152160645,
959
- "rewards/rollout_reward_func/mean": 17.83092498779297,
960
- "rewards/rollout_reward_func/std": 8.652295112609863,
961
- "sampling/importance_sampling_ratio/max": 2.8800876140594482,
962
- "sampling/importance_sampling_ratio/mean": 0.9842413067817688,
963
- "sampling/importance_sampling_ratio/min": 0.3151380717754364,
964
- "sampling/sampling_logp_difference/max": 1.1866700649261475,
965
- "sampling/sampling_logp_difference/mean": 0.08438973873853683,
966
- "step": 29,
967
- "step_time": 9.375173230999735
968
- },
969
- {
970
- "clip_ratio/high_max": 0.0,
971
- "clip_ratio/high_mean": 0.0,
972
- "clip_ratio/low_mean": 0.0,
973
- "clip_ratio/low_min": 0.0,
974
- "clip_ratio/region_mean": 0.0,
975
- "completions/clipped_ratio": 0.0,
976
- "completions/max_length": 221.0,
977
- "completions/max_terminated_length": 221.0,
978
- "completions/mean_length": 131.859375,
979
- "completions/mean_terminated_length": 131.859375,
980
- "completions/min_length": 2.0,
981
- "completions/min_terminated_length": 2.0,
982
- "entropy": 0.1357386689633131,
983
- "epoch": 0.0012,
984
- "frac_reward_zero_std": 0.0,
985
- "grad_norm": 0.8130350112915039,
986
- "kl": 0.08997523193829693,
987
- "learning_rate": 8.285714285714287e-06,
988
- "loss": -0.0283,
989
- "num_tokens": 1027697.0,
990
- "reward": 13.44178295135498,
991
- "reward_std": 2.9926509857177734,
992
- "rewards/rollout_reward_func/mean": 13.441783905029297,
993
- "rewards/rollout_reward_func/std": 4.6520209312438965,
994
- "sampling/importance_sampling_ratio/max": 2.0554587841033936,
995
- "sampling/importance_sampling_ratio/mean": 0.9725006818771362,
996
- "sampling/importance_sampling_ratio/min": 0.0,
997
- "sampling/sampling_logp_difference/max": 1.2659821510314941,
998
- "sampling/sampling_logp_difference/mean": 0.05908963084220886,
999
- "step": 30,
1000
- "step_time": 7.3954712499999005
1001
- },
1002
- {
1003
- "clip_ratio/high_max": 0.046875000931322575,
1004
- "clip_ratio/high_mean": 0.011718750232830644,
1005
- "clip_ratio/low_mean": 0.0,
1006
- "clip_ratio/low_min": 0.0,
1007
- "clip_ratio/region_mean": 0.011718750232830644,
1008
- "completions/clipped_ratio": 0.0,
1009
- "completions/max_length": 222.0,
1010
- "completions/max_terminated_length": 222.0,
1011
- "completions/mean_length": 145.84375,
1012
- "completions/mean_terminated_length": 145.84375,
1013
- "completions/min_length": 94.0,
1014
- "completions/min_terminated_length": 94.0,
1015
- "entropy": 0.16315596690401435,
1016
- "epoch": 0.00124,
1017
- "frac_reward_zero_std": 0.0,
1018
- "grad_norm": 0.6934608221054077,
1019
- "kl": 0.1859159953892231,
1020
- "learning_rate": 8.571428571428571e-06,
1021
- "loss": 0.0146,
1022
- "num_tokens": 1063055.0,
1023
- "reward": 18.718799591064453,
1024
- "reward_std": 4.135279655456543,
1025
- "rewards/rollout_reward_func/mean": 18.718799591064453,
1026
- "rewards/rollout_reward_func/std": 7.419597148895264,
1027
- "sampling/importance_sampling_ratio/max": 2.9382729530334473,
1028
- "sampling/importance_sampling_ratio/mean": 1.0185017585754395,
1029
- "sampling/importance_sampling_ratio/min": 0.15695802867412567,
1030
- "sampling/sampling_logp_difference/max": 1.3760042190551758,
1031
- "sampling/sampling_logp_difference/mean": 0.10743667185306549,
1032
- "step": 31,
1033
- "step_time": 7.8455955710001035
1034
  }
1035
  ],
1036
  "logging_steps": 1.0,
1037
  "max_steps": 600,
1038
- "num_input_tokens_seen": 1063055,
1039
  "num_train_epochs": 1,
1040
  "save_steps": 500,
1041
  "stateful_callbacks": {
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.00036,
6
  "eval_steps": 500,
7
+ "global_step": 9,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
  "completions/clipped_ratio": 0.0,
19
+ "completions/max_length": 1073.0,
20
+ "completions/max_terminated_length": 1073.0,
21
+ "completions/mean_length": 927.5625,
22
+ "completions/mean_terminated_length": 927.5625,
23
+ "completions/min_length": 289.0,
24
+ "completions/min_terminated_length": 289.0,
25
+ "entropy": 0.575973030179739,
26
  "epoch": 4e-05,
27
+ "frac_reward_zero_std": 0.0,
28
+ "grad_norm": 0.9458226561546326,
29
  "kl": 0.0,
30
  "learning_rate": 0.0,
31
+ "loss": -0.0108,
32
+ "num_tokens": 129845.0,
33
+ "reward": -0.488802433013916,
34
+ "reward_std": 0.18876618146896362,
35
+ "rewards/rollout_reward_func/mean": -0.488802433013916,
36
+ "rewards/rollout_reward_func/std": 0.1920391470193863,
37
+ "sampling/importance_sampling_ratio/max": 1.2831777334213257,
38
+ "sampling/importance_sampling_ratio/mean": 1.0011134147644043,
39
+ "sampling/importance_sampling_ratio/min": 0.7999967932701111,
40
+ "sampling/sampling_logp_difference/max": 0.24933958053588867,
41
+ "sampling/sampling_logp_difference/mean": 0.01707676611840725,
42
  "step": 1,
43
+ "step_time": 18.75738557600016
44
  },
45
  {
46
  "clip_ratio/high_max": 0.0,
 
49
  "clip_ratio/low_min": 0.0,
50
  "clip_ratio/region_mean": 0.0,
51
  "completions/clipped_ratio": 0.0,
52
+ "completions/max_length": 1054.0,
53
+ "completions/max_terminated_length": 1054.0,
54
+ "completions/mean_length": 954.1875,
55
+ "completions/mean_terminated_length": 954.1875,
56
+ "completions/min_length": 691.0,
57
+ "completions/min_terminated_length": 691.0,
58
+ "entropy": 0.5630898363888264,
59
  "epoch": 8e-05,
60
+ "frac_reward_zero_std": 0.0,
61
+ "grad_norm": 0.8123396039009094,
62
  "kl": 0.0,
63
  "learning_rate": 2.8571428571428575e-07,
64
+ "loss": -0.0021,
65
+ "num_tokens": 261450.0,
66
+ "reward": -0.4951375424861908,
67
+ "reward_std": 0.18179327249526978,
68
+ "rewards/rollout_reward_func/mean": -0.4951375424861908,
69
+ "rewards/rollout_reward_func/std": 0.18824981153011322,
70
+ "sampling/importance_sampling_ratio/max": 1.2119427919387817,
71
+ "sampling/importance_sampling_ratio/mean": 1.002686858177185,
72
+ "sampling/importance_sampling_ratio/min": 0.8387882113456726,
73
+ "sampling/sampling_logp_difference/max": 0.19222474098205566,
74
+ "sampling/sampling_logp_difference/mean": 0.01698336750268936,
75
  "step": 2,
76
+ "step_time": 18.354596549000235
77
  },
78
  {
79
  "clip_ratio/high_max": 0.0,
 
82
  "clip_ratio/low_min": 0.0,
83
  "clip_ratio/region_mean": 0.0,
84
  "completions/clipped_ratio": 0.0,
85
+ "completions/max_length": 1039.0,
86
+ "completions/max_terminated_length": 1039.0,
87
+ "completions/mean_length": 948.046875,
88
+ "completions/mean_terminated_length": 948.046875,
89
+ "completions/min_length": 711.0,
90
+ "completions/min_terminated_length": 711.0,
91
+ "entropy": 0.567595299333334,
92
  "epoch": 0.00012,
93
+ "frac_reward_zero_std": 0.0,
94
+ "grad_norm": 0.7247381210327148,
95
+ "kl": 0.0005365689157770248,
96
  "learning_rate": 5.714285714285715e-07,
97
+ "loss": 0.0017,
98
+ "num_tokens": 395514.0,
99
+ "reward": -0.5213372707366943,
100
+ "reward_std": 0.18358027935028076,
101
+ "rewards/rollout_reward_func/mean": -0.5213372707366943,
102
+ "rewards/rollout_reward_func/std": 0.1988830268383026,
103
+ "sampling/importance_sampling_ratio/max": 1.2515581846237183,
104
+ "sampling/importance_sampling_ratio/mean": 1.0002529621124268,
105
+ "sampling/importance_sampling_ratio/min": 0.8503696918487549,
106
+ "sampling/sampling_logp_difference/max": 0.22438931465148926,
107
+ "sampling/sampling_logp_difference/mean": 0.017911788076162338,
108
  "step": 3,
109
+ "step_time": 21.38302714500037
110
  },
111
  {
112
+ "clip_ratio/high_max": 0.0062500000931322575,
113
+ "clip_ratio/high_mean": 0.0015625000232830644,
114
+ "clip_ratio/low_mean": 0.0015625000232830644,
115
  "clip_ratio/low_min": 0.0,
116
+ "clip_ratio/region_mean": 0.0031250000465661287,
117
  "completions/clipped_ratio": 0.0,
118
+ "completions/max_length": 1031.0,
119
+ "completions/max_terminated_length": 1031.0,
120
+ "completions/mean_length": 936.15625,
121
+ "completions/mean_terminated_length": 936.15625,
122
+ "completions/min_length": 653.0,
123
+ "completions/min_terminated_length": 653.0,
124
+ "entropy": 0.5737008973956108,
125
  "epoch": 0.00016,
126
+ "frac_reward_zero_std": 0.0,
127
+ "grad_norm": 0.7101178765296936,
128
+ "kl": 0.0005989186938677449,
129
  "learning_rate": 8.571428571428572e-07,
130
+ "loss": -0.0103,
131
+ "num_tokens": 532318.0,
132
+ "reward": -0.4770505428314209,
133
+ "reward_std": 0.20475129783153534,
134
+ "rewards/rollout_reward_func/mean": -0.4770505130290985,
135
+ "rewards/rollout_reward_func/std": 0.21575571596622467,
136
+ "sampling/importance_sampling_ratio/max": 1.1857075691223145,
137
+ "sampling/importance_sampling_ratio/mean": 1.0007572174072266,
138
+ "sampling/importance_sampling_ratio/min": 0.7442525625228882,
139
+ "sampling/sampling_logp_difference/max": 0.29537487030029297,
140
+ "sampling/sampling_logp_difference/mean": 0.018634025007486343,
141
  "step": 4,
142
+ "step_time": 21.295585246000428
143
  },
144
  {
145
+ "clip_ratio/high_max": 0.0052083334885537624,
146
+ "clip_ratio/high_mean": 0.0013020833721384406,
147
  "clip_ratio/low_mean": 0.0,
148
  "clip_ratio/low_min": 0.0,
149
+ "clip_ratio/region_mean": 0.0013020833721384406,
150
  "completions/clipped_ratio": 0.0,
151
+ "completions/max_length": 1354.0,
152
+ "completions/max_terminated_length": 1354.0,
153
+ "completions/mean_length": 1222.15625,
154
+ "completions/mean_terminated_length": 1222.15625,
155
+ "completions/min_length": 868.0,
156
+ "completions/min_terminated_length": 868.0,
157
+ "entropy": 0.5727464407682419,
158
  "epoch": 0.0002,
159
+ "frac_reward_zero_std": 0.0,
160
+ "grad_norm": 0.7757085561752319,
161
+ "kl": 0.0007553649911642424,
162
  "learning_rate": 1.142857142857143e-06,
163
+ "loss": -0.0049,
164
+ "num_tokens": 685325.0,
165
+ "reward": -0.4957820177078247,
166
+ "reward_std": 0.15593528747558594,
167
+ "rewards/rollout_reward_func/mean": -0.4957820177078247,
168
+ "rewards/rollout_reward_func/std": 0.15906599164009094,
169
+ "sampling/importance_sampling_ratio/max": 1.3013360500335693,
170
+ "sampling/importance_sampling_ratio/mean": 0.9994803667068481,
171
+ "sampling/importance_sampling_ratio/min": 0.7946972846984863,
172
+ "sampling/sampling_logp_difference/max": 0.26339149475097656,
173
+ "sampling/sampling_logp_difference/mean": 0.017627151682972908,
174
  "step": 5,
175
+ "step_time": 28.664589129999968
176
  },
177
  {
178
+ "clip_ratio/high_max": 0.0052083334885537624,
179
+ "clip_ratio/high_mean": 0.0013020833721384406,
180
+ "clip_ratio/low_mean": 0.0,
181
  "clip_ratio/low_min": 0.0,
182
+ "clip_ratio/region_mean": 0.0013020833721384406,
183
  "completions/clipped_ratio": 0.0,
184
+ "completions/max_length": 1351.0,
185
+ "completions/max_terminated_length": 1351.0,
186
+ "completions/mean_length": 1219.46875,
187
+ "completions/mean_terminated_length": 1219.46875,
188
+ "completions/min_length": 1012.0,
189
+ "completions/min_terminated_length": 1012.0,
190
+ "entropy": 0.6081138551235199,
191
  "epoch": 0.00024,
192
+ "frac_reward_zero_std": 0.0,
193
+ "grad_norm": 0.6916493773460388,
194
+ "kl": 0.0005982896527712,
195
  "learning_rate": 1.4285714285714286e-06,
196
+ "loss": -0.0088,
197
+ "num_tokens": 840964.0,
198
+ "reward": -0.5021334886550903,
199
+ "reward_std": 0.17595317959785461,
200
+ "rewards/rollout_reward_func/mean": -0.5021334886550903,
201
+ "rewards/rollout_reward_func/std": 0.17878760397434235,
202
+ "sampling/importance_sampling_ratio/max": 1.214083194732666,
203
+ "sampling/importance_sampling_ratio/mean": 1.0006656646728516,
204
+ "sampling/importance_sampling_ratio/min": 0.8280104994773865,
205
+ "sampling/sampling_logp_difference/max": 0.19398927688598633,
206
+ "sampling/sampling_logp_difference/mean": 0.018302714452147484,
207
  "step": 6,
208
+ "step_time": 30.956883577000326
209
  },
210
  {
211
  "clip_ratio/high_max": 0.0,
 
213
  "clip_ratio/low_mean": 0.0,
214
  "clip_ratio/low_min": 0.0,
215
  "clip_ratio/region_mean": 0.0,
216
+ "completions/clipped_ratio": 0.015625,
217
+ "completions/max_length": 1358.0,
218
+ "completions/max_terminated_length": 1358.0,
219
+ "completions/mean_length": 1176.265625,
220
+ "completions/mean_terminated_length": 1175.71435546875,
221
+ "completions/min_length": 289.0,
222
+ "completions/min_terminated_length": 289.0,
223
+ "entropy": 0.5972259026020765,
224
  "epoch": 0.00028,
225
+ "frac_reward_zero_std": 0.0,
226
+ "grad_norm": 0.7177211046218872,
227
+ "kl": 0.0005861011686647544,
228
  "learning_rate": 1.7142857142857145e-06,
229
+ "loss": -0.0394,
230
+ "num_tokens": 996609.0,
231
+ "reward": -0.492123544216156,
232
+ "reward_std": 0.16847285628318787,
233
+ "rewards/rollout_reward_func/mean": -0.492123544216156,
234
+ "rewards/rollout_reward_func/std": 0.18208079040050507,
235
+ "sampling/importance_sampling_ratio/max": 1.2812676429748535,
236
+ "sampling/importance_sampling_ratio/mean": 0.9978591203689575,
237
+ "sampling/importance_sampling_ratio/min": 0.7220999002456665,
238
+ "sampling/sampling_logp_difference/max": 0.3255918025970459,
239
+ "sampling/sampling_logp_difference/mean": 0.017380019649863243,
240
  "step": 7,
241
+ "step_time": 32.67134758100042
242
  },
243
  {
244
+ "clip_ratio/high_max": 0.0052083334885537624,
245
+ "clip_ratio/high_mean": 0.0013020833721384406,
246
  "clip_ratio/low_mean": 0.0,
247
  "clip_ratio/low_min": 0.0,
248
+ "clip_ratio/region_mean": 0.0013020833721384406,
249
  "completions/clipped_ratio": 0.0,
250
+ "completions/max_length": 1358.0,
251
+ "completions/max_terminated_length": 1358.0,
252
+ "completions/mean_length": 1221.671875,
253
+ "completions/mean_terminated_length": 1221.671875,
254
+ "completions/min_length": 790.0,
255
+ "completions/min_terminated_length": 790.0,
256
+ "entropy": 0.6069452427327633,
257
  "epoch": 0.00032,
258
+ "frac_reward_zero_std": 0.0,
259
+ "grad_norm": 0.7911893129348755,
260
+ "kl": 0.0007064663786877645,
261
  "learning_rate": 2.0000000000000003e-06,
262
+ "loss": 0.0074,
263
+ "num_tokens": 1151043.0,
264
+ "reward": -0.5468676686286926,
265
+ "reward_std": 0.17733421921730042,
266
+ "rewards/rollout_reward_func/mean": -0.5468676686286926,
267
+ "rewards/rollout_reward_func/std": 0.1824226826429367,
268
+ "sampling/importance_sampling_ratio/max": 1.1498843431472778,
269
+ "sampling/importance_sampling_ratio/mean": 0.9998562335968018,
270
+ "sampling/importance_sampling_ratio/min": 0.7584721446037292,
271
+ "sampling/sampling_logp_difference/max": 0.27644920349121094,
272
+ "sampling/sampling_logp_difference/mean": 0.01727226749062538,
273
  "step": 8,
274
+ "step_time": 33.65800914100009
275
  },
276
  {
277
+ "clip_ratio/high_max": 0.004464285913854837,
278
+ "clip_ratio/high_mean": 0.0011160714784637094,
279
+ "clip_ratio/low_mean": 0.0011160714784637094,
280
  "clip_ratio/low_min": 0.0,
281
+ "clip_ratio/region_mean": 0.0022321429569274187,
282
  "completions/clipped_ratio": 0.0,
283
+ "completions/max_length": 1576.0,
284
+ "completions/max_terminated_length": 1576.0,
285
+ "completions/mean_length": 1432.5625,
286
+ "completions/mean_terminated_length": 1432.5625,
287
+ "completions/min_length": 1167.0,
288
+ "completions/min_terminated_length": 1167.0,
289
+ "entropy": 0.6040426194667816,
290
  "epoch": 0.00036,
291
+ "frac_reward_zero_std": 0.0,
292
+ "grad_norm": 0.9153677821159363,
293
+ "kl": 0.0006420876543415943,
294
  "learning_rate": 2.285714285714286e-06,
295
+ "loss": 0.0259,
296
+ "num_tokens": 1321059.0,
297
+ "reward": -0.5375348925590515,
298
+ "reward_std": 0.19819244742393494,
299
+ "rewards/rollout_reward_func/mean": -0.5375348925590515,
300
+ "rewards/rollout_reward_func/std": 0.20910826325416565,
301
+ "sampling/importance_sampling_ratio/max": 1.264775037765503,
302
+ "sampling/importance_sampling_ratio/mean": 1.0003395080566406,
303
+ "sampling/importance_sampling_ratio/min": 0.7903153896331787,
304
+ "sampling/sampling_logp_difference/max": 0.23532319068908691,
305
+ "sampling/sampling_logp_difference/mean": 0.01806110143661499,
306
  "step": 9,
307
+ "step_time": 38.66699894299927
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  }
309
  ],
310
  "logging_steps": 1.0,
311
  "max_steps": 600,
312
+ "num_input_tokens_seen": 1321059,
313
  "num_train_epochs": 1,
314
  "save_steps": 500,
315
  "stateful_callbacks": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a775c2b74232ad23885d216dd244a4a85eb0fae28310f94063736202117b7c8
3
  size 8145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8421f1a93a2c8cf9dd383121cf9ef8942a208c5e26428c5eb482d90b23265824
3
  size 8145