Training in progress, step 51600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1187,144 +1187,13 @@ You can finetune this model on your own dataset.
|
|
| 1187 |
</details>
|
| 1188 |
|
| 1189 |
### Training Logs
|
| 1190 |
-
<details><summary>Click to expand</summary>
|
| 1191 |
-
|
| 1192 |
| Epoch | Step | Training Loss |
|
| 1193 |
|:------:|:-----:|:-------------:|
|
| 1194 |
-
| 0.
|
| 1195 |
-
| 0.
|
| 1196 |
-
| 0.
|
| 1197 |
-
| 0.
|
| 1198 |
-
| 0.7961 | 45050 | 0.2596 |
|
| 1199 |
-
| 0.7969 | 45100 | 0.311 |
|
| 1200 |
-
| 0.7978 | 45150 | 0.2368 |
|
| 1201 |
-
| 0.7987 | 45200 | 0.3147 |
|
| 1202 |
-
| 0.7996 | 45250 | 0.2964 |
|
| 1203 |
-
| 0.8005 | 45300 | 0.34 |
|
| 1204 |
-
| 0.8014 | 45350 | 0.3249 |
|
| 1205 |
-
| 0.8022 | 45400 | 0.4092 |
|
| 1206 |
-
| 0.8031 | 45450 | 0.2601 |
|
| 1207 |
-
| 0.8040 | 45500 | 0.306 |
|
| 1208 |
-
| 0.8049 | 45550 | 0.2888 |
|
| 1209 |
-
| 0.8058 | 45600 | 0.3101 |
|
| 1210 |
-
| 0.8067 | 45650 | 0.3148 |
|
| 1211 |
-
| 0.8075 | 45700 | 0.3975 |
|
| 1212 |
-
| 0.8084 | 45750 | 0.391 |
|
| 1213 |
-
| 0.8093 | 45800 | 0.3055 |
|
| 1214 |
-
| 0.8102 | 45850 | 0.2434 |
|
| 1215 |
-
| 0.8111 | 45900 | 0.285 |
|
| 1216 |
-
| 0.8120 | 45950 | 0.3952 |
|
| 1217 |
-
| 0.8129 | 46000 | 0.2802 |
|
| 1218 |
-
| 0.8137 | 46050 | 0.2687 |
|
| 1219 |
-
| 0.8146 | 46100 | 0.2787 |
|
| 1220 |
-
| 0.8155 | 46150 | 0.2943 |
|
| 1221 |
-
| 0.8164 | 46200 | 0.3386 |
|
| 1222 |
-
| 0.8173 | 46250 | 0.3227 |
|
| 1223 |
-
| 0.8182 | 46300 | 0.2582 |
|
| 1224 |
-
| 0.8190 | 46350 | 0.285 |
|
| 1225 |
-
| 0.8199 | 46400 | 0.2989 |
|
| 1226 |
-
| 0.8208 | 46450 | 0.2761 |
|
| 1227 |
-
| 0.8217 | 46500 | 0.299 |
|
| 1228 |
-
| 0.8226 | 46550 | 0.2908 |
|
| 1229 |
-
| 0.8235 | 46600 | 0.3134 |
|
| 1230 |
-
| 0.8243 | 46650 | 0.2603 |
|
| 1231 |
-
| 0.8252 | 46700 | 0.2965 |
|
| 1232 |
-
| 0.8261 | 46750 | 0.2506 |
|
| 1233 |
-
| 0.8270 | 46800 | 0.2258 |
|
| 1234 |
-
| 0.8279 | 46850 | 0.2209 |
|
| 1235 |
-
| 0.8288 | 46900 | 0.2756 |
|
| 1236 |
-
| 0.8296 | 46950 | 0.3345 |
|
| 1237 |
-
| 0.8305 | 47000 | 0.3016 |
|
| 1238 |
-
| 0.8314 | 47050 | 0.2712 |
|
| 1239 |
-
| 0.8323 | 47100 | 0.3721 |
|
| 1240 |
-
| 0.8332 | 47150 | 0.3483 |
|
| 1241 |
-
| 0.8341 | 47200 | 0.3002 |
|
| 1242 |
-
| 0.8349 | 47250 | 0.2333 |
|
| 1243 |
-
| 0.8358 | 47300 | 0.3043 |
|
| 1244 |
-
| 0.8367 | 47350 | 0.2992 |
|
| 1245 |
-
| 0.8376 | 47400 | 0.3367 |
|
| 1246 |
-
| 0.8385 | 47450 | 0.3135 |
|
| 1247 |
-
| 0.8394 | 47500 | 0.2681 |
|
| 1248 |
-
| 0.8402 | 47550 | 0.2764 |
|
| 1249 |
-
| 0.8411 | 47600 | 0.3211 |
|
| 1250 |
-
| 0.8420 | 47650 | 0.3081 |
|
| 1251 |
-
| 0.8429 | 47700 | 0.2929 |
|
| 1252 |
-
| 0.8438 | 47750 | 0.3466 |
|
| 1253 |
-
| 0.8447 | 47800 | 0.3012 |
|
| 1254 |
-
| 0.8455 | 47850 | 0.2953 |
|
| 1255 |
-
| 0.8464 | 47900 | 0.2914 |
|
| 1256 |
-
| 0.8473 | 47950 | 0.3219 |
|
| 1257 |
-
| 0.8482 | 48000 | 0.3104 |
|
| 1258 |
-
| 0.8491 | 48050 | 0.3676 |
|
| 1259 |
-
| 0.8500 | 48100 | 0.2989 |
|
| 1260 |
-
| 0.8508 | 48150 | 0.3259 |
|
| 1261 |
-
| 0.8517 | 48200 | 0.2949 |
|
| 1262 |
-
| 0.8526 | 48250 | 0.3914 |
|
| 1263 |
-
| 0.8535 | 48300 | 0.2645 |
|
| 1264 |
-
| 0.8544 | 48350 | 0.3358 |
|
| 1265 |
-
| 0.8553 | 48400 | 0.2939 |
|
| 1266 |
-
| 0.8561 | 48450 | 0.4063 |
|
| 1267 |
-
| 0.8570 | 48500 | 0.3492 |
|
| 1268 |
-
| 0.8579 | 48550 | 0.2794 |
|
| 1269 |
-
| 0.8588 | 48600 | 0.2854 |
|
| 1270 |
-
| 0.8597 | 48650 | 0.2664 |
|
| 1271 |
-
| 0.8606 | 48700 | 0.3028 |
|
| 1272 |
-
| 0.8614 | 48750 | 0.2579 |
|
| 1273 |
-
| 0.8623 | 48800 | 0.3404 |
|
| 1274 |
-
| 0.8632 | 48850 | 0.3535 |
|
| 1275 |
-
| 0.8641 | 48900 | 0.2224 |
|
| 1276 |
-
| 0.8650 | 48950 | 0.2701 |
|
| 1277 |
-
| 0.8659 | 49000 | 0.2506 |
|
| 1278 |
-
| 0.8667 | 49050 | 0.3032 |
|
| 1279 |
-
| 0.8676 | 49100 | 0.3881 |
|
| 1280 |
-
| 0.8685 | 49150 | 0.253 |
|
| 1281 |
-
| 0.8694 | 49200 | 0.2827 |
|
| 1282 |
-
| 0.8703 | 49250 | 0.266 |
|
| 1283 |
-
| 0.8712 | 49300 | 0.3008 |
|
| 1284 |
-
| 0.8720 | 49350 | 0.3406 |
|
| 1285 |
-
| 0.8729 | 49400 | 0.3348 |
|
| 1286 |
-
| 0.8738 | 49450 | 0.3021 |
|
| 1287 |
-
| 0.8747 | 49500 | 0.3182 |
|
| 1288 |
-
| 0.8756 | 49550 | 0.3063 |
|
| 1289 |
-
| 0.8765 | 49600 | 0.2385 |
|
| 1290 |
-
| 0.8773 | 49650 | 0.3152 |
|
| 1291 |
-
| 0.8782 | 49700 | 0.267 |
|
| 1292 |
-
| 0.8791 | 49750 | 0.2599 |
|
| 1293 |
-
| 0.8800 | 49800 | 0.3226 |
|
| 1294 |
-
| 0.8809 | 49850 | 0.3516 |
|
| 1295 |
-
| 0.8818 | 49900 | 0.2912 |
|
| 1296 |
-
| 0.8826 | 49950 | 0.2853 |
|
| 1297 |
-
| 0.8835 | 50000 | 0.3212 |
|
| 1298 |
-
| 0.8844 | 50050 | 0.3399 |
|
| 1299 |
-
| 0.8853 | 50100 | 0.3104 |
|
| 1300 |
-
| 0.8862 | 50150 | 0.368 |
|
| 1301 |
-
| 0.8871 | 50200 | 0.2848 |
|
| 1302 |
-
| 0.8880 | 50250 | 0.4137 |
|
| 1303 |
-
| 0.8888 | 50300 | 0.3597 |
|
| 1304 |
-
| 0.8897 | 50350 | 0.3246 |
|
| 1305 |
-
| 0.8906 | 50400 | 0.2658 |
|
| 1306 |
-
| 0.8915 | 50450 | 0.2954 |
|
| 1307 |
-
| 0.8924 | 50500 | 0.3035 |
|
| 1308 |
-
| 0.8933 | 50550 | 0.2654 |
|
| 1309 |
-
| 0.8941 | 50600 | 0.2742 |
|
| 1310 |
-
| 0.8950 | 50650 | 0.3435 |
|
| 1311 |
-
| 0.8959 | 50700 | 0.2947 |
|
| 1312 |
-
| 0.8968 | 50750 | 0.3013 |
|
| 1313 |
-
| 0.8977 | 50800 | 0.3845 |
|
| 1314 |
-
| 0.8986 | 50850 | 0.2882 |
|
| 1315 |
-
| 0.8994 | 50900 | 0.3639 |
|
| 1316 |
-
| 0.9003 | 50950 | 0.2332 |
|
| 1317 |
-
| 0.9012 | 51000 | 0.3363 |
|
| 1318 |
-
| 0.9021 | 51050 | 0.2674 |
|
| 1319 |
-
| 0.9030 | 51100 | 0.331 |
|
| 1320 |
-
| 0.9039 | 51150 | 0.2881 |
|
| 1321 |
-
| 0.9047 | 51200 | 0.3553 |
|
| 1322 |
-
| 0.9056 | 51250 | 0.2936 |
|
| 1323 |
-
| 0.9065 | 51300 | 0.2987 |
|
| 1324 |
-
| 0.9074 | 51350 | 0.3343 |
|
| 1325 |
-
| 0.9083 | 51400 | 0.2919 |
|
| 1326 |
|
| 1327 |
-
</details>
|
| 1328 |
|
| 1329 |
### Framework Versions
|
| 1330 |
- Python: 3.11.13
|
|
|
|
| 1187 |
</details>
|
| 1188 |
|
| 1189 |
### Training Logs
|
|
|
|
|
|
|
| 1190 |
| Epoch | Step | Training Loss |
|
| 1191 |
|:------:|:-----:|:-------------:|
|
| 1192 |
+
| 0.9092 | 51450 | 0.2997 |
|
| 1193 |
+
| 0.9100 | 51500 | 0.2247 |
|
| 1194 |
+
| 0.9109 | 51550 | 0.2918 |
|
| 1195 |
+
| 0.9118 | 51600 | 0.3295 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1196 |
|
|
|
|
| 1197 |
|
| 1198 |
### Framework Versions
|
| 1199 |
- Python: 3.11.13
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeecd8d9471d7a0b6627d01abbb6ab9354f23702be741b49535069a09a804f63
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33a86535350dee0d838aabde4505a86ae7f966c06b02b476617bf119348d925b
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a589e7cc1c3f0f337824a3d6a8bc421970447f40e56c215be58c6b1d6c52a5a0
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39475f4e00274ea95478c0aa6d264be5db3bd87ad877f934d4b5cd1f7f759e82
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0adae6525b0259ff389512f804b95631a5bbbd0e13a53824c06deb281fe18e7
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -7204,6 +7204,34 @@
|
|
| 7204 |
"learning_rate": 5.1167265516090395e-06,
|
| 7205 |
"loss": 0.2919,
|
| 7206 |
"step": 51400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7207 |
}
|
| 7208 |
],
|
| 7209 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.9118057641674471,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 51600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 7204 |
"learning_rate": 5.1167265516090395e-06,
|
| 7205 |
"loss": 0.2919,
|
| 7206 |
"step": 51400
|
| 7207 |
+
},
|
| 7208 |
+
{
|
| 7209 |
+
"epoch": 0.9091551660157976,
|
| 7210 |
+
"grad_norm": 1.3142715692520142,
|
| 7211 |
+
"learning_rate": 5.067640533270503e-06,
|
| 7212 |
+
"loss": 0.2997,
|
| 7213 |
+
"step": 51450
|
| 7214 |
+
},
|
| 7215 |
+
{
|
| 7216 |
+
"epoch": 0.9100386987330141,
|
| 7217 |
+
"grad_norm": 1.3387079238891602,
|
| 7218 |
+
"learning_rate": 5.018554514931967e-06,
|
| 7219 |
+
"loss": 0.2247,
|
| 7220 |
+
"step": 51500
|
| 7221 |
+
},
|
| 7222 |
+
{
|
| 7223 |
+
"epoch": 0.9109222314502307,
|
| 7224 |
+
"grad_norm": 1.9581636190414429,
|
| 7225 |
+
"learning_rate": 4.96946849659343e-06,
|
| 7226 |
+
"loss": 0.2918,
|
| 7227 |
+
"step": 51550
|
| 7228 |
+
},
|
| 7229 |
+
{
|
| 7230 |
+
"epoch": 0.9118057641674471,
|
| 7231 |
+
"grad_norm": 1.3822007179260254,
|
| 7232 |
+
"learning_rate": 4.920382478254894e-06,
|
| 7233 |
+
"loss": 0.3295,
|
| 7234 |
+
"step": 51600
|
| 7235 |
}
|
| 7236 |
],
|
| 7237 |
"logging_steps": 50,
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5560
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b1eb037399f5df91069f9b474c7a40c3e0d7a309caabb274d665499c0377b52
|
| 3 |
size 5560
|