| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "global_step": 1689, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.0002, |
| "loss": 0.7526, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_f1": 0.7867768595041322, |
| "eval_loss": 0.501287043094635, |
| "eval_runtime": 111.669, |
| "eval_samples_per_second": 8.955, |
| "eval_steps_per_second": 1.119, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_f1": 0.731527093596059, |
| "eval_loss": 0.45942792296409607, |
| "eval_runtime": 14.4836, |
| "eval_samples_per_second": 69.044, |
| "eval_steps_per_second": 8.63, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_f1": 0.768503937007874, |
| "eval_loss": 0.6004758477210999, |
| "eval_runtime": 13.7722, |
| "eval_samples_per_second": 72.61, |
| "eval_steps_per_second": 9.076, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.34, |
| "eval_f1": 0.8773784355179703, |
| "eval_loss": 0.293561190366745, |
| "eval_runtime": 16.0256, |
| "eval_samples_per_second": 62.4, |
| "eval_steps_per_second": 7.8, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.43, |
| "eval_f1": 0.8644444444444445, |
| "eval_loss": 0.3103949725627899, |
| "eval_runtime": 13.1537, |
| "eval_samples_per_second": 76.024, |
| "eval_steps_per_second": 9.503, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_f1": 0.903353057199211, |
| "eval_loss": 0.2314113825559616, |
| "eval_runtime": 13.9609, |
| "eval_samples_per_second": 71.629, |
| "eval_steps_per_second": 8.954, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_f1": 0.904950495049505, |
| "eval_loss": 0.23260511457920074, |
| "eval_runtime": 10.3047, |
| "eval_samples_per_second": 97.043, |
| "eval_steps_per_second": 12.13, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.68, |
| "eval_f1": 0.8210526315789474, |
| "eval_loss": 0.39369526505470276, |
| "eval_runtime": 13.155, |
| "eval_samples_per_second": 76.017, |
| "eval_steps_per_second": 9.502, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_f1": 0.9147609147609148, |
| "eval_loss": 0.20550310611724854, |
| "eval_runtime": 45.6928, |
| "eval_samples_per_second": 21.885, |
| "eval_steps_per_second": 2.736, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.85, |
| "eval_f1": 0.8367816091954022, |
| "eval_loss": 0.3017582893371582, |
| "eval_runtime": 22.6976, |
| "eval_samples_per_second": 44.057, |
| "eval_steps_per_second": 5.507, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.94, |
| "eval_f1": 0.9271653543307087, |
| "eval_loss": 0.19784249365329742, |
| "eval_runtime": 12.076, |
| "eval_samples_per_second": 82.809, |
| "eval_steps_per_second": 10.351, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.02, |
| "eval_f1": 0.9368104312938816, |
| "eval_loss": 0.17989766597747803, |
| "eval_runtime": 49.8486, |
| "eval_samples_per_second": 20.061, |
| "eval_steps_per_second": 2.508, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.11, |
| "eval_f1": 0.895424836601307, |
| "eval_loss": 0.23422278463840485, |
| "eval_runtime": 11.6376, |
| "eval_samples_per_second": 85.928, |
| "eval_steps_per_second": 10.741, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_f1": 0.9256360078277887, |
| "eval_loss": 0.22150222957134247, |
| "eval_runtime": 11.4494, |
| "eval_samples_per_second": 87.341, |
| "eval_steps_per_second": 10.918, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_f1": 0.9051724137931034, |
| "eval_loss": 0.22654065489768982, |
| "eval_runtime": 11.6176, |
| "eval_samples_per_second": 86.076, |
| "eval_steps_per_second": 10.76, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.36, |
| "eval_f1": 0.9389389389389389, |
| "eval_loss": 0.1583857387304306, |
| "eval_runtime": 15.3861, |
| "eval_samples_per_second": 64.994, |
| "eval_steps_per_second": 8.124, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.45, |
| "eval_f1": 0.921487603305785, |
| "eval_loss": 0.22388193011283875, |
| "eval_runtime": 14.1559, |
| "eval_samples_per_second": 70.642, |
| "eval_steps_per_second": 8.83, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_f1": 0.9282868525896415, |
| "eval_loss": 0.1805724799633026, |
| "eval_runtime": 11.7527, |
| "eval_samples_per_second": 85.087, |
| "eval_steps_per_second": 10.636, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.62, |
| "eval_f1": 0.8998899889988998, |
| "eval_loss": 0.22186490893363953, |
| "eval_runtime": 11.5055, |
| "eval_samples_per_second": 86.915, |
| "eval_steps_per_second": 10.864, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.71, |
| "eval_f1": 0.9466263846928499, |
| "eval_loss": 0.15720757842063904, |
| "eval_runtime": 11.3153, |
| "eval_samples_per_second": 88.376, |
| "eval_steps_per_second": 11.047, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.79, |
| "eval_f1": 0.9457523029682703, |
| "eval_loss": 0.15362103283405304, |
| "eval_runtime": 11.6626, |
| "eval_samples_per_second": 85.744, |
| "eval_steps_per_second": 10.718, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.88, |
| "eval_f1": 0.9403714565004888, |
| "eval_loss": 0.15603256225585938, |
| "eval_runtime": 11.4304, |
| "eval_samples_per_second": 87.486, |
| "eval_steps_per_second": 10.936, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.96, |
| "eval_f1": 0.9567901234567902, |
| "eval_loss": 0.13539032638072968, |
| "eval_runtime": 11.6426, |
| "eval_samples_per_second": 85.891, |
| "eval_steps_per_second": 10.736, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 6.714031971580817e-05, |
| "loss": 0.2638, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.05, |
| "eval_f1": 0.9402390438247011, |
| "eval_loss": 0.2028977870941162, |
| "eval_runtime": 11.4905, |
| "eval_samples_per_second": 87.029, |
| "eval_steps_per_second": 10.879, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.13, |
| "eval_f1": 0.9477911646586347, |
| "eval_loss": 0.158633291721344, |
| "eval_runtime": 11.6696, |
| "eval_samples_per_second": 85.693, |
| "eval_steps_per_second": 10.712, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.22, |
| "eval_f1": 0.9363920750782065, |
| "eval_loss": 0.16604219377040863, |
| "eval_runtime": 11.6366, |
| "eval_samples_per_second": 85.936, |
| "eval_steps_per_second": 10.742, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.3, |
| "eval_f1": 0.946611909650924, |
| "eval_loss": 0.16741609573364258, |
| "eval_runtime": 11.5595, |
| "eval_samples_per_second": 86.509, |
| "eval_steps_per_second": 10.814, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.39, |
| "eval_f1": 0.9566094853683148, |
| "eval_loss": 0.15039804577827454, |
| "eval_runtime": 11.5775, |
| "eval_samples_per_second": 86.374, |
| "eval_steps_per_second": 10.797, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.47, |
| "eval_f1": 0.9386892177589852, |
| "eval_loss": 0.1896335780620575, |
| "eval_runtime": 11.4785, |
| "eval_samples_per_second": 87.12, |
| "eval_steps_per_second": 10.89, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_f1": 0.9516129032258065, |
| "eval_loss": 0.1552370935678482, |
| "eval_runtime": 11.6346, |
| "eval_samples_per_second": 85.951, |
| "eval_steps_per_second": 10.744, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.64, |
| "eval_f1": 0.9494949494949494, |
| "eval_loss": 0.17198829352855682, |
| "eval_runtime": 11.4094, |
| "eval_samples_per_second": 87.647, |
| "eval_steps_per_second": 10.956, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.73, |
| "eval_f1": 0.9596774193548386, |
| "eval_loss": 0.14362381398677826, |
| "eval_runtime": 13.3792, |
| "eval_samples_per_second": 74.743, |
| "eval_steps_per_second": 9.343, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.81, |
| "eval_f1": 0.9530469530469531, |
| "eval_loss": 0.15035580098628998, |
| "eval_runtime": 12.9698, |
| "eval_samples_per_second": 77.102, |
| "eval_steps_per_second": 9.638, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.9, |
| "eval_f1": 0.9538461538461538, |
| "eval_loss": 0.1516994833946228, |
| "eval_runtime": 11.9429, |
| "eval_samples_per_second": 83.732, |
| "eval_steps_per_second": 10.466, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.98, |
| "eval_f1": 0.9538461538461538, |
| "eval_loss": 0.15105971693992615, |
| "eval_runtime": 13.9387, |
| "eval_samples_per_second": 71.743, |
| "eval_steps_per_second": 8.968, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1689, |
| "total_flos": 4.178103906557952e+18, |
| "train_loss": 0.20243481533847807, |
| "train_runtime": 2207.1281, |
| "train_samples_per_second": 24.466, |
| "train_steps_per_second": 0.765 |
| } |
| ], |
| "max_steps": 1689, |
| "num_train_epochs": 3, |
| "total_flos": 4.178103906557952e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|