| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 150.0, | |
| "eval_steps": 14790, | |
| "global_step": 221850, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.06951820850372314, | |
| "learning_rate": 9.00054090601758e-06, | |
| "loss": 0.2989, | |
| "step": 14790 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 6.212554454803467, | |
| "eval_runtime": 32.6367, | |
| "eval_samples_per_second": 297.671, | |
| "eval_steps_per_second": 6.772, | |
| "step": 14790 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.0010786975035443902, | |
| "learning_rate": 8.001014198782963e-06, | |
| "loss": 0.0626, | |
| "step": 29580 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 6.155207633972168, | |
| "eval_runtime": 30.9179, | |
| "eval_samples_per_second": 314.22, | |
| "eval_steps_per_second": 7.148, | |
| "step": 29580 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 0.027268769219517708, | |
| "learning_rate": 7.001487491548344e-06, | |
| "loss": 0.0467, | |
| "step": 44370 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 6.024782180786133, | |
| "eval_runtime": 30.7841, | |
| "eval_samples_per_second": 315.585, | |
| "eval_steps_per_second": 7.179, | |
| "step": 44370 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 0.0007825565990060568, | |
| "learning_rate": 6.001893171061528e-06, | |
| "loss": 0.0383, | |
| "step": 59160 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 6.02599573135376, | |
| "eval_runtime": 30.7709, | |
| "eval_samples_per_second": 315.721, | |
| "eval_steps_per_second": 7.182, | |
| "step": 59160 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "grad_norm": null, | |
| "learning_rate": 5.002434077079108e-06, | |
| "loss": 0.0333, | |
| "step": 73950 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 5.985586643218994, | |
| "eval_runtime": 30.6402, | |
| "eval_samples_per_second": 317.068, | |
| "eval_steps_per_second": 7.213, | |
| "step": 73950 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "grad_norm": 8.283340866910294e-05, | |
| "learning_rate": 4.002772143340095e-06, | |
| "loss": 0.0301, | |
| "step": 88740 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_loss": 5.848947525024414, | |
| "eval_runtime": 30.8176, | |
| "eval_samples_per_second": 315.242, | |
| "eval_steps_per_second": 7.171, | |
| "step": 88740 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "grad_norm": 0.0011442904360592365, | |
| "learning_rate": 3.0033130493576744e-06, | |
| "loss": 0.0275, | |
| "step": 103530 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_loss": 5.8452348709106445, | |
| "eval_runtime": 32.9595, | |
| "eval_samples_per_second": 294.756, | |
| "eval_steps_per_second": 6.705, | |
| "step": 103530 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "grad_norm": 0.061361562460660934, | |
| "learning_rate": 2.0037863421230565e-06, | |
| "loss": 0.0255, | |
| "step": 118320 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_loss": 5.728600978851318, | |
| "eval_runtime": 31.4319, | |
| "eval_samples_per_second": 309.081, | |
| "eval_steps_per_second": 7.031, | |
| "step": 118320 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "grad_norm": 0.0459863506257534, | |
| "learning_rate": 1.0042596348884382e-06, | |
| "loss": 0.0238, | |
| "step": 133110 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_loss": 5.677281856536865, | |
| "eval_runtime": 30.8915, | |
| "eval_samples_per_second": 314.488, | |
| "eval_steps_per_second": 7.154, | |
| "step": 133110 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "grad_norm": null, | |
| "learning_rate": 4.732927653820149e-09, | |
| "loss": 0.0225, | |
| "step": 147900 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_loss": 5.574370384216309, | |
| "eval_runtime": 30.8147, | |
| "eval_samples_per_second": 315.271, | |
| "eval_steps_per_second": 7.172, | |
| "step": 147900 | |
| }, | |
| { | |
| "epoch": 110.0, | |
| "grad_norm": 0.292227566242218, | |
| "learning_rate": 2.6701825557809337e-06, | |
| "loss": 0.0243, | |
| "step": 162690 | |
| }, | |
| { | |
| "epoch": 110.0, | |
| "eval_loss": 5.654893398284912, | |
| "eval_runtime": 30.718, | |
| "eval_samples_per_second": 316.265, | |
| "eval_steps_per_second": 7.194, | |
| "step": 162690 | |
| }, | |
| { | |
| "epoch": 120.0, | |
| "grad_norm": 0.000675542454700917, | |
| "learning_rate": 2.0038314176245213e-06, | |
| "loss": 0.024, | |
| "step": 177480 | |
| }, | |
| { | |
| "epoch": 120.0, | |
| "eval_loss": 5.59971284866333, | |
| "eval_runtime": 30.8062, | |
| "eval_samples_per_second": 315.359, | |
| "eval_steps_per_second": 7.174, | |
| "step": 177480 | |
| }, | |
| { | |
| "epoch": 130.0, | |
| "grad_norm": 0.00018167876987718046, | |
| "learning_rate": 1.3374802794681092e-06, | |
| "loss": 0.0227, | |
| "step": 192270 | |
| }, | |
| { | |
| "epoch": 130.0, | |
| "eval_loss": 5.560417175292969, | |
| "eval_runtime": 30.9576, | |
| "eval_samples_per_second": 313.816, | |
| "eval_steps_per_second": 7.139, | |
| "step": 192270 | |
| }, | |
| { | |
| "epoch": 140.0, | |
| "grad_norm": 0.0007145697018131614, | |
| "learning_rate": 6.711291413116972e-07, | |
| "loss": 0.0219, | |
| "step": 207060 | |
| }, | |
| { | |
| "epoch": 140.0, | |
| "eval_loss": 5.54286527633667, | |
| "eval_runtime": 31.0413, | |
| "eval_samples_per_second": 312.97, | |
| "eval_steps_per_second": 7.12, | |
| "step": 207060 | |
| }, | |
| { | |
| "epoch": 150.0, | |
| "grad_norm": 0.0009066470083780587, | |
| "learning_rate": 4.778003155285103e-09, | |
| "loss": 0.0217, | |
| "step": 221850 | |
| }, | |
| { | |
| "epoch": 150.0, | |
| "eval_loss": 5.543883323669434, | |
| "eval_runtime": 30.764, | |
| "eval_samples_per_second": 315.791, | |
| "eval_steps_per_second": 7.184, | |
| "step": 221850 | |
| }, | |
| { | |
| "epoch": 150.0, | |
| "step": 221850, | |
| "total_flos": 3.5039444859207936e+18, | |
| "train_loss": 0.007636452434781384, | |
| "train_runtime": 24973.9811, | |
| "train_samples_per_second": 355.144, | |
| "train_steps_per_second": 8.883 | |
| } | |
| ], | |
| "logging_steps": 14790, | |
| "max_steps": 221850, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 150, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.5039444859207936e+18, | |
| "train_batch_size": 40, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |