{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9883495145631067,
  "eval_steps": 500,
  "global_step": 128,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.8233,
      "step": 2
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 1.9878,
      "step": 4
    },
    {
      "epoch": 0.09,
      "learning_rate": 2e-05,
      "loss": 1.9236,
      "step": 6
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.999429490929718e-05,
      "loss": 2.0953,
      "step": 8
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9977186146800707e-05,
      "loss": 2.0993,
      "step": 10
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.994869323391895e-05,
      "loss": 2.1089,
      "step": 12
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.990884868158239e-05,
      "loss": 2.1726,
      "step": 14
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.985769795314804e-05,
      "loss": 2.4714,
      "step": 16
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.9795299412524948e-05,
      "loss": 1.8168,
      "step": 18
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.9721724257579907e-05,
      "loss": 1.8571,
      "step": 20
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.963705643889941e-05,
      "loss": 1.7876,
      "step": 22
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.954139256400049e-05,
      "loss": 2.0869,
      "step": 24
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.9434841787099804e-05,
      "loss": 2.1225,
      "step": 26
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.9317525684566686e-05,
      "loss": 2.2122,
      "step": 28
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.918957811620231e-05,
      "loss": 2.1554,
      "step": 30
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.9051145072503216e-05,
      "loss": 2.5127,
      "step": 32
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.8902384508083518e-05,
      "loss": 1.801,
      "step": 34
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.8743466161445823e-05,
      "loss": 1.938,
      "step": 36
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.857457136130651e-05,
      "loss": 2.0655,
      "step": 38
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.839589281969639e-05,
      "loss": 1.9769,
      "step": 40
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.8207634412072765e-05,
      "loss": 2.1012,
      "step": 42
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.8010010944693846e-05,
      "loss": 2.12,
      "step": 44
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.780324790952092e-05,
      "loss": 2.0332,
      "step": 46
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.758758122692791e-05,
      "loss": 2.3212,
      "step": 48
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.7363256976511972e-05,
      "loss": 1.8738,
      "step": 50
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.7130531116312202e-05,
      "loss": 1.9489,
      "step": 52
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.688966919075687e-05,
      "loss": 1.9085,
      "step": 54
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.6640946027672395e-05,
      "loss": 1.8755,
      "step": 56
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.6384645424699835e-05,
      "loss": 2.1313,
      "step": 58
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.612105982547663e-05,
      "loss": 1.938,
      "step": 60
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.5850489985953076e-05,
      "loss": 2.1761,
      "step": 62
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.5573244631224364e-05,
      "loss": 2.1247,
      "step": 64
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.5289640103269626e-05,
      "loss": 1.8631,
      "step": 66
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 1.8933,
      "step": 68
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.4704654806027558e-05,
      "loss": 1.7232,
      "step": 70
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.4403941515576344e-05,
      "loss": 1.9163,
      "step": 72
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.4098203247965876e-05,
      "loss": 2.0748,
      "step": 74
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.3787788856105762e-05,
      "loss": 1.9534,
      "step": 76
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.3473052528448203e-05,
      "loss": 2.0542,
      "step": 78
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.3154353384852559e-05,
      "loss": 1.9958,
      "step": 80
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.283205506682304e-05,
      "loss": 1.7535,
      "step": 82
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.2506525322587207e-05,
      "loss": 1.8274,
      "step": 84
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.2178135587488515e-05,
      "loss": 1.8489,
      "step": 86
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.1847260560171895e-05,
      "loss": 1.8733,
      "step": 88
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.1514277775045768e-05,
      "loss": 1.9922,
      "step": 90
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.1179567171508463e-05,
      "loss": 2.0302,
      "step": 92
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.0843510660430447e-05,
      "loss": 1.8805,
      "step": 94
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.0506491688387128e-05,
      "loss": 2.1553,
      "step": 96
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.0168894800139311e-05,
      "loss": 1.9718,
      "step": 98
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.83110519986069e-06,
      "loss": 1.8164,
      "step": 100
    },
    {
      "epoch": 1.58,
      "learning_rate": 9.493508311612874e-06,
      "loss": 1.8331,
      "step": 102
    },
    {
      "epoch": 1.62,
      "learning_rate": 9.156489339569555e-06,
      "loss": 1.9306,
      "step": 104
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.820432828491542e-06,
      "loss": 1.9804,
      "step": 106
    },
    {
      "epoch": 1.68,
      "learning_rate": 8.485722224954237e-06,
      "loss": 2.1306,
      "step": 108
    },
    {
      "epoch": 1.71,
      "learning_rate": 8.15273943982811e-06,
      "loss": 2.1067,
      "step": 110
    },
    {
      "epoch": 1.74,
      "learning_rate": 7.821864412511485e-06,
      "loss": 1.9919,
      "step": 112
    },
    {
      "epoch": 1.77,
      "learning_rate": 7.493474677412795e-06,
      "loss": 1.9345,
      "step": 114
    },
    {
      "epoch": 1.8,
      "learning_rate": 7.16794493317696e-06,
      "loss": 1.9166,
      "step": 116
    },
    {
      "epoch": 1.83,
      "learning_rate": 6.845646615147445e-06,
      "loss": 1.998,
      "step": 118
    },
    {
      "epoch": 1.86,
      "learning_rate": 6.526947471551799e-06,
      "loss": 1.8984,
      "step": 120
    },
    {
      "epoch": 1.9,
      "learning_rate": 6.21221114389424e-06,
      "loss": 1.9596,
      "step": 122
    },
    {
      "epoch": 1.93,
      "learning_rate": 5.901796752034128e-06,
      "loss": 1.8888,
      "step": 124
    },
    {
      "epoch": 1.96,
      "learning_rate": 5.5960584844236565e-06,
      "loss": 2.1129,
      "step": 126
    },
    {
      "epoch": 1.99,
      "learning_rate": 5.295345193972445e-06,
      "loss": 2.0109,
      "step": 128
    }
  ],
  "logging_steps": 2,
  "max_steps": 192,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 2.4420751512354816e+16,
  "trial_name": null,
  "trial_params": null
}