{
  "best_metric": 88.58308022828253,
  "best_model_checkpoint": "output/bert/checkpoint-20500",
  "epoch": 1.8525212362190493,
  "global_step": 20500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "learning_rate": 2.9322248328212544e-05,
      "loss": 2.6168,
      "step": 500
    },
    {
      "epoch": 0.05,
      "eval_exact_match": 60.0,
      "eval_f1": 71.21519064840525,
      "step": 500
    },
    {
      "epoch": 0.09,
      "learning_rate": 2.8644496656425085e-05,
      "loss": 1.6041,
      "step": 1000
    },
    {
      "epoch": 0.09,
      "eval_exact_match": 67.11447492904446,
      "eval_f1": 77.66743510634613,
      "step": 1000
    },
    {
      "epoch": 0.14,
      "learning_rate": 2.7966744984637632e-05,
      "loss": 1.4496,
      "step": 1500
    },
    {
      "epoch": 0.14,
      "eval_exact_match": 69.66887417218543,
      "eval_f1": 79.43837393741833,
      "step": 1500
    },
    {
      "epoch": 0.18,
      "learning_rate": 2.7288993312850172e-05,
      "loss": 1.3674,
      "step": 2000
    },
    {
      "epoch": 0.18,
      "eval_exact_match": 72.5922421948912,
      "eval_f1": 81.81276872830003,
      "step": 2000
    },
    {
      "epoch": 0.23,
      "learning_rate": 2.6611241641062716e-05,
      "loss": 1.3504,
      "step": 2500
    },
    {
      "epoch": 0.23,
      "eval_exact_match": 73.52885525070955,
      "eval_f1": 82.34072045279031,
      "step": 2500
    },
    {
      "epoch": 0.27,
      "learning_rate": 2.5933489969275256e-05,
      "loss": 1.3002,
      "step": 3000
    },
    {
      "epoch": 0.27,
      "eval_exact_match": 74.13434247871334,
      "eval_f1": 83.19942219951206,
      "step": 3000
    },
    {
      "epoch": 0.32,
      "learning_rate": 2.5255738297487804e-05,
      "loss": 1.2499,
      "step": 3500
    },
    {
      "epoch": 0.32,
      "eval_exact_match": 74.12488174077578,
      "eval_f1": 83.16211440023876,
      "step": 3500
    },
    {
      "epoch": 0.36,
      "learning_rate": 2.4577986625700344e-05,
      "loss": 1.1953,
      "step": 4000
    },
    {
      "epoch": 0.36,
      "eval_exact_match": 75.37369914853359,
      "eval_f1": 84.23388802584633,
      "step": 4000
    },
    {
      "epoch": 0.41,
      "learning_rate": 2.3900234953912888e-05,
      "loss": 1.1952,
      "step": 4500
    },
    {
      "epoch": 0.41,
      "eval_exact_match": 76.14001892147587,
      "eval_f1": 84.76099418267141,
      "step": 4500
    },
    {
      "epoch": 0.45,
      "learning_rate": 2.3222483282125428e-05,
      "loss": 1.2084,
      "step": 5000
    },
    {
      "epoch": 0.45,
      "eval_exact_match": 77.11447492904446,
      "eval_f1": 85.29225320990346,
      "step": 5000
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.2544731610337975e-05,
      "loss": 1.1546,
      "step": 5500
    },
    {
      "epoch": 0.5,
      "eval_exact_match": 76.65089877010406,
      "eval_f1": 85.47014269865286,
      "step": 5500
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.1866979938550515e-05,
      "loss": 1.1018,
      "step": 6000
    },
    {
      "epoch": 0.54,
      "eval_exact_match": 76.9441816461684,
      "eval_f1": 85.33429382849097,
      "step": 6000
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.118922826676306e-05,
      "loss": 1.0937,
      "step": 6500
    },
    {
      "epoch": 0.59,
      "eval_exact_match": 77.07663197729423,
      "eval_f1": 85.52813865025963,
      "step": 6500
    },
    {
      "epoch": 0.63,
      "learning_rate": 2.05114765949756e-05,
      "loss": 1.0422,
      "step": 7000
    },
    {
      "epoch": 0.63,
      "eval_exact_match": 78.0794701986755,
      "eval_f1": 86.03681982738262,
      "step": 7000
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.9833724923188147e-05,
      "loss": 1.116,
      "step": 7500
    },
    {
      "epoch": 0.68,
      "eval_exact_match": 77.95648060548723,
      "eval_f1": 86.26399611648696,
      "step": 7500
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.9155973251400687e-05,
      "loss": 1.1176,
      "step": 8000
    },
    {
      "epoch": 0.72,
      "eval_exact_match": 78.59035004730369,
      "eval_f1": 86.54207970028193,
      "step": 8000
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.847822157961323e-05,
      "loss": 1.1029,
      "step": 8500
    },
    {
      "epoch": 0.77,
      "eval_exact_match": 78.66603595080416,
      "eval_f1": 86.56865525427538,
      "step": 8500
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.780046990782577e-05,
      "loss": 1.0594,
      "step": 9000
    },
    {
      "epoch": 0.81,
      "eval_exact_match": 78.9120151371807,
      "eval_f1": 86.80153948101524,
      "step": 9000
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.7122718236038318e-05,
      "loss": 1.0266,
      "step": 9500
    },
    {
      "epoch": 0.86,
      "eval_exact_match": 78.90255439924314,
      "eval_f1": 86.69867179951433,
      "step": 9500
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.644496656425086e-05,
      "loss": 1.063,
      "step": 10000
    },
    {
      "epoch": 0.9,
      "eval_exact_match": 79.5837275307474,
      "eval_f1": 87.50572394546504,
      "step": 10000
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.5767214892463402e-05,
      "loss": 1.0353,
      "step": 10500
    },
    {
      "epoch": 0.95,
      "eval_exact_match": 80.10406811731315,
      "eval_f1": 87.58460117305698,
      "step": 10500
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.5089463220675944e-05,
      "loss": 1.0198,
      "step": 11000
    },
    {
      "epoch": 0.99,
      "eval_exact_match": 80.1135288552507,
      "eval_f1": 87.63553208656376,
      "step": 11000
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.4411711548888486e-05,
      "loss": 0.7558,
      "step": 11500
    },
    {
      "epoch": 1.04,
      "eval_exact_match": 80.00946073793756,
      "eval_f1": 87.60930859917372,
      "step": 11500
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.373395987710103e-05,
      "loss": 0.7337,
      "step": 12000
    },
    {
      "epoch": 1.08,
      "eval_exact_match": 79.92431409649953,
      "eval_f1": 87.57399698225302,
      "step": 12000
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.3056208205313572e-05,
      "loss": 0.6884,
      "step": 12500
    },
    {
      "epoch": 1.13,
      "eval_exact_match": 80.2554399243141,
      "eval_f1": 87.80990616116381,
      "step": 12500
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.2378456533526116e-05,
      "loss": 0.7082,
      "step": 13000
    },
    {
      "epoch": 1.17,
      "eval_exact_match": 80.17975402081362,
      "eval_f1": 87.80432576834488,
      "step": 13000
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.1700704861738658e-05,
      "loss": 0.6914,
      "step": 13500
    },
    {
      "epoch": 1.22,
      "eval_exact_match": 79.87701040681173,
      "eval_f1": 87.89607002450978,
      "step": 13500
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.1022953189951202e-05,
      "loss": 0.7051,
      "step": 14000
    },
    {
      "epoch": 1.27,
      "eval_exact_match": 79.80132450331126,
      "eval_f1": 87.69384875801946,
      "step": 14000
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.0345201518163744e-05,
      "loss": 0.7302,
      "step": 14500
    },
    {
      "epoch": 1.31,
      "eval_exact_match": 80.2081362346263,
      "eval_f1": 88.06697491299282,
      "step": 14500
    },
    {
      "epoch": 1.36,
      "learning_rate": 9.667449846376287e-06,
      "loss": 0.6824,
      "step": 15000
    },
    {
      "epoch": 1.36,
      "eval_exact_match": 80.50141911069063,
      "eval_f1": 87.97379997606235,
      "step": 15000
    },
    {
      "epoch": 1.4,
      "learning_rate": 8.98969817458883e-06,
      "loss": 0.7169,
      "step": 15500
    },
    {
      "epoch": 1.4,
      "eval_exact_match": 80.35004730368969,
      "eval_f1": 88.00649157738846,
      "step": 15500
    },
    {
      "epoch": 1.45,
      "learning_rate": 8.311946502801373e-06,
      "loss": 0.7149,
      "step": 16000
    },
    {
      "epoch": 1.45,
      "eval_exact_match": 80.74739829706716,
      "eval_f1": 87.99961546836674,
      "step": 16000
    },
    {
      "epoch": 1.49,
      "learning_rate": 7.634194831013915e-06,
      "loss": 0.6726,
      "step": 16500
    },
    {
      "epoch": 1.49,
      "eval_exact_match": 80.93661305581836,
      "eval_f1": 88.06055493342305,
      "step": 16500
    },
    {
      "epoch": 1.54,
      "learning_rate": 6.95644315922646e-06,
      "loss": 0.6761,
      "step": 17000
    },
    {
      "epoch": 1.54,
      "eval_exact_match": 80.50141911069063,
      "eval_f1": 87.98739385338236,
      "step": 17000
    },
    {
      "epoch": 1.58,
      "learning_rate": 6.278691487439003e-06,
      "loss": 0.712,
      "step": 17500
    },
    {
      "epoch": 1.58,
      "eval_exact_match": 81.0406811731315,
      "eval_f1": 88.4045499003394,
      "step": 17500
    },
    {
      "epoch": 1.63,
      "learning_rate": 5.6009398156515455e-06,
      "loss": 0.693,
      "step": 18000
    },
    {
      "epoch": 1.63,
      "eval_exact_match": 80.69063386944181,
      "eval_f1": 88.19624181405425,
      "step": 18000
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.923188143864088e-06,
      "loss": 0.6594,
      "step": 18500
    },
    {
      "epoch": 1.67,
      "eval_exact_match": 80.97445600756859,
      "eval_f1": 88.26284168848002,
      "step": 18500
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.245436472076631e-06,
      "loss": 0.6743,
      "step": 19000
    },
    {
      "epoch": 1.72,
      "eval_exact_match": 80.43519394512772,
      "eval_f1": 88.07199660434318,
      "step": 19000
    },
    {
      "epoch": 1.76,
      "learning_rate": 3.567684800289174e-06,
      "loss": 0.6876,
      "step": 19500
    },
    {
      "epoch": 1.76,
      "eval_exact_match": 80.88930936613056,
      "eval_f1": 88.35346447229631,
      "step": 19500
    },
    {
      "epoch": 1.81,
      "learning_rate": 2.889933128501717e-06,
      "loss": 0.6913,
      "step": 20000
    },
    {
      "epoch": 1.81,
      "eval_exact_match": 81.22989593188268,
      "eval_f1": 88.55310003155216,
      "step": 20000
    },
    {
      "epoch": 1.85,
      "learning_rate": 2.21218145671426e-06,
      "loss": 0.6383,
      "step": 20500
    },
    {
      "epoch": 1.85,
      "eval_exact_match": 81.35288552507096,
      "eval_f1": 88.58308022828253,
      "step": 20500
    }
  ],
  "max_steps": 22132,
  "num_train_epochs": 2,
  "total_flos": 3.213871718825779e+16,
  "trial_name": null,
  "trial_params": null
}