| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.5, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005, |
| "grad_norm": 370.1074523925781, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 3.3662, |
| "num_input_tokens_seen": 8388608, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.001, |
| "grad_norm": 380.1726379394531, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 3.3291, |
| "num_input_tokens_seen": 16777216, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0015, |
| "grad_norm": 414.3145446777344, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 3.2201, |
| "num_input_tokens_seen": 25165824, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.002, |
| "grad_norm": 389.34332275390625, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 3.2735, |
| "num_input_tokens_seen": 33554432, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0025, |
| "grad_norm": 385.0985412597656, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 3.2293, |
| "num_input_tokens_seen": 41943040, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.003, |
| "grad_norm": 267.23883056640625, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 3.1203, |
| "num_input_tokens_seen": 50331648, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0035, |
| "grad_norm": 249.7722930908203, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 3.1226, |
| "num_input_tokens_seen": 58720256, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 119.61425018310547, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 2.6311, |
| "num_input_tokens_seen": 67108864, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0045, |
| "grad_norm": 108.58890533447266, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 2.6811, |
| "num_input_tokens_seen": 75497472, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 98.306884765625, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 2.6586, |
| "num_input_tokens_seen": 83886080, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0055, |
| "grad_norm": 47.70505142211914, |
| "learning_rate": 2.2e-06, |
| "loss": 2.0387, |
| "num_input_tokens_seen": 92274688, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 44.65153121948242, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 2.1509, |
| "num_input_tokens_seen": 100663296, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0065, |
| "grad_norm": 36.97843551635742, |
| "learning_rate": 2.6e-06, |
| "loss": 2.0554, |
| "num_input_tokens_seen": 109051904, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.007, |
| "grad_norm": 27.567747116088867, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 1.9823, |
| "num_input_tokens_seen": 117440512, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0075, |
| "grad_norm": 24.83814239501953, |
| "learning_rate": 3e-06, |
| "loss": 1.9501, |
| "num_input_tokens_seen": 125829120, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 19.835155487060547, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 1.8774, |
| "num_input_tokens_seen": 134217728, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0085, |
| "grad_norm": 14.070314407348633, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 1.8801, |
| "num_input_tokens_seen": 142606336, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.009, |
| "grad_norm": 14.519227981567383, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 1.9272, |
| "num_input_tokens_seen": 150994944, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0095, |
| "grad_norm": 14.988875389099121, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 1.9902, |
| "num_input_tokens_seen": 159383552, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 9.906997680664062, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.9851, |
| "num_input_tokens_seen": 167772160, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0105, |
| "grad_norm": 7.837595462799072, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 1.9127, |
| "num_input_tokens_seen": 176160768, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.011, |
| "grad_norm": 4.843110084533691, |
| "learning_rate": 4.4e-06, |
| "loss": 1.8756, |
| "num_input_tokens_seen": 184549376, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0115, |
| "grad_norm": 4.830221176147461, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 1.691, |
| "num_input_tokens_seen": 192937984, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 2.5043883323669434, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 1.8997, |
| "num_input_tokens_seen": 201326592, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0125, |
| "grad_norm": 2.168509006500244, |
| "learning_rate": 5e-06, |
| "loss": 1.8075, |
| "num_input_tokens_seen": 209715200, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.013, |
| "grad_norm": 2.0097780227661133, |
| "learning_rate": 5.2e-06, |
| "loss": 1.8527, |
| "num_input_tokens_seen": 218103808, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0135, |
| "grad_norm": 3.2067172527313232, |
| "learning_rate": 5.400000000000001e-06, |
| "loss": 1.6836, |
| "num_input_tokens_seen": 226492416, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 2.623101234436035, |
| "learning_rate": 5.600000000000001e-06, |
| "loss": 1.6736, |
| "num_input_tokens_seen": 234881024, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0145, |
| "grad_norm": 3.066000461578369, |
| "learning_rate": 5.8e-06, |
| "loss": 1.6637, |
| "num_input_tokens_seen": 243269632, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.015, |
| "grad_norm": 2.0795412063598633, |
| "learning_rate": 6e-06, |
| "loss": 1.6973, |
| "num_input_tokens_seen": 251658240, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0155, |
| "grad_norm": 2.8886075019836426, |
| "learning_rate": 6.200000000000001e-06, |
| "loss": 1.9608, |
| "num_input_tokens_seen": 260046848, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 2.7311999797821045, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 1.8288, |
| "num_input_tokens_seen": 268435456, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0165, |
| "grad_norm": 1.648906946182251, |
| "learning_rate": 6.600000000000001e-06, |
| "loss": 1.797, |
| "num_input_tokens_seen": 276824064, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.017, |
| "grad_norm": 1.3969415426254272, |
| "learning_rate": 6.800000000000001e-06, |
| "loss": 1.7316, |
| "num_input_tokens_seen": 285212672, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0175, |
| "grad_norm": 1.9224807024002075, |
| "learning_rate": 7e-06, |
| "loss": 1.8746, |
| "num_input_tokens_seen": 293601280, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 1.349591851234436, |
| "learning_rate": 7.2000000000000005e-06, |
| "loss": 1.8283, |
| "num_input_tokens_seen": 301989888, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0185, |
| "grad_norm": 1.09238600730896, |
| "learning_rate": 7.4e-06, |
| "loss": 2.0065, |
| "num_input_tokens_seen": 310378496, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.019, |
| "grad_norm": 2.277617931365967, |
| "learning_rate": 7.600000000000001e-06, |
| "loss": 1.7303, |
| "num_input_tokens_seen": 318767104, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0195, |
| "grad_norm": 1.2706860303878784, |
| "learning_rate": 7.800000000000002e-06, |
| "loss": 1.8113, |
| "num_input_tokens_seen": 327155712, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.9849238395690918, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 1.7541, |
| "num_input_tokens_seen": 335544320, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0205, |
| "grad_norm": 0.8385749459266663, |
| "learning_rate": 8.2e-06, |
| "loss": 1.5761, |
| "num_input_tokens_seen": 343932928, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.021, |
| "grad_norm": 1.082259178161621, |
| "learning_rate": 8.400000000000001e-06, |
| "loss": 2.0085, |
| "num_input_tokens_seen": 352321536, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0215, |
| "grad_norm": 1.1942423582077026, |
| "learning_rate": 8.6e-06, |
| "loss": 1.6454, |
| "num_input_tokens_seen": 360710144, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 1.2553157806396484, |
| "learning_rate": 8.8e-06, |
| "loss": 1.8204, |
| "num_input_tokens_seen": 369098752, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0225, |
| "grad_norm": 0.9502832293510437, |
| "learning_rate": 9e-06, |
| "loss": 1.7104, |
| "num_input_tokens_seen": 377487360, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.023, |
| "grad_norm": 0.9545429348945618, |
| "learning_rate": 9.200000000000002e-06, |
| "loss": 1.7269, |
| "num_input_tokens_seen": 385875968, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.0235, |
| "grad_norm": 0.993373692035675, |
| "learning_rate": 9.4e-06, |
| "loss": 1.8579, |
| "num_input_tokens_seen": 394264576, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 1.3005881309509277, |
| "learning_rate": 9.600000000000001e-06, |
| "loss": 1.8702, |
| "num_input_tokens_seen": 402653184, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0245, |
| "grad_norm": 1.0115833282470703, |
| "learning_rate": 9.800000000000001e-06, |
| "loss": 1.7222, |
| "num_input_tokens_seen": 411041792, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 2.056337833404541, |
| "learning_rate": 1e-05, |
| "loss": 1.5305, |
| "num_input_tokens_seen": 419430400, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0255, |
| "grad_norm": 1.122148036956787, |
| "learning_rate": 1.02e-05, |
| "loss": 1.6988, |
| "num_input_tokens_seen": 427819008, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.026, |
| "grad_norm": 1.1124475002288818, |
| "learning_rate": 1.04e-05, |
| "loss": 1.8053, |
| "num_input_tokens_seen": 436207616, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0265, |
| "grad_norm": 0.7354093194007874, |
| "learning_rate": 1.0600000000000002e-05, |
| "loss": 1.7792, |
| "num_input_tokens_seen": 444596224, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.027, |
| "grad_norm": 1.4597609043121338, |
| "learning_rate": 1.0800000000000002e-05, |
| "loss": 1.7007, |
| "num_input_tokens_seen": 452984832, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0275, |
| "grad_norm": 1.0347814559936523, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 1.7212, |
| "num_input_tokens_seen": 461373440, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.028, |
| "grad_norm": 1.1491434574127197, |
| "learning_rate": 1.1200000000000001e-05, |
| "loss": 1.8258, |
| "num_input_tokens_seen": 469762048, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0285, |
| "grad_norm": 1.086042881011963, |
| "learning_rate": 1.14e-05, |
| "loss": 1.7252, |
| "num_input_tokens_seen": 478150656, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.029, |
| "grad_norm": 1.2974258661270142, |
| "learning_rate": 1.16e-05, |
| "loss": 1.5316, |
| "num_input_tokens_seen": 486539264, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.0295, |
| "grad_norm": 1.7874411344528198, |
| "learning_rate": 1.18e-05, |
| "loss": 1.7952, |
| "num_input_tokens_seen": 494927872, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 1.7470626831054688, |
| "learning_rate": 1.2e-05, |
| "loss": 1.4796, |
| "num_input_tokens_seen": 503316480, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0305, |
| "grad_norm": 2.2033004760742188, |
| "learning_rate": 1.22e-05, |
| "loss": 1.6184, |
| "num_input_tokens_seen": 511705088, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.031, |
| "grad_norm": 1.5556191205978394, |
| "learning_rate": 1.2400000000000002e-05, |
| "loss": 1.749, |
| "num_input_tokens_seen": 520093696, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.0315, |
| "grad_norm": 1.4915621280670166, |
| "learning_rate": 1.2600000000000001e-05, |
| "loss": 1.6627, |
| "num_input_tokens_seen": 528482304, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 1.6399468183517456, |
| "learning_rate": 1.2800000000000001e-05, |
| "loss": 1.653, |
| "num_input_tokens_seen": 536870912, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0325, |
| "grad_norm": 1.6420996189117432, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 1.7104, |
| "num_input_tokens_seen": 545259520, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.033, |
| "grad_norm": 1.2998722791671753, |
| "learning_rate": 1.3200000000000002e-05, |
| "loss": 1.8448, |
| "num_input_tokens_seen": 553648128, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0335, |
| "grad_norm": 1.1958801746368408, |
| "learning_rate": 1.3400000000000002e-05, |
| "loss": 1.7359, |
| "num_input_tokens_seen": 562036736, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.034, |
| "grad_norm": 1.1419895887374878, |
| "learning_rate": 1.3600000000000002e-05, |
| "loss": 1.7173, |
| "num_input_tokens_seen": 570425344, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.0345, |
| "grad_norm": 1.4425727128982544, |
| "learning_rate": 1.38e-05, |
| "loss": 1.5033, |
| "num_input_tokens_seen": 578813952, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.035, |
| "grad_norm": 1.5337550640106201, |
| "learning_rate": 1.4e-05, |
| "loss": 1.7122, |
| "num_input_tokens_seen": 587202560, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0355, |
| "grad_norm": 0.8321288228034973, |
| "learning_rate": 1.4200000000000001e-05, |
| "loss": 1.6591, |
| "num_input_tokens_seen": 595591168, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.036, |
| "grad_norm": 1.6972109079360962, |
| "learning_rate": 1.4400000000000001e-05, |
| "loss": 1.6779, |
| "num_input_tokens_seen": 603979776, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0365, |
| "grad_norm": 1.3916453123092651, |
| "learning_rate": 1.46e-05, |
| "loss": 1.5565, |
| "num_input_tokens_seen": 612368384, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.037, |
| "grad_norm": 1.3113203048706055, |
| "learning_rate": 1.48e-05, |
| "loss": 1.6577, |
| "num_input_tokens_seen": 620756992, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0375, |
| "grad_norm": 1.0601086616516113, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 1.7513, |
| "num_input_tokens_seen": 629145600, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.038, |
| "grad_norm": 1.5599173307418823, |
| "learning_rate": 1.5200000000000002e-05, |
| "loss": 1.7799, |
| "num_input_tokens_seen": 637534208, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.0385, |
| "grad_norm": 0.8914459943771362, |
| "learning_rate": 1.54e-05, |
| "loss": 1.7983, |
| "num_input_tokens_seen": 645922816, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.039, |
| "grad_norm": 1.0717469453811646, |
| "learning_rate": 1.5600000000000003e-05, |
| "loss": 1.8112, |
| "num_input_tokens_seen": 654311424, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0395, |
| "grad_norm": 0.9615082144737244, |
| "learning_rate": 1.58e-05, |
| "loss": 1.6442, |
| "num_input_tokens_seen": 662700032, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.2537950277328491, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 1.836, |
| "num_input_tokens_seen": 671088640, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0405, |
| "grad_norm": 1.1260099411010742, |
| "learning_rate": 1.62e-05, |
| "loss": 1.642, |
| "num_input_tokens_seen": 679477248, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.041, |
| "grad_norm": 1.5180790424346924, |
| "learning_rate": 1.64e-05, |
| "loss": 1.6642, |
| "num_input_tokens_seen": 687865856, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.0415, |
| "grad_norm": 1.1912261247634888, |
| "learning_rate": 1.66e-05, |
| "loss": 1.8096, |
| "num_input_tokens_seen": 696254464, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.042, |
| "grad_norm": 1.0683045387268066, |
| "learning_rate": 1.6800000000000002e-05, |
| "loss": 1.7821, |
| "num_input_tokens_seen": 704643072, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0425, |
| "grad_norm": 1.234241247177124, |
| "learning_rate": 1.7e-05, |
| "loss": 1.7426, |
| "num_input_tokens_seen": 713031680, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.043, |
| "grad_norm": 1.2772578001022339, |
| "learning_rate": 1.72e-05, |
| "loss": 1.6232, |
| "num_input_tokens_seen": 721420288, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.0435, |
| "grad_norm": 1.238016963005066, |
| "learning_rate": 1.7400000000000003e-05, |
| "loss": 1.6116, |
| "num_input_tokens_seen": 729808896, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.044, |
| "grad_norm": 0.9299501180648804, |
| "learning_rate": 1.76e-05, |
| "loss": 1.8499, |
| "num_input_tokens_seen": 738197504, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.0445, |
| "grad_norm": 1.0743695497512817, |
| "learning_rate": 1.7800000000000002e-05, |
| "loss": 1.7032, |
| "num_input_tokens_seen": 746586112, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.045, |
| "grad_norm": 1.1992101669311523, |
| "learning_rate": 1.8e-05, |
| "loss": 1.5495, |
| "num_input_tokens_seen": 754974720, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0455, |
| "grad_norm": 1.2545678615570068, |
| "learning_rate": 1.8200000000000002e-05, |
| "loss": 1.7138, |
| "num_input_tokens_seen": 763363328, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.046, |
| "grad_norm": 1.389168381690979, |
| "learning_rate": 1.8400000000000003e-05, |
| "loss": 1.7343, |
| "num_input_tokens_seen": 771751936, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.0465, |
| "grad_norm": 0.9460492730140686, |
| "learning_rate": 1.86e-05, |
| "loss": 1.7358, |
| "num_input_tokens_seen": 780140544, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.047, |
| "grad_norm": 0.9978250861167908, |
| "learning_rate": 1.88e-05, |
| "loss": 1.7163, |
| "num_input_tokens_seen": 788529152, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.0475, |
| "grad_norm": 2.179802656173706, |
| "learning_rate": 1.9e-05, |
| "loss": 1.449, |
| "num_input_tokens_seen": 796917760, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 1.2629632949829102, |
| "learning_rate": 1.9200000000000003e-05, |
| "loss": 1.6696, |
| "num_input_tokens_seen": 805306368, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0485, |
| "grad_norm": 1.7194193601608276, |
| "learning_rate": 1.94e-05, |
| "loss": 1.6729, |
| "num_input_tokens_seen": 813694976, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.049, |
| "grad_norm": 1.1110029220581055, |
| "learning_rate": 1.9600000000000002e-05, |
| "loss": 1.7822, |
| "num_input_tokens_seen": 822083584, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.0495, |
| "grad_norm": 1.1010093688964844, |
| "learning_rate": 1.98e-05, |
| "loss": 1.5114, |
| "num_input_tokens_seen": 830472192, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.3180491924285889, |
| "learning_rate": 2e-05, |
| "loss": 1.5707, |
| "num_input_tokens_seen": 838860800, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0505, |
| "grad_norm": 1.1497364044189453, |
| "learning_rate": 2.0200000000000003e-05, |
| "loss": 1.6285, |
| "num_input_tokens_seen": 847249408, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.051, |
| "grad_norm": 1.4788284301757812, |
| "learning_rate": 2.04e-05, |
| "loss": 1.7649, |
| "num_input_tokens_seen": 855638016, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.0515, |
| "grad_norm": 1.2098913192749023, |
| "learning_rate": 2.0600000000000003e-05, |
| "loss": 1.6458, |
| "num_input_tokens_seen": 864026624, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.052, |
| "grad_norm": 1.4255143404006958, |
| "learning_rate": 2.08e-05, |
| "loss": 1.6966, |
| "num_input_tokens_seen": 872415232, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.0525, |
| "grad_norm": 1.2970800399780273, |
| "learning_rate": 2.1000000000000002e-05, |
| "loss": 1.6893, |
| "num_input_tokens_seen": 880803840, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.053, |
| "grad_norm": 1.4322800636291504, |
| "learning_rate": 2.1200000000000004e-05, |
| "loss": 1.7252, |
| "num_input_tokens_seen": 889192448, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.0535, |
| "grad_norm": 1.0862427949905396, |
| "learning_rate": 2.1400000000000002e-05, |
| "loss": 1.702, |
| "num_input_tokens_seen": 897581056, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.054, |
| "grad_norm": 1.2055437564849854, |
| "learning_rate": 2.1600000000000003e-05, |
| "loss": 1.6486, |
| "num_input_tokens_seen": 905969664, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.0545, |
| "grad_norm": 1.0794836282730103, |
| "learning_rate": 2.1800000000000005e-05, |
| "loss": 1.6812, |
| "num_input_tokens_seen": 914358272, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.055, |
| "grad_norm": 1.3784294128417969, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 1.5237, |
| "num_input_tokens_seen": 922746880, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0555, |
| "grad_norm": 1.5001763105392456, |
| "learning_rate": 2.2200000000000004e-05, |
| "loss": 1.6246, |
| "num_input_tokens_seen": 931135488, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 1.0864592790603638, |
| "learning_rate": 2.2400000000000002e-05, |
| "loss": 1.6973, |
| "num_input_tokens_seen": 939524096, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.0565, |
| "grad_norm": 1.2886756658554077, |
| "learning_rate": 2.26e-05, |
| "loss": 1.6475, |
| "num_input_tokens_seen": 947912704, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.057, |
| "grad_norm": 1.4063009023666382, |
| "learning_rate": 2.28e-05, |
| "loss": 1.7949, |
| "num_input_tokens_seen": 956301312, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.0575, |
| "grad_norm": 1.1816203594207764, |
| "learning_rate": 2.3e-05, |
| "loss": 1.6827, |
| "num_input_tokens_seen": 964689920, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.058, |
| "grad_norm": 1.368019938468933, |
| "learning_rate": 2.32e-05, |
| "loss": 1.5163, |
| "num_input_tokens_seen": 973078528, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.0585, |
| "grad_norm": 1.1925565004348755, |
| "learning_rate": 2.34e-05, |
| "loss": 1.7815, |
| "num_input_tokens_seen": 981467136, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.059, |
| "grad_norm": 1.7603892087936401, |
| "learning_rate": 2.36e-05, |
| "loss": 1.5604, |
| "num_input_tokens_seen": 989855744, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.0595, |
| "grad_norm": 1.140766978263855, |
| "learning_rate": 2.3800000000000003e-05, |
| "loss": 1.7546, |
| "num_input_tokens_seen": 998244352, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.0011487007141113, |
| "learning_rate": 2.4e-05, |
| "loss": 1.7174, |
| "num_input_tokens_seen": 1006632960, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0605, |
| "grad_norm": 1.3519712686538696, |
| "learning_rate": 2.4200000000000002e-05, |
| "loss": 1.6414, |
| "num_input_tokens_seen": 1015021568, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.061, |
| "grad_norm": 1.0491664409637451, |
| "learning_rate": 2.44e-05, |
| "loss": 1.698, |
| "num_input_tokens_seen": 1023410176, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.0615, |
| "grad_norm": 1.5017552375793457, |
| "learning_rate": 2.46e-05, |
| "loss": 1.7974, |
| "num_input_tokens_seen": 1031798784, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.062, |
| "grad_norm": 0.9958243370056152, |
| "learning_rate": 2.4800000000000003e-05, |
| "loss": 1.6468, |
| "num_input_tokens_seen": 1040187392, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.0625, |
| "grad_norm": 2.0464437007904053, |
| "learning_rate": 2.5e-05, |
| "loss": 1.6911, |
| "num_input_tokens_seen": 1048576000, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.063, |
| "grad_norm": 1.2215771675109863, |
| "learning_rate": 2.5200000000000003e-05, |
| "loss": 1.7483, |
| "num_input_tokens_seen": 1056964608, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.0635, |
| "grad_norm": 1.4541150331497192, |
| "learning_rate": 2.54e-05, |
| "loss": 1.7045, |
| "num_input_tokens_seen": 1065353216, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 1.6184258460998535, |
| "learning_rate": 2.5600000000000002e-05, |
| "loss": 1.7767, |
| "num_input_tokens_seen": 1073741824, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.0645, |
| "grad_norm": 1.029843807220459, |
| "learning_rate": 2.5800000000000004e-05, |
| "loss": 1.7934, |
| "num_input_tokens_seen": 1082130432, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.065, |
| "grad_norm": 2.1942873001098633, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 1.5101, |
| "num_input_tokens_seen": 1090519040, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0655, |
| "grad_norm": 1.411434292793274, |
| "learning_rate": 2.6200000000000003e-05, |
| "loss": 1.6257, |
| "num_input_tokens_seen": 1098907648, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.066, |
| "grad_norm": 0.9594506621360779, |
| "learning_rate": 2.6400000000000005e-05, |
| "loss": 1.8266, |
| "num_input_tokens_seen": 1107296256, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.0665, |
| "grad_norm": 1.4641342163085938, |
| "learning_rate": 2.6600000000000003e-05, |
| "loss": 1.712, |
| "num_input_tokens_seen": 1115684864, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.067, |
| "grad_norm": 1.3084245920181274, |
| "learning_rate": 2.6800000000000004e-05, |
| "loss": 1.604, |
| "num_input_tokens_seen": 1124073472, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.0675, |
| "grad_norm": 1.2237942218780518, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 1.5766, |
| "num_input_tokens_seen": 1132462080, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.068, |
| "grad_norm": 1.1463581323623657, |
| "learning_rate": 2.7200000000000004e-05, |
| "loss": 1.6984, |
| "num_input_tokens_seen": 1140850688, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.0685, |
| "grad_norm": 1.579965591430664, |
| "learning_rate": 2.7400000000000005e-05, |
| "loss": 1.7595, |
| "num_input_tokens_seen": 1149239296, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.069, |
| "grad_norm": 1.154305338859558, |
| "learning_rate": 2.76e-05, |
| "loss": 1.7695, |
| "num_input_tokens_seen": 1157627904, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0695, |
| "grad_norm": 1.2724696397781372, |
| "learning_rate": 2.78e-05, |
| "loss": 1.6492, |
| "num_input_tokens_seen": 1166016512, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.0162241458892822, |
| "learning_rate": 2.8e-05, |
| "loss": 1.659, |
| "num_input_tokens_seen": 1174405120, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0705, |
| "grad_norm": 1.206682562828064, |
| "learning_rate": 2.82e-05, |
| "loss": 1.8174, |
| "num_input_tokens_seen": 1182793728, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.071, |
| "grad_norm": 0.8725315928459167, |
| "learning_rate": 2.8400000000000003e-05, |
| "loss": 1.5187, |
| "num_input_tokens_seen": 1191182336, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.0715, |
| "grad_norm": 1.4840545654296875, |
| "learning_rate": 2.86e-05, |
| "loss": 1.5984, |
| "num_input_tokens_seen": 1199570944, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 1.3734935522079468, |
| "learning_rate": 2.8800000000000002e-05, |
| "loss": 1.5661, |
| "num_input_tokens_seen": 1207959552, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.0725, |
| "grad_norm": 1.2102882862091064, |
| "learning_rate": 2.9e-05, |
| "loss": 1.6857, |
| "num_input_tokens_seen": 1216348160, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.073, |
| "grad_norm": 1.3926680088043213, |
| "learning_rate": 2.92e-05, |
| "loss": 1.695, |
| "num_input_tokens_seen": 1224736768, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.0735, |
| "grad_norm": 1.3040108680725098, |
| "learning_rate": 2.9400000000000003e-05, |
| "loss": 1.6561, |
| "num_input_tokens_seen": 1233125376, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.074, |
| "grad_norm": 1.168774127960205, |
| "learning_rate": 2.96e-05, |
| "loss": 1.7053, |
| "num_input_tokens_seen": 1241513984, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.0745, |
| "grad_norm": 1.0817087888717651, |
| "learning_rate": 2.9800000000000003e-05, |
| "loss": 1.6884, |
| "num_input_tokens_seen": 1249902592, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 1.2117892503738403, |
| "learning_rate": 3.0000000000000004e-05, |
| "loss": 1.6538, |
| "num_input_tokens_seen": 1258291200, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0755, |
| "grad_norm": 1.4789024591445923, |
| "learning_rate": 3.0200000000000002e-05, |
| "loss": 1.6475, |
| "num_input_tokens_seen": 1266679808, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.076, |
| "grad_norm": 1.3695178031921387, |
| "learning_rate": 3.0400000000000004e-05, |
| "loss": 1.725, |
| "num_input_tokens_seen": 1275068416, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.0765, |
| "grad_norm": 1.3215945959091187, |
| "learning_rate": 3.0600000000000005e-05, |
| "loss": 1.6756, |
| "num_input_tokens_seen": 1283457024, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.077, |
| "grad_norm": 1.1858457326889038, |
| "learning_rate": 3.08e-05, |
| "loss": 1.755, |
| "num_input_tokens_seen": 1291845632, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0775, |
| "grad_norm": 1.4984840154647827, |
| "learning_rate": 3.1e-05, |
| "loss": 1.7219, |
| "num_input_tokens_seen": 1300234240, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.078, |
| "grad_norm": 1.2197405099868774, |
| "learning_rate": 3.1200000000000006e-05, |
| "loss": 1.7356, |
| "num_input_tokens_seen": 1308622848, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0785, |
| "grad_norm": 1.474221110343933, |
| "learning_rate": 3.1400000000000004e-05, |
| "loss": 1.5129, |
| "num_input_tokens_seen": 1317011456, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.079, |
| "grad_norm": 1.5716886520385742, |
| "learning_rate": 3.16e-05, |
| "loss": 1.7484, |
| "num_input_tokens_seen": 1325400064, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.0795, |
| "grad_norm": 0.9521049857139587, |
| "learning_rate": 3.180000000000001e-05, |
| "loss": 1.6262, |
| "num_input_tokens_seen": 1333788672, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.921792984008789, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 1.5914, |
| "num_input_tokens_seen": 1342177280, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0805, |
| "grad_norm": 1.6440759897232056, |
| "learning_rate": 3.2200000000000003e-05, |
| "loss": 1.7481, |
| "num_input_tokens_seen": 1350565888, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.081, |
| "grad_norm": 1.7987762689590454, |
| "learning_rate": 3.24e-05, |
| "loss": 1.6367, |
| "num_input_tokens_seen": 1358954496, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.0815, |
| "grad_norm": 1.4722107648849487, |
| "learning_rate": 3.26e-05, |
| "loss": 1.6079, |
| "num_input_tokens_seen": 1367343104, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.082, |
| "grad_norm": 1.5897142887115479, |
| "learning_rate": 3.28e-05, |
| "loss": 1.5902, |
| "num_input_tokens_seen": 1375731712, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.0825, |
| "grad_norm": 1.5487194061279297, |
| "learning_rate": 3.3e-05, |
| "loss": 1.8002, |
| "num_input_tokens_seen": 1384120320, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.083, |
| "grad_norm": 1.122914433479309, |
| "learning_rate": 3.32e-05, |
| "loss": 1.6447, |
| "num_input_tokens_seen": 1392508928, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.0835, |
| "grad_norm": 1.5491061210632324, |
| "learning_rate": 3.34e-05, |
| "loss": 1.7569, |
| "num_input_tokens_seen": 1400897536, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.084, |
| "grad_norm": 1.2212718725204468, |
| "learning_rate": 3.3600000000000004e-05, |
| "loss": 1.5337, |
| "num_input_tokens_seen": 1409286144, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.0845, |
| "grad_norm": 1.2301714420318604, |
| "learning_rate": 3.38e-05, |
| "loss": 1.6417, |
| "num_input_tokens_seen": 1417674752, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.085, |
| "grad_norm": 1.242119312286377, |
| "learning_rate": 3.4e-05, |
| "loss": 1.8315, |
| "num_input_tokens_seen": 1426063360, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0855, |
| "grad_norm": 1.4329941272735596, |
| "learning_rate": 3.4200000000000005e-05, |
| "loss": 1.682, |
| "num_input_tokens_seen": 1434451968, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.086, |
| "grad_norm": 1.633201241493225, |
| "learning_rate": 3.44e-05, |
| "loss": 1.6389, |
| "num_input_tokens_seen": 1442840576, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.0865, |
| "grad_norm": 1.184312343597412, |
| "learning_rate": 3.46e-05, |
| "loss": 1.727, |
| "num_input_tokens_seen": 1451229184, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.087, |
| "grad_norm": 1.2135035991668701, |
| "learning_rate": 3.4800000000000006e-05, |
| "loss": 1.7893, |
| "num_input_tokens_seen": 1459617792, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.0875, |
| "grad_norm": 0.9943047761917114, |
| "learning_rate": 3.5000000000000004e-05, |
| "loss": 1.8099, |
| "num_input_tokens_seen": 1468006400, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 1.4616221189498901, |
| "learning_rate": 3.52e-05, |
| "loss": 1.7955, |
| "num_input_tokens_seen": 1476395008, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0885, |
| "grad_norm": 0.9395477771759033, |
| "learning_rate": 3.54e-05, |
| "loss": 1.5458, |
| "num_input_tokens_seen": 1484783616, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.089, |
| "grad_norm": 1.5338860750198364, |
| "learning_rate": 3.5600000000000005e-05, |
| "loss": 1.7537, |
| "num_input_tokens_seen": 1493172224, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.0895, |
| "grad_norm": 1.1360806226730347, |
| "learning_rate": 3.58e-05, |
| "loss": 1.58, |
| "num_input_tokens_seen": 1501560832, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.797577142715454, |
| "learning_rate": 3.6e-05, |
| "loss": 1.7989, |
| "num_input_tokens_seen": 1509949440, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0905, |
| "grad_norm": 1.5540266036987305, |
| "learning_rate": 3.6200000000000006e-05, |
| "loss": 1.4828, |
| "num_input_tokens_seen": 1518338048, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.091, |
| "grad_norm": 1.776328206062317, |
| "learning_rate": 3.6400000000000004e-05, |
| "loss": 1.6651, |
| "num_input_tokens_seen": 1526726656, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.0915, |
| "grad_norm": 1.5140923261642456, |
| "learning_rate": 3.66e-05, |
| "loss": 1.702, |
| "num_input_tokens_seen": 1535115264, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.092, |
| "grad_norm": 1.5927739143371582, |
| "learning_rate": 3.680000000000001e-05, |
| "loss": 1.6738, |
| "num_input_tokens_seen": 1543503872, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0925, |
| "grad_norm": 1.1876591444015503, |
| "learning_rate": 3.7000000000000005e-05, |
| "loss": 1.673, |
| "num_input_tokens_seen": 1551892480, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.093, |
| "grad_norm": 1.176761269569397, |
| "learning_rate": 3.72e-05, |
| "loss": 1.7108, |
| "num_input_tokens_seen": 1560281088, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.0935, |
| "grad_norm": 1.4902335405349731, |
| "learning_rate": 3.740000000000001e-05, |
| "loss": 1.8053, |
| "num_input_tokens_seen": 1568669696, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.094, |
| "grad_norm": 1.1420718431472778, |
| "learning_rate": 3.76e-05, |
| "loss": 1.7571, |
| "num_input_tokens_seen": 1577058304, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.0945, |
| "grad_norm": 0.99260413646698, |
| "learning_rate": 3.7800000000000004e-05, |
| "loss": 1.6794, |
| "num_input_tokens_seen": 1585446912, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.095, |
| "grad_norm": 1.57101309299469, |
| "learning_rate": 3.8e-05, |
| "loss": 1.6479, |
| "num_input_tokens_seen": 1593835520, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0955, |
| "grad_norm": 1.5510618686676025, |
| "learning_rate": 3.82e-05, |
| "loss": 1.6632, |
| "num_input_tokens_seen": 1602224128, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 1.442740559577942, |
| "learning_rate": 3.8400000000000005e-05, |
| "loss": 1.8864, |
| "num_input_tokens_seen": 1610612736, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.0965, |
| "grad_norm": 1.0593072175979614, |
| "learning_rate": 3.86e-05, |
| "loss": 1.5798, |
| "num_input_tokens_seen": 1619001344, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.097, |
| "grad_norm": 2.065847396850586, |
| "learning_rate": 3.88e-05, |
| "loss": 1.5443, |
| "num_input_tokens_seen": 1627389952, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.0975, |
| "grad_norm": 1.2346482276916504, |
| "learning_rate": 3.9e-05, |
| "loss": 1.6052, |
| "num_input_tokens_seen": 1635778560, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.098, |
| "grad_norm": 1.419586181640625, |
| "learning_rate": 3.9200000000000004e-05, |
| "loss": 1.5901, |
| "num_input_tokens_seen": 1644167168, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.0985, |
| "grad_norm": 1.2483519315719604, |
| "learning_rate": 3.94e-05, |
| "loss": 1.6552, |
| "num_input_tokens_seen": 1652555776, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.099, |
| "grad_norm": 1.5695958137512207, |
| "learning_rate": 3.96e-05, |
| "loss": 1.5986, |
| "num_input_tokens_seen": 1660944384, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.0995, |
| "grad_norm": 1.6311231851577759, |
| "learning_rate": 3.9800000000000005e-05, |
| "loss": 1.778, |
| "num_input_tokens_seen": 1669332992, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.5190160274505615, |
| "learning_rate": 4e-05, |
| "loss": 1.8327, |
| "num_input_tokens_seen": 1677721600, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1005, |
| "grad_norm": 1.2366491556167603, |
| "learning_rate": 3.9999845787629415e-05, |
| "loss": 1.7143, |
| "num_input_tokens_seen": 1686110208, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.101, |
| "grad_norm": 2.084810495376587, |
| "learning_rate": 3.99993831528958e-05, |
| "loss": 1.6544, |
| "num_input_tokens_seen": 1694498816, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.1015, |
| "grad_norm": 1.5253264904022217, |
| "learning_rate": 3.9998612102933544e-05, |
| "loss": 1.6143, |
| "num_input_tokens_seen": 1702887424, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.102, |
| "grad_norm": 1.0702577829360962, |
| "learning_rate": 3.999753264963321e-05, |
| "loss": 1.6856, |
| "num_input_tokens_seen": 1711276032, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.1025, |
| "grad_norm": 1.9343763589859009, |
| "learning_rate": 3.9996144809641296e-05, |
| "loss": 1.786, |
| "num_input_tokens_seen": 1719664640, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.103, |
| "grad_norm": 1.1403006315231323, |
| "learning_rate": 3.9994448604360016e-05, |
| "loss": 1.6452, |
| "num_input_tokens_seen": 1728053248, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.1035, |
| "grad_norm": 2.63930606842041, |
| "learning_rate": 3.999244405994694e-05, |
| "loss": 1.7236, |
| "num_input_tokens_seen": 1736441856, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 1.6718311309814453, |
| "learning_rate": 3.9990131207314634e-05, |
| "loss": 1.5877, |
| "num_input_tokens_seen": 1744830464, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.1045, |
| "grad_norm": 2.7320733070373535, |
| "learning_rate": 3.998751008213014e-05, |
| "loss": 1.7808, |
| "num_input_tokens_seen": 1753219072, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.105, |
| "grad_norm": 2.0388472080230713, |
| "learning_rate": 3.9984580724814464e-05, |
| "loss": 1.7625, |
| "num_input_tokens_seen": 1761607680, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1055, |
| "grad_norm": 2.2198638916015625, |
| "learning_rate": 3.99813431805419e-05, |
| "loss": 1.6662, |
| "num_input_tokens_seen": 1769996288, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.106, |
| "grad_norm": 1.7465423345565796, |
| "learning_rate": 3.9977797499239404e-05, |
| "loss": 1.6335, |
| "num_input_tokens_seen": 1778384896, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.1065, |
| "grad_norm": 1.6627084016799927, |
| "learning_rate": 3.997394373558576e-05, |
| "loss": 1.6865, |
| "num_input_tokens_seen": 1786773504, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.107, |
| "grad_norm": 1.486066222190857, |
| "learning_rate": 3.996978194901077e-05, |
| "loss": 1.5947, |
| "num_input_tokens_seen": 1795162112, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1075, |
| "grad_norm": 1.2178465127944946, |
| "learning_rate": 3.996531220369432e-05, |
| "loss": 1.8448, |
| "num_input_tokens_seen": 1803550720, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.108, |
| "grad_norm": 1.1107075214385986, |
| "learning_rate": 3.9960534568565436e-05, |
| "loss": 1.7868, |
| "num_input_tokens_seen": 1811939328, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.1085, |
| "grad_norm": 1.3619362115859985, |
| "learning_rate": 3.995544911730115e-05, |
| "loss": 1.742, |
| "num_input_tokens_seen": 1820327936, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.109, |
| "grad_norm": 1.0947133302688599, |
| "learning_rate": 3.995005592832541e-05, |
| "loss": 1.7386, |
| "num_input_tokens_seen": 1828716544, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.1095, |
| "grad_norm": 1.4069256782531738, |
| "learning_rate": 3.994435508480786e-05, |
| "loss": 1.7462, |
| "num_input_tokens_seen": 1837105152, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.4184787273406982, |
| "learning_rate": 3.9938346674662565e-05, |
| "loss": 1.7545, |
| "num_input_tokens_seen": 1845493760, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1105, |
| "grad_norm": 1.4119457006454468, |
| "learning_rate": 3.9932030790546636e-05, |
| "loss": 1.6184, |
| "num_input_tokens_seen": 1853882368, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.111, |
| "grad_norm": 1.1133450269699097, |
| "learning_rate": 3.9925407529858826e-05, |
| "loss": 1.67, |
| "num_input_tokens_seen": 1862270976, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.1115, |
| "grad_norm": 1.3021692037582397, |
| "learning_rate": 3.991847699473801e-05, |
| "loss": 1.6534, |
| "num_input_tokens_seen": 1870659584, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 1.10936439037323, |
| "learning_rate": 3.99112392920616e-05, |
| "loss": 1.7029, |
| "num_input_tokens_seen": 1879048192, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.1125, |
| "grad_norm": 0.9188879728317261, |
| "learning_rate": 3.990369453344394e-05, |
| "loss": 1.6314, |
| "num_input_tokens_seen": 1887436800, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.113, |
| "grad_norm": 1.4214354753494263, |
| "learning_rate": 3.989584283523453e-05, |
| "loss": 1.6977, |
| "num_input_tokens_seen": 1895825408, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.1135, |
| "grad_norm": 1.3106968402862549, |
| "learning_rate": 3.988768431851628e-05, |
| "loss": 1.613, |
| "num_input_tokens_seen": 1904214016, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.114, |
| "grad_norm": 1.2800140380859375, |
| "learning_rate": 3.98792191091036e-05, |
| "loss": 1.7323, |
| "num_input_tokens_seen": 1912602624, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.1145, |
| "grad_norm": 0.9443020224571228, |
| "learning_rate": 3.987044733754049e-05, |
| "loss": 1.5173, |
| "num_input_tokens_seen": 1920991232, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.115, |
| "grad_norm": 1.4971925020217896, |
| "learning_rate": 3.986136913909853e-05, |
| "loss": 1.8041, |
| "num_input_tokens_seen": 1929379840, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1155, |
| "grad_norm": 0.9276608824729919, |
| "learning_rate": 3.985198465377476e-05, |
| "loss": 1.8106, |
| "num_input_tokens_seen": 1937768448, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.116, |
| "grad_norm": 1.3482216596603394, |
| "learning_rate": 3.9842294026289565e-05, |
| "loss": 1.6773, |
| "num_input_tokens_seen": 1946157056, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.1165, |
| "grad_norm": 1.3610389232635498, |
| "learning_rate": 3.9832297406084386e-05, |
| "loss": 1.631, |
| "num_input_tokens_seen": 1954545664, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.117, |
| "grad_norm": 1.1721453666687012, |
| "learning_rate": 3.98219949473195e-05, |
| "loss": 1.7756, |
| "num_input_tokens_seen": 1962934272, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.1175, |
| "grad_norm": 0.8579085469245911, |
| "learning_rate": 3.981138680887154e-05, |
| "loss": 1.8969, |
| "num_input_tokens_seen": 1971322880, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.118, |
| "grad_norm": 0.8704617023468018, |
| "learning_rate": 3.980047315433116e-05, |
| "loss": 1.6432, |
| "num_input_tokens_seen": 1979711488, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1185, |
| "grad_norm": 1.1833432912826538, |
| "learning_rate": 3.978925415200037e-05, |
| "loss": 1.6492, |
| "num_input_tokens_seen": 1988100096, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.119, |
| "grad_norm": 1.2760549783706665, |
| "learning_rate": 3.97777299748901e-05, |
| "loss": 1.6491, |
| "num_input_tokens_seen": 1996488704, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.1195, |
| "grad_norm": 0.8377672433853149, |
| "learning_rate": 3.976590080071739e-05, |
| "loss": 1.7364, |
| "num_input_tokens_seen": 2004877312, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.6421141624450684, |
| "learning_rate": 3.9753766811902756e-05, |
| "loss": 1.4844, |
| "num_input_tokens_seen": 2013265920, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1205, |
| "grad_norm": 1.1873599290847778, |
| "learning_rate": 3.974132819556731e-05, |
| "loss": 1.7483, |
| "num_input_tokens_seen": 2021654528, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.121, |
| "grad_norm": 1.206591248512268, |
| "learning_rate": 3.972858514352991e-05, |
| "loss": 1.7342, |
| "num_input_tokens_seen": 2030043136, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1215, |
| "grad_norm": 0.9489218592643738, |
| "learning_rate": 3.971553785230418e-05, |
| "loss": 1.7117, |
| "num_input_tokens_seen": 2038431744, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.122, |
| "grad_norm": 1.023648977279663, |
| "learning_rate": 3.970218652309548e-05, |
| "loss": 1.5842, |
| "num_input_tokens_seen": 2046820352, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.1225, |
| "grad_norm": 1.1487878561019897, |
| "learning_rate": 3.9688531361797834e-05, |
| "loss": 1.7356, |
| "num_input_tokens_seen": 2055208960, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.123, |
| "grad_norm": 1.0160244703292847, |
| "learning_rate": 3.9674572578990724e-05, |
| "loss": 1.682, |
| "num_input_tokens_seen": 2063597568, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.1235, |
| "grad_norm": 1.105434536933899, |
| "learning_rate": 3.9660310389935837e-05, |
| "loss": 1.6695, |
| "num_input_tokens_seen": 2071986176, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.124, |
| "grad_norm": 1.0896682739257812, |
| "learning_rate": 3.964574501457378e-05, |
| "loss": 1.7885, |
| "num_input_tokens_seen": 2080374784, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1245, |
| "grad_norm": 1.0165361166000366, |
| "learning_rate": 3.9630876677520656e-05, |
| "loss": 1.579, |
| "num_input_tokens_seen": 2088763392, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 1.211041808128357, |
| "learning_rate": 3.961570560806461e-05, |
| "loss": 1.6509, |
| "num_input_tokens_seen": 2097152000, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1255, |
| "grad_norm": 1.0447190999984741, |
| "learning_rate": 3.960023204016231e-05, |
| "loss": 1.8698, |
| "num_input_tokens_seen": 2105540608, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.126, |
| "grad_norm": 1.3021482229232788, |
| "learning_rate": 3.958445621243532e-05, |
| "loss": 1.7579, |
| "num_input_tokens_seen": 2113929216, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1265, |
| "grad_norm": 0.9119377732276917, |
| "learning_rate": 3.9568378368166406e-05, |
| "loss": 1.6945, |
| "num_input_tokens_seen": 2122317824, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.127, |
| "grad_norm": 0.8316543698310852, |
| "learning_rate": 3.955199875529582e-05, |
| "loss": 1.6739, |
| "num_input_tokens_seen": 2130706432, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.1275, |
| "grad_norm": 0.9795516133308411, |
| "learning_rate": 3.953531762641745e-05, |
| "loss": 1.7518, |
| "num_input_tokens_seen": 2139095040, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.9370044469833374, |
| "learning_rate": 3.951833523877495e-05, |
| "loss": 1.6134, |
| "num_input_tokens_seen": 2147483648, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.1285, |
| "grad_norm": 1.6189143657684326, |
| "learning_rate": 3.9501051854257745e-05, |
| "loss": 1.6523, |
| "num_input_tokens_seen": 2155872256, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.129, |
| "grad_norm": 1.1166945695877075, |
| "learning_rate": 3.948346773939699e-05, |
| "loss": 1.7523, |
| "num_input_tokens_seen": 2164260864, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1295, |
| "grad_norm": 1.0886536836624146, |
| "learning_rate": 3.94655831653615e-05, |
| "loss": 1.622, |
| "num_input_tokens_seen": 2172649472, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.7367981672286987, |
| "learning_rate": 3.9447398407953536e-05, |
| "loss": 1.7358, |
| "num_input_tokens_seen": 2181038080, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1305, |
| "grad_norm": 1.2050344944000244, |
| "learning_rate": 3.942891374760455e-05, |
| "loss": 1.5398, |
| "num_input_tokens_seen": 2189426688, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.131, |
| "grad_norm": 2.18412709236145, |
| "learning_rate": 3.941012946937085e-05, |
| "loss": 1.5405, |
| "num_input_tokens_seen": 2197815296, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.1315, |
| "grad_norm": 1.3102896213531494, |
| "learning_rate": 3.9391045862929275e-05, |
| "loss": 1.8383, |
| "num_input_tokens_seen": 2206203904, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.132, |
| "grad_norm": 1.4639437198638916, |
| "learning_rate": 3.9371663222572625e-05, |
| "loss": 1.6872, |
| "num_input_tokens_seen": 2214592512, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1325, |
| "grad_norm": 1.346665859222412, |
| "learning_rate": 3.93519818472052e-05, |
| "loss": 1.7861, |
| "num_input_tokens_seen": 2222981120, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.133, |
| "grad_norm": 1.4441630840301514, |
| "learning_rate": 3.933200204033815e-05, |
| "loss": 1.4984, |
| "num_input_tokens_seen": 2231369728, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.1335, |
| "grad_norm": 1.3043477535247803, |
| "learning_rate": 3.931172411008482e-05, |
| "loss": 1.7591, |
| "num_input_tokens_seen": 2239758336, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.134, |
| "grad_norm": 1.2089747190475464, |
| "learning_rate": 3.9291148369155964e-05, |
| "loss": 1.7255, |
| "num_input_tokens_seen": 2248146944, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1345, |
| "grad_norm": 0.9772351980209351, |
| "learning_rate": 3.927027513485498e-05, |
| "loss": 1.6875, |
| "num_input_tokens_seen": 2256535552, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.135, |
| "grad_norm": 0.9826438426971436, |
| "learning_rate": 3.9249104729072944e-05, |
| "loss": 1.7025, |
| "num_input_tokens_seen": 2264924160, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1355, |
| "grad_norm": 1.421626329421997, |
| "learning_rate": 3.9227637478283725e-05, |
| "loss": 1.7574, |
| "num_input_tokens_seen": 2273312768, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 1.3830302953720093, |
| "learning_rate": 3.9205873713538864e-05, |
| "loss": 1.7882, |
| "num_input_tokens_seen": 2281701376, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.1365, |
| "grad_norm": 1.0225855112075806, |
| "learning_rate": 3.918381377046255e-05, |
| "loss": 1.5312, |
| "num_input_tokens_seen": 2290089984, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.137, |
| "grad_norm": 1.8110359907150269, |
| "learning_rate": 3.916145798924639e-05, |
| "loss": 1.7218, |
| "num_input_tokens_seen": 2298478592, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1375, |
| "grad_norm": 1.0985193252563477, |
| "learning_rate": 3.913880671464418e-05, |
| "loss": 1.6563, |
| "num_input_tokens_seen": 2306867200, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.138, |
| "grad_norm": 1.680709719657898, |
| "learning_rate": 3.911586029596661e-05, |
| "loss": 1.6444, |
| "num_input_tokens_seen": 2315255808, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.1385, |
| "grad_norm": 1.201578974723816, |
| "learning_rate": 3.9092619087075825e-05, |
| "loss": 1.6551, |
| "num_input_tokens_seen": 2323644416, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.139, |
| "grad_norm": 1.4566112756729126, |
| "learning_rate": 3.906908344638002e-05, |
| "loss": 1.6357, |
| "num_input_tokens_seen": 2332033024, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.1395, |
| "grad_norm": 1.2596595287322998, |
| "learning_rate": 3.904525373682791e-05, |
| "loss": 1.7801, |
| "num_input_tokens_seen": 2340421632, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.625712513923645, |
| "learning_rate": 3.9021130325903076e-05, |
| "loss": 1.7944, |
| "num_input_tokens_seen": 2348810240, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1405, |
| "grad_norm": 1.0394383668899536, |
| "learning_rate": 3.8996713585618354e-05, |
| "loss": 1.7922, |
| "num_input_tokens_seen": 2357198848, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.141, |
| "grad_norm": 1.8562290668487549, |
| "learning_rate": 3.897200389251009e-05, |
| "loss": 1.7078, |
| "num_input_tokens_seen": 2365587456, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.1415, |
| "grad_norm": 1.245281457901001, |
| "learning_rate": 3.8947001627632326e-05, |
| "loss": 1.6801, |
| "num_input_tokens_seen": 2373976064, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.142, |
| "grad_norm": 1.5256640911102295, |
| "learning_rate": 3.892170717655091e-05, |
| "loss": 1.7155, |
| "num_input_tokens_seen": 2382364672, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.1425, |
| "grad_norm": 1.0656960010528564, |
| "learning_rate": 3.889612092933756e-05, |
| "loss": 1.75, |
| "num_input_tokens_seen": 2390753280, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.143, |
| "grad_norm": 1.204822063446045, |
| "learning_rate": 3.887024328056387e-05, |
| "loss": 1.7464, |
| "num_input_tokens_seen": 2399141888, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1435, |
| "grad_norm": 1.1441707611083984, |
| "learning_rate": 3.88440746292952e-05, |
| "loss": 1.6608, |
| "num_input_tokens_seen": 2407530496, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 1.2725075483322144, |
| "learning_rate": 3.8817615379084514e-05, |
| "loss": 1.5159, |
| "num_input_tokens_seen": 2415919104, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.1445, |
| "grad_norm": 1.1600598096847534, |
| "learning_rate": 3.879086593796618e-05, |
| "loss": 1.5823, |
| "num_input_tokens_seen": 2424307712, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.145, |
| "grad_norm": 1.1082223653793335, |
| "learning_rate": 3.876382671844969e-05, |
| "loss": 1.6134, |
| "num_input_tokens_seen": 2432696320, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1455, |
| "grad_norm": 1.1749600172042847, |
| "learning_rate": 3.873649813751323e-05, |
| "loss": 1.5625, |
| "num_input_tokens_seen": 2441084928, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.146, |
| "grad_norm": 1.005366325378418, |
| "learning_rate": 3.870888061659735e-05, |
| "loss": 1.6975, |
| "num_input_tokens_seen": 2449473536, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.1465, |
| "grad_norm": 0.8248304724693298, |
| "learning_rate": 3.8680974581598375e-05, |
| "loss": 1.607, |
| "num_input_tokens_seen": 2457862144, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.147, |
| "grad_norm": 0.8967552185058594, |
| "learning_rate": 3.865278046286189e-05, |
| "loss": 1.3385, |
| "num_input_tokens_seen": 2466250752, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.1475, |
| "grad_norm": 1.328721046447754, |
| "learning_rate": 3.862429869517607e-05, |
| "loss": 1.6221, |
| "num_input_tokens_seen": 2474639360, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.148, |
| "grad_norm": 0.917224645614624, |
| "learning_rate": 3.859552971776503e-05, |
| "loss": 1.6286, |
| "num_input_tokens_seen": 2483027968, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.1485, |
| "grad_norm": 0.88497394323349, |
| "learning_rate": 3.856647397428198e-05, |
| "loss": 1.6966, |
| "num_input_tokens_seen": 2491416576, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.149, |
| "grad_norm": 1.0170094966888428, |
| "learning_rate": 3.853713191280242e-05, |
| "loss": 1.6923, |
| "num_input_tokens_seen": 2499805184, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1495, |
| "grad_norm": 1.137525200843811, |
| "learning_rate": 3.850750398581725e-05, |
| "loss": 1.7734, |
| "num_input_tokens_seen": 2508193792, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.124685287475586, |
| "learning_rate": 3.8477590650225735e-05, |
| "loss": 1.6916, |
| "num_input_tokens_seen": 2516582400, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1505, |
| "grad_norm": 0.8273991942405701, |
| "learning_rate": 3.8447392367328535e-05, |
| "loss": 1.7422, |
| "num_input_tokens_seen": 2524971008, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.151, |
| "grad_norm": 0.7442718744277954, |
| "learning_rate": 3.8416909602820534e-05, |
| "loss": 1.7876, |
| "num_input_tokens_seen": 2533359616, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.1515, |
| "grad_norm": 1.1291152238845825, |
| "learning_rate": 3.8386142826783645e-05, |
| "loss": 1.7041, |
| "num_input_tokens_seen": 2541748224, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 0.8179329037666321, |
| "learning_rate": 3.835509251367963e-05, |
| "loss": 1.6661, |
| "num_input_tokens_seen": 2550136832, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.1525, |
| "grad_norm": 0.7719331383705139, |
| "learning_rate": 3.832375914234272e-05, |
| "loss": 1.6901, |
| "num_input_tokens_seen": 2558525440, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.153, |
| "grad_norm": 0.8355837464332581, |
| "learning_rate": 3.829214319597228e-05, |
| "loss": 1.6282, |
| "num_input_tokens_seen": 2566914048, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.1535, |
| "grad_norm": 1.228195071220398, |
| "learning_rate": 3.826024516212529e-05, |
| "loss": 1.5801, |
| "num_input_tokens_seen": 2575302656, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.154, |
| "grad_norm": 1.644123911857605, |
| "learning_rate": 3.8228065532708905e-05, |
| "loss": 1.626, |
| "num_input_tokens_seen": 2583691264, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.1545, |
| "grad_norm": 0.9140821695327759, |
| "learning_rate": 3.819560480397282e-05, |
| "loss": 1.671, |
| "num_input_tokens_seen": 2592079872, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.155, |
| "grad_norm": 1.3930050134658813, |
| "learning_rate": 3.816286347650163e-05, |
| "loss": 1.5599, |
| "num_input_tokens_seen": 2600468480, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1555, |
| "grad_norm": 1.069976568222046, |
| "learning_rate": 3.81298420552071e-05, |
| "loss": 1.6816, |
| "num_input_tokens_seen": 2608857088, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.156, |
| "grad_norm": 1.2245848178863525, |
| "learning_rate": 3.809654104932039e-05, |
| "loss": 1.4973, |
| "num_input_tokens_seen": 2617245696, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.1565, |
| "grad_norm": 1.045593500137329, |
| "learning_rate": 3.8062960972384223e-05, |
| "loss": 1.6431, |
| "num_input_tokens_seen": 2625634304, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.157, |
| "grad_norm": 1.0730377435684204, |
| "learning_rate": 3.802910234224491e-05, |
| "loss": 1.8426, |
| "num_input_tokens_seen": 2634022912, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.1575, |
| "grad_norm": 1.0670175552368164, |
| "learning_rate": 3.7994965681044436e-05, |
| "loss": 1.6098, |
| "num_input_tokens_seen": 2642411520, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.158, |
| "grad_norm": 1.2215880155563354, |
| "learning_rate": 3.796055151521231e-05, |
| "loss": 1.6359, |
| "num_input_tokens_seen": 2650800128, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.1585, |
| "grad_norm": 0.8806067705154419, |
| "learning_rate": 3.792586037545758e-05, |
| "loss": 1.5976, |
| "num_input_tokens_seen": 2659188736, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.159, |
| "grad_norm": 0.8471524119377136, |
| "learning_rate": 3.78908927967605e-05, |
| "loss": 1.6655, |
| "num_input_tokens_seen": 2667577344, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1595, |
| "grad_norm": 0.8181973099708557, |
| "learning_rate": 3.785564931836442e-05, |
| "loss": 1.6711, |
| "num_input_tokens_seen": 2675965952, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.8937894105911255, |
| "learning_rate": 3.782013048376736e-05, |
| "loss": 1.5473, |
| "num_input_tokens_seen": 2684354560, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1605, |
| "grad_norm": 1.0847939252853394, |
| "learning_rate": 3.778433684071369e-05, |
| "loss": 1.6359, |
| "num_input_tokens_seen": 2692743168, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.161, |
| "grad_norm": 1.1049737930297852, |
| "learning_rate": 3.774826894118567e-05, |
| "loss": 1.7373, |
| "num_input_tokens_seen": 2701131776, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.1615, |
| "grad_norm": 0.8523493409156799, |
| "learning_rate": 3.7711927341394916e-05, |
| "loss": 1.5551, |
| "num_input_tokens_seen": 2709520384, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.162, |
| "grad_norm": 0.9004189372062683, |
| "learning_rate": 3.7675312601773874e-05, |
| "loss": 1.5898, |
| "num_input_tokens_seen": 2717908992, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.1625, |
| "grad_norm": 1.4729281663894653, |
| "learning_rate": 3.76384252869671e-05, |
| "loss": 1.6389, |
| "num_input_tokens_seen": 2726297600, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.163, |
| "grad_norm": 0.9593777656555176, |
| "learning_rate": 3.760126596582264e-05, |
| "loss": 1.674, |
| "num_input_tokens_seen": 2734686208, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.1635, |
| "grad_norm": 0.8906177878379822, |
| "learning_rate": 3.756383521138319e-05, |
| "loss": 1.5994, |
| "num_input_tokens_seen": 2743074816, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.164, |
| "grad_norm": 0.7630260586738586, |
| "learning_rate": 3.7526133600877275e-05, |
| "loss": 1.6662, |
| "num_input_tokens_seen": 2751463424, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.1645, |
| "grad_norm": 0.837302029132843, |
| "learning_rate": 3.748816171571038e-05, |
| "loss": 1.739, |
| "num_input_tokens_seen": 2759852032, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.165, |
| "grad_norm": 0.9471412897109985, |
| "learning_rate": 3.744992014145595e-05, |
| "loss": 1.6867, |
| "num_input_tokens_seen": 2768240640, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1655, |
| "grad_norm": 0.7041006684303284, |
| "learning_rate": 3.741140946784635e-05, |
| "loss": 1.7248, |
| "num_input_tokens_seen": 2776629248, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.166, |
| "grad_norm": 0.7114289999008179, |
| "learning_rate": 3.737263028876383e-05, |
| "loss": 1.5945, |
| "num_input_tokens_seen": 2785017856, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.1665, |
| "grad_norm": 0.7291207313537598, |
| "learning_rate": 3.733358320223128e-05, |
| "loss": 1.4206, |
| "num_input_tokens_seen": 2793406464, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.167, |
| "grad_norm": 0.7624229192733765, |
| "learning_rate": 3.729426881040311e-05, |
| "loss": 1.587, |
| "num_input_tokens_seen": 2801795072, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.1675, |
| "grad_norm": 0.6349395513534546, |
| "learning_rate": 3.725468771955584e-05, |
| "loss": 1.8279, |
| "num_input_tokens_seen": 2810183680, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 0.6584758162498474, |
| "learning_rate": 3.721484054007888e-05, |
| "loss": 1.5794, |
| "num_input_tokens_seen": 2818572288, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.1685, |
| "grad_norm": 0.8144023418426514, |
| "learning_rate": 3.717472788646501e-05, |
| "loss": 1.7197, |
| "num_input_tokens_seen": 2826960896, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.169, |
| "grad_norm": 0.9008212685585022, |
| "learning_rate": 3.7134350377301e-05, |
| "loss": 1.5056, |
| "num_input_tokens_seen": 2835349504, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.1695, |
| "grad_norm": 1.026350498199463, |
| "learning_rate": 3.709370863525796e-05, |
| "loss": 1.6921, |
| "num_input_tokens_seen": 2843738112, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.0004777908325195, |
| "learning_rate": 3.705280328708185e-05, |
| "loss": 1.6245, |
| "num_input_tokens_seen": 2852126720, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1705, |
| "grad_norm": 1.0942559242248535, |
| "learning_rate": 3.701163496358373e-05, |
| "loss": 1.5393, |
| "num_input_tokens_seen": 2860515328, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.171, |
| "grad_norm": 0.7471975684165955, |
| "learning_rate": 3.6970204299630077e-05, |
| "loss": 1.7802, |
| "num_input_tokens_seen": 2868903936, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.1715, |
| "grad_norm": 0.9047533869743347, |
| "learning_rate": 3.692851193413299e-05, |
| "loss": 1.5245, |
| "num_input_tokens_seen": 2877292544, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.172, |
| "grad_norm": 1.4945957660675049, |
| "learning_rate": 3.6886558510040305e-05, |
| "loss": 1.6707, |
| "num_input_tokens_seen": 2885681152, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.1725, |
| "grad_norm": 0.808014452457428, |
| "learning_rate": 3.684434467432573e-05, |
| "loss": 1.7578, |
| "num_input_tokens_seen": 2894069760, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.173, |
| "grad_norm": 0.9657407402992249, |
| "learning_rate": 3.680187107797884e-05, |
| "loss": 1.6556, |
| "num_input_tokens_seen": 2902458368, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.1735, |
| "grad_norm": 1.2704793214797974, |
| "learning_rate": 3.675913837599503e-05, |
| "loss": 1.5934, |
| "num_input_tokens_seen": 2910846976, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.174, |
| "grad_norm": 0.90053790807724, |
| "learning_rate": 3.671614722736541e-05, |
| "loss": 1.7298, |
| "num_input_tokens_seen": 2919235584, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.1745, |
| "grad_norm": 0.810030460357666, |
| "learning_rate": 3.667289829506669e-05, |
| "loss": 1.5586, |
| "num_input_tokens_seen": 2927624192, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 1.1691352128982544, |
| "learning_rate": 3.662939224605091e-05, |
| "loss": 1.6294, |
| "num_input_tokens_seen": 2936012800, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1755, |
| "grad_norm": 1.1451619863510132, |
| "learning_rate": 3.658562975123516e-05, |
| "loss": 1.4937, |
| "num_input_tokens_seen": 2944401408, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.9522868990898132, |
| "learning_rate": 3.654161148549124e-05, |
| "loss": 1.7785, |
| "num_input_tokens_seen": 2952790016, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.1765, |
| "grad_norm": 0.7397971153259277, |
| "learning_rate": 3.649733812763527e-05, |
| "loss": 1.713, |
| "num_input_tokens_seen": 2961178624, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.177, |
| "grad_norm": 0.7993081212043762, |
| "learning_rate": 3.64528103604172e-05, |
| "loss": 1.5611, |
| "num_input_tokens_seen": 2969567232, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.1775, |
| "grad_norm": 0.8007155060768127, |
| "learning_rate": 3.640802887051027e-05, |
| "loss": 1.5691, |
| "num_input_tokens_seen": 2977955840, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.178, |
| "grad_norm": 0.8291440606117249, |
| "learning_rate": 3.636299434850047e-05, |
| "loss": 1.4725, |
| "num_input_tokens_seen": 2986344448, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.1785, |
| "grad_norm": 1.0753155946731567, |
| "learning_rate": 3.631770748887583e-05, |
| "loss": 1.5618, |
| "num_input_tokens_seen": 2994733056, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.179, |
| "grad_norm": 0.8969347476959229, |
| "learning_rate": 3.627216899001575e-05, |
| "loss": 1.6699, |
| "num_input_tokens_seen": 3003121664, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.1795, |
| "grad_norm": 0.8943629264831543, |
| "learning_rate": 3.62263795541802e-05, |
| "loss": 1.7087, |
| "num_input_tokens_seen": 3011510272, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.9483537077903748, |
| "learning_rate": 3.6180339887498953e-05, |
| "loss": 1.6257, |
| "num_input_tokens_seen": 3019898880, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1805, |
| "grad_norm": 0.8657635450363159, |
| "learning_rate": 3.6134050699960604e-05, |
| "loss": 1.689, |
| "num_input_tokens_seen": 3028287488, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.181, |
| "grad_norm": 0.7815025448799133, |
| "learning_rate": 3.608751270540169e-05, |
| "loss": 1.6175, |
| "num_input_tokens_seen": 3036676096, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.1815, |
| "grad_norm": 0.7870849967002869, |
| "learning_rate": 3.604072662149567e-05, |
| "loss": 1.5586, |
| "num_input_tokens_seen": 3045064704, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.182, |
| "grad_norm": 0.8339277505874634, |
| "learning_rate": 3.599369316974182e-05, |
| "loss": 1.5271, |
| "num_input_tokens_seen": 3053453312, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.1825, |
| "grad_norm": 0.9285105466842651, |
| "learning_rate": 3.594641307545414e-05, |
| "loss": 1.7041, |
| "num_input_tokens_seen": 3061841920, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.183, |
| "grad_norm": 0.8689367175102234, |
| "learning_rate": 3.58988870677502e-05, |
| "loss": 1.4738, |
| "num_input_tokens_seen": 3070230528, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.1835, |
| "grad_norm": 0.929764986038208, |
| "learning_rate": 3.585111587953982e-05, |
| "loss": 1.7297, |
| "num_input_tokens_seen": 3078619136, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 1.05793035030365, |
| "learning_rate": 3.580310024751381e-05, |
| "loss": 1.5431, |
| "num_input_tokens_seen": 3087007744, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.1845, |
| "grad_norm": 0.8866976499557495, |
| "learning_rate": 3.575484091213262e-05, |
| "loss": 1.6682, |
| "num_input_tokens_seen": 3095396352, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.185, |
| "grad_norm": 0.7259969115257263, |
| "learning_rate": 3.57063386176149e-05, |
| "loss": 1.6123, |
| "num_input_tokens_seen": 3103784960, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1855, |
| "grad_norm": 0.9831200838088989, |
| "learning_rate": 3.565759411192604e-05, |
| "loss": 1.5845, |
| "num_input_tokens_seen": 3112173568, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.186, |
| "grad_norm": 0.9731907248497009, |
| "learning_rate": 3.5608608146766597e-05, |
| "loss": 1.7179, |
| "num_input_tokens_seen": 3120562176, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1865, |
| "grad_norm": 1.0148561000823975, |
| "learning_rate": 3.555938147756077e-05, |
| "loss": 1.9192, |
| "num_input_tokens_seen": 3128950784, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.187, |
| "grad_norm": 0.9084612131118774, |
| "learning_rate": 3.5509914863444694e-05, |
| "loss": 1.5912, |
| "num_input_tokens_seen": 3137339392, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.1875, |
| "grad_norm": 0.9402855038642883, |
| "learning_rate": 3.546020906725474e-05, |
| "loss": 1.5803, |
| "num_input_tokens_seen": 3145728000, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.188, |
| "grad_norm": 0.9808095693588257, |
| "learning_rate": 3.541026485551579e-05, |
| "loss": 1.8292, |
| "num_input_tokens_seen": 3154116608, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.1885, |
| "grad_norm": 1.1110026836395264, |
| "learning_rate": 3.536008299842936e-05, |
| "loss": 1.6698, |
| "num_input_tokens_seen": 3162505216, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.189, |
| "grad_norm": 1.0544601678848267, |
| "learning_rate": 3.530966426986177e-05, |
| "loss": 1.6978, |
| "num_input_tokens_seen": 3170893824, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.1895, |
| "grad_norm": 0.7350985407829285, |
| "learning_rate": 3.525900944733218e-05, |
| "loss": 1.6923, |
| "num_input_tokens_seen": 3179282432, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.9171492457389832, |
| "learning_rate": 3.520811931200063e-05, |
| "loss": 1.6079, |
| "num_input_tokens_seen": 3187671040, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.1905, |
| "grad_norm": 1.1049323081970215, |
| "learning_rate": 3.515699464865594e-05, |
| "loss": 1.6878, |
| "num_input_tokens_seen": 3196059648, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.191, |
| "grad_norm": 0.776717483997345, |
| "learning_rate": 3.5105636245703675e-05, |
| "loss": 1.5757, |
| "num_input_tokens_seen": 3204448256, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.1915, |
| "grad_norm": 0.8234217166900635, |
| "learning_rate": 3.505404489515394e-05, |
| "loss": 1.6609, |
| "num_input_tokens_seen": 3212836864, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.854977011680603, |
| "learning_rate": 3.5002221392609196e-05, |
| "loss": 1.6726, |
| "num_input_tokens_seen": 3221225472, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1925, |
| "grad_norm": 0.6885860562324524, |
| "learning_rate": 3.495016653725194e-05, |
| "loss": 1.7057, |
| "num_input_tokens_seen": 3229614080, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.193, |
| "grad_norm": 0.8042221665382385, |
| "learning_rate": 3.489788113183244e-05, |
| "loss": 1.6693, |
| "num_input_tokens_seen": 3238002688, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.1935, |
| "grad_norm": 0.8148207664489746, |
| "learning_rate": 3.484536598265634e-05, |
| "loss": 1.5554, |
| "num_input_tokens_seen": 3246391296, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.194, |
| "grad_norm": 0.8488610982894897, |
| "learning_rate": 3.47926218995722e-05, |
| "loss": 1.5941, |
| "num_input_tokens_seen": 3254779904, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1945, |
| "grad_norm": 0.7625729441642761, |
| "learning_rate": 3.473964969595902e-05, |
| "loss": 1.5805, |
| "num_input_tokens_seen": 3263168512, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.195, |
| "grad_norm": 0.8852419853210449, |
| "learning_rate": 3.468645018871371e-05, |
| "loss": 1.6924, |
| "num_input_tokens_seen": 3271557120, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1955, |
| "grad_norm": 0.7517937421798706, |
| "learning_rate": 3.46330241982385e-05, |
| "loss": 1.6021, |
| "num_input_tokens_seen": 3279945728, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.196, |
| "grad_norm": 0.676200807094574, |
| "learning_rate": 3.457937254842823e-05, |
| "loss": 1.738, |
| "num_input_tokens_seen": 3288334336, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.1965, |
| "grad_norm": 0.9149476289749146, |
| "learning_rate": 3.4525496066657735e-05, |
| "loss": 1.7369, |
| "num_input_tokens_seen": 3296722944, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.197, |
| "grad_norm": 0.8072534203529358, |
| "learning_rate": 3.4471395583768985e-05, |
| "loss": 1.6982, |
| "num_input_tokens_seen": 3305111552, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1975, |
| "grad_norm": 0.7343615889549255, |
| "learning_rate": 3.441707193405838e-05, |
| "loss": 1.5764, |
| "num_input_tokens_seen": 3313500160, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.198, |
| "grad_norm": 1.0707248449325562, |
| "learning_rate": 3.436252595526378e-05, |
| "loss": 1.5652, |
| "num_input_tokens_seen": 3321888768, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.1985, |
| "grad_norm": 1.0053772926330566, |
| "learning_rate": 3.430775848855166e-05, |
| "loss": 1.6278, |
| "num_input_tokens_seen": 3330277376, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.199, |
| "grad_norm": 1.151880145072937, |
| "learning_rate": 3.425277037850411e-05, |
| "loss": 1.7023, |
| "num_input_tokens_seen": 3338665984, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.1995, |
| "grad_norm": 0.7218210697174072, |
| "learning_rate": 3.419756247310581e-05, |
| "loss": 1.5999, |
| "num_input_tokens_seen": 3347054592, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.1546841859817505, |
| "learning_rate": 3.4142135623730954e-05, |
| "loss": 1.6385, |
| "num_input_tokens_seen": 3355443200, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2005, |
| "grad_norm": 0.7345328330993652, |
| "learning_rate": 3.408649068513013e-05, |
| "loss": 1.7926, |
| "num_input_tokens_seen": 3363831808, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.201, |
| "grad_norm": 0.7448058724403381, |
| "learning_rate": 3.403062851541712e-05, |
| "loss": 1.698, |
| "num_input_tokens_seen": 3372220416, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.2015, |
| "grad_norm": 0.912766695022583, |
| "learning_rate": 3.397454997605569e-05, |
| "loss": 1.6496, |
| "num_input_tokens_seen": 3380609024, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.202, |
| "grad_norm": 0.7980219721794128, |
| "learning_rate": 3.391825593184629e-05, |
| "loss": 1.6537, |
| "num_input_tokens_seen": 3388997632, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.2025, |
| "grad_norm": 0.7381040453910828, |
| "learning_rate": 3.3861747250912724e-05, |
| "loss": 1.6003, |
| "num_input_tokens_seen": 3397386240, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.203, |
| "grad_norm": 0.7264212369918823, |
| "learning_rate": 3.3805024804688745e-05, |
| "loss": 1.4873, |
| "num_input_tokens_seen": 3405774848, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.2035, |
| "grad_norm": 0.7035699486732483, |
| "learning_rate": 3.374808946790466e-05, |
| "loss": 1.7535, |
| "num_input_tokens_seen": 3414163456, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.204, |
| "grad_norm": 0.7741231918334961, |
| "learning_rate": 3.369094211857378e-05, |
| "loss": 1.6001, |
| "num_input_tokens_seen": 3422552064, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2045, |
| "grad_norm": 0.9337701201438904, |
| "learning_rate": 3.363358363797893e-05, |
| "loss": 1.7443, |
| "num_input_tokens_seen": 3430940672, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.205, |
| "grad_norm": 0.7415249347686768, |
| "learning_rate": 3.357601491065884e-05, |
| "loss": 1.5493, |
| "num_input_tokens_seen": 3439329280, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2055, |
| "grad_norm": 0.9262260794639587, |
| "learning_rate": 3.35182368243945e-05, |
| "loss": 1.5924, |
| "num_input_tokens_seen": 3447717888, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.206, |
| "grad_norm": 0.9444292783737183, |
| "learning_rate": 3.346025027019547e-05, |
| "loss": 1.5166, |
| "num_input_tokens_seen": 3456106496, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.2065, |
| "grad_norm": 0.9404441118240356, |
| "learning_rate": 3.3402056142286156e-05, |
| "loss": 1.4745, |
| "num_input_tokens_seen": 3464495104, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.207, |
| "grad_norm": 0.9392333030700684, |
| "learning_rate": 3.3343655338091996e-05, |
| "loss": 1.7033, |
| "num_input_tokens_seen": 3472883712, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.2075, |
| "grad_norm": 0.8478058576583862, |
| "learning_rate": 3.328504875822564e-05, |
| "loss": 1.5295, |
| "num_input_tokens_seen": 3481272320, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 1.0962209701538086, |
| "learning_rate": 3.322623730647304e-05, |
| "loss": 1.485, |
| "num_input_tokens_seen": 3489660928, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.2085, |
| "grad_norm": 0.8134574890136719, |
| "learning_rate": 3.316722188977955e-05, |
| "loss": 1.6648, |
| "num_input_tokens_seen": 3498049536, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.209, |
| "grad_norm": 0.7560186982154846, |
| "learning_rate": 3.310800341823588e-05, |
| "loss": 1.7324, |
| "num_input_tokens_seen": 3506438144, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.2095, |
| "grad_norm": 0.8222687244415283, |
| "learning_rate": 3.3048582805064137e-05, |
| "loss": 1.7548, |
| "num_input_tokens_seen": 3514826752, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.6891204714775085, |
| "learning_rate": 3.298896096660367e-05, |
| "loss": 1.6091, |
| "num_input_tokens_seen": 3523215360, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2105, |
| "grad_norm": 0.8553364276885986, |
| "learning_rate": 3.2929138822297004e-05, |
| "loss": 1.6255, |
| "num_input_tokens_seen": 3531603968, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.211, |
| "grad_norm": 0.6773521900177002, |
| "learning_rate": 3.286911729467558e-05, |
| "loss": 1.6468, |
| "num_input_tokens_seen": 3539992576, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.2115, |
| "grad_norm": 0.6326610445976257, |
| "learning_rate": 3.280889730934562e-05, |
| "loss": 1.5726, |
| "num_input_tokens_seen": 3548381184, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.212, |
| "grad_norm": 0.6619254946708679, |
| "learning_rate": 3.27484797949738e-05, |
| "loss": 1.7451, |
| "num_input_tokens_seen": 3556769792, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2125, |
| "grad_norm": 0.6987465620040894, |
| "learning_rate": 3.268786568327291e-05, |
| "loss": 1.5267, |
| "num_input_tokens_seen": 3565158400, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.213, |
| "grad_norm": 1.052083969116211, |
| "learning_rate": 3.262705590898756e-05, |
| "loss": 1.5756, |
| "num_input_tokens_seen": 3573547008, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.2135, |
| "grad_norm": 1.3585137128829956, |
| "learning_rate": 3.2566051409879676e-05, |
| "loss": 1.6222, |
| "num_input_tokens_seen": 3581935616, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.214, |
| "grad_norm": 0.7190059423446655, |
| "learning_rate": 3.250485312671411e-05, |
| "loss": 1.5788, |
| "num_input_tokens_seen": 3590324224, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.2145, |
| "grad_norm": 1.0504647493362427, |
| "learning_rate": 3.244346200324409e-05, |
| "loss": 1.7213, |
| "num_input_tokens_seen": 3598712832, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.215, |
| "grad_norm": 1.4357564449310303, |
| "learning_rate": 3.238187898619669e-05, |
| "loss": 1.9118, |
| "num_input_tokens_seen": 3607101440, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2155, |
| "grad_norm": 0.7912888526916504, |
| "learning_rate": 3.23201050252582e-05, |
| "loss": 1.5837, |
| "num_input_tokens_seen": 3615490048, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 1.4020187854766846, |
| "learning_rate": 3.2258141073059533e-05, |
| "loss": 1.602, |
| "num_input_tokens_seen": 3623878656, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2165, |
| "grad_norm": 0.8430026173591614, |
| "learning_rate": 3.219598808516148e-05, |
| "loss": 1.5711, |
| "num_input_tokens_seen": 3632267264, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.217, |
| "grad_norm": 1.3351163864135742, |
| "learning_rate": 3.2133647020039995e-05, |
| "loss": 1.64, |
| "num_input_tokens_seen": 3640655872, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.2175, |
| "grad_norm": 0.8527347445487976, |
| "learning_rate": 3.207111883907143e-05, |
| "loss": 1.6443, |
| "num_input_tokens_seen": 3649044480, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.218, |
| "grad_norm": 1.2249008417129517, |
| "learning_rate": 3.200840450651769e-05, |
| "loss": 1.7433, |
| "num_input_tokens_seen": 3657433088, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.2185, |
| "grad_norm": 0.8667119741439819, |
| "learning_rate": 3.194550498951134e-05, |
| "loss": 1.6459, |
| "num_input_tokens_seen": 3665821696, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.219, |
| "grad_norm": 1.0201963186264038, |
| "learning_rate": 3.188242125804078e-05, |
| "loss": 1.5644, |
| "num_input_tokens_seen": 3674210304, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.2195, |
| "grad_norm": 0.6974561214447021, |
| "learning_rate": 3.181915428493515e-05, |
| "loss": 1.6249, |
| "num_input_tokens_seen": 3682598912, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.920744001865387, |
| "learning_rate": 3.1755705045849465e-05, |
| "loss": 1.5955, |
| "num_input_tokens_seen": 3690987520, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2205, |
| "grad_norm": 0.7391747832298279, |
| "learning_rate": 3.1692074519249476e-05, |
| "loss": 1.5939, |
| "num_input_tokens_seen": 3699376128, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.221, |
| "grad_norm": 0.7469002604484558, |
| "learning_rate": 3.1628263686396614e-05, |
| "loss": 1.9061, |
| "num_input_tokens_seen": 3707764736, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.2215, |
| "grad_norm": 0.8305338025093079, |
| "learning_rate": 3.156427353133286e-05, |
| "loss": 1.7827, |
| "num_input_tokens_seen": 3716153344, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.222, |
| "grad_norm": 0.666205644607544, |
| "learning_rate": 3.150010504086558e-05, |
| "loss": 1.5269, |
| "num_input_tokens_seen": 3724541952, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2225, |
| "grad_norm": 0.6806762218475342, |
| "learning_rate": 3.1435759204552246e-05, |
| "loss": 1.7997, |
| "num_input_tokens_seen": 3732930560, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.223, |
| "grad_norm": 0.6571435332298279, |
| "learning_rate": 3.1371237014685285e-05, |
| "loss": 1.6215, |
| "num_input_tokens_seen": 3741319168, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.2235, |
| "grad_norm": 0.685133159160614, |
| "learning_rate": 3.130653946627666e-05, |
| "loss": 1.7139, |
| "num_input_tokens_seen": 3749707776, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.6187507510185242, |
| "learning_rate": 3.124166755704261e-05, |
| "loss": 1.6145, |
| "num_input_tokens_seen": 3758096384, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.2245, |
| "grad_norm": 0.5842682719230652, |
| "learning_rate": 3.117662228738823e-05, |
| "loss": 1.6303, |
| "num_input_tokens_seen": 3766484992, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 0.626196026802063, |
| "learning_rate": 3.111140466039205e-05, |
| "loss": 1.4281, |
| "num_input_tokens_seen": 3774873600, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2255, |
| "grad_norm": 0.8410570025444031, |
| "learning_rate": 3.104601568179054e-05, |
| "loss": 1.5349, |
| "num_input_tokens_seen": 3783262208, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.226, |
| "grad_norm": 0.9013755321502686, |
| "learning_rate": 3.098045635996264e-05, |
| "loss": 1.6157, |
| "num_input_tokens_seen": 3791650816, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2265, |
| "grad_norm": 0.8393816351890564, |
| "learning_rate": 3.09147277059142e-05, |
| "loss": 1.5574, |
| "num_input_tokens_seen": 3800039424, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.227, |
| "grad_norm": 0.8969332575798035, |
| "learning_rate": 3.084883073326238e-05, |
| "loss": 1.8073, |
| "num_input_tokens_seen": 3808428032, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2275, |
| "grad_norm": 0.9891500473022461, |
| "learning_rate": 3.078276645822001e-05, |
| "loss": 1.6999, |
| "num_input_tokens_seen": 3816816640, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.228, |
| "grad_norm": 0.6412311792373657, |
| "learning_rate": 3.0716535899579936e-05, |
| "loss": 1.7065, |
| "num_input_tokens_seen": 3825205248, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2285, |
| "grad_norm": 1.021301031112671, |
| "learning_rate": 3.065014007869931e-05, |
| "loss": 1.6141, |
| "num_input_tokens_seen": 3833593856, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.229, |
| "grad_norm": 0.933661162853241, |
| "learning_rate": 3.058358001948381e-05, |
| "loss": 1.5318, |
| "num_input_tokens_seen": 3841982464, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2295, |
| "grad_norm": 0.7375662922859192, |
| "learning_rate": 3.0516856748371914e-05, |
| "loss": 1.6712, |
| "num_input_tokens_seen": 3850371072, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.7946784496307373, |
| "learning_rate": 3.0449971294318977e-05, |
| "loss": 1.6786, |
| "num_input_tokens_seen": 3858759680, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2305, |
| "grad_norm": 0.8587263822555542, |
| "learning_rate": 3.0382924688781462e-05, |
| "loss": 1.6399, |
| "num_input_tokens_seen": 3867148288, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.231, |
| "grad_norm": 0.836588442325592, |
| "learning_rate": 3.031571796570095e-05, |
| "loss": 1.5581, |
| "num_input_tokens_seen": 3875536896, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2315, |
| "grad_norm": 0.70086270570755, |
| "learning_rate": 3.0248352161488267e-05, |
| "loss": 1.4948, |
| "num_input_tokens_seen": 3883925504, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 0.7697365283966064, |
| "learning_rate": 3.018082831500743e-05, |
| "loss": 1.6849, |
| "num_input_tokens_seen": 3892314112, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2325, |
| "grad_norm": 0.7808486819267273, |
| "learning_rate": 3.0113147467559697e-05, |
| "loss": 1.5426, |
| "num_input_tokens_seen": 3900702720, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.233, |
| "grad_norm": 0.6648643016815186, |
| "learning_rate": 3.004531066286745e-05, |
| "loss": 1.6239, |
| "num_input_tokens_seen": 3909091328, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2335, |
| "grad_norm": 0.6827794909477234, |
| "learning_rate": 2.997731894705815e-05, |
| "loss": 1.5759, |
| "num_input_tokens_seen": 3917479936, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.234, |
| "grad_norm": 0.7341679334640503, |
| "learning_rate": 2.9909173368648154e-05, |
| "loss": 1.7485, |
| "num_input_tokens_seen": 3925868544, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.2345, |
| "grad_norm": 0.6696147322654724, |
| "learning_rate": 2.9840874978526582e-05, |
| "loss": 1.5589, |
| "num_input_tokens_seen": 3934257152, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.235, |
| "grad_norm": 0.6791352033615112, |
| "learning_rate": 2.9772424829939103e-05, |
| "loss": 1.5359, |
| "num_input_tokens_seen": 3942645760, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2355, |
| "grad_norm": 0.5946401357650757, |
| "learning_rate": 2.9703823978471676e-05, |
| "loss": 1.6936, |
| "num_input_tokens_seen": 3951034368, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.236, |
| "grad_norm": 0.5652855038642883, |
| "learning_rate": 2.9635073482034307e-05, |
| "loss": 1.5851, |
| "num_input_tokens_seen": 3959422976, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.2365, |
| "grad_norm": 0.6535623669624329, |
| "learning_rate": 2.9566174400844692e-05, |
| "loss": 1.6653, |
| "num_input_tokens_seen": 3967811584, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.237, |
| "grad_norm": 0.6742349863052368, |
| "learning_rate": 2.949712779741189e-05, |
| "loss": 1.5506, |
| "num_input_tokens_seen": 3976200192, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.2375, |
| "grad_norm": 0.8731011748313904, |
| "learning_rate": 2.9427934736519962e-05, |
| "loss": 1.6377, |
| "num_input_tokens_seen": 3984588800, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.238, |
| "grad_norm": 0.834618866443634, |
| "learning_rate": 2.935859628521147e-05, |
| "loss": 1.6262, |
| "num_input_tokens_seen": 3992977408, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2385, |
| "grad_norm": 0.7309226393699646, |
| "learning_rate": 2.9289113512771133e-05, |
| "loss": 1.6611, |
| "num_input_tokens_seen": 4001366016, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.239, |
| "grad_norm": 0.5517134070396423, |
| "learning_rate": 2.921948749070925e-05, |
| "loss": 1.6789, |
| "num_input_tokens_seen": 4009754624, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.2395, |
| "grad_norm": 0.6876691579818726, |
| "learning_rate": 2.914971929274521e-05, |
| "loss": 1.5779, |
| "num_input_tokens_seen": 4018143232, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.7181439399719238, |
| "learning_rate": 2.9079809994790937e-05, |
| "loss": 1.7478, |
| "num_input_tokens_seen": 4026531840, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2405, |
| "grad_norm": 0.546655535697937, |
| "learning_rate": 2.900976067493429e-05, |
| "loss": 1.6137, |
| "num_input_tokens_seen": 4034920448, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.241, |
| "grad_norm": 0.8767627477645874, |
| "learning_rate": 2.8939572413422426e-05, |
| "loss": 1.7662, |
| "num_input_tokens_seen": 4043309056, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2415, |
| "grad_norm": 1.0640788078308105, |
| "learning_rate": 2.886924629264517e-05, |
| "loss": 1.6892, |
| "num_input_tokens_seen": 4051697664, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.242, |
| "grad_norm": 0.7425519824028015, |
| "learning_rate": 2.8798783397118305e-05, |
| "loss": 1.6408, |
| "num_input_tokens_seen": 4060086272, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.2425, |
| "grad_norm": 0.5955070853233337, |
| "learning_rate": 2.872818481346684e-05, |
| "loss": 1.6736, |
| "num_input_tokens_seen": 4068474880, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.243, |
| "grad_norm": 0.7473011612892151, |
| "learning_rate": 2.8657451630408287e-05, |
| "loss": 1.6427, |
| "num_input_tokens_seen": 4076863488, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.2435, |
| "grad_norm": 0.7602230310440063, |
| "learning_rate": 2.85865849387358e-05, |
| "loss": 1.7862, |
| "num_input_tokens_seen": 4085252096, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.244, |
| "grad_norm": 0.5774588584899902, |
| "learning_rate": 2.8515585831301456e-05, |
| "loss": 1.4897, |
| "num_input_tokens_seen": 4093640704, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.2445, |
| "grad_norm": 1.0716197490692139, |
| "learning_rate": 2.844445540299931e-05, |
| "loss": 1.7469, |
| "num_input_tokens_seen": 4102029312, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.245, |
| "grad_norm": 0.9976931810379028, |
| "learning_rate": 2.8373194750748566e-05, |
| "loss": 1.7009, |
| "num_input_tokens_seen": 4110417920, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2455, |
| "grad_norm": 0.6268777251243591, |
| "learning_rate": 2.8301804973476628e-05, |
| "loss": 1.5147, |
| "num_input_tokens_seen": 4118806528, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.246, |
| "grad_norm": 0.8679307103157043, |
| "learning_rate": 2.823028717210218e-05, |
| "loss": 1.6155, |
| "num_input_tokens_seen": 4127195136, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.2465, |
| "grad_norm": 0.7973881959915161, |
| "learning_rate": 2.8158642449518186e-05, |
| "loss": 1.6257, |
| "num_input_tokens_seen": 4135583744, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.247, |
| "grad_norm": 0.5766689777374268, |
| "learning_rate": 2.8086871910574904e-05, |
| "loss": 1.6149, |
| "num_input_tokens_seen": 4143972352, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.2475, |
| "grad_norm": 0.6986018419265747, |
| "learning_rate": 2.8014976662062818e-05, |
| "loss": 1.571, |
| "num_input_tokens_seen": 4152360960, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.9588996171951294, |
| "learning_rate": 2.7942957812695613e-05, |
| "loss": 1.6737, |
| "num_input_tokens_seen": 4160749568, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.2485, |
| "grad_norm": 0.5572625398635864, |
| "learning_rate": 2.787081647309303e-05, |
| "loss": 1.6634, |
| "num_input_tokens_seen": 4169138176, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.249, |
| "grad_norm": 0.7389435172080994, |
| "learning_rate": 2.7798553755763768e-05, |
| "loss": 1.4613, |
| "num_input_tokens_seen": 4177526784, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.2495, |
| "grad_norm": 0.7594336867332458, |
| "learning_rate": 2.7726170775088324e-05, |
| "loss": 1.7553, |
| "num_input_tokens_seen": 4185915392, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5780491232872009, |
| "learning_rate": 2.7653668647301797e-05, |
| "loss": 1.674, |
| "num_input_tokens_seen": 4194304000, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2505, |
| "grad_norm": 0.7985231876373291, |
| "learning_rate": 2.7581048490476695e-05, |
| "loss": 1.6012, |
| "num_input_tokens_seen": 4202692608, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.251, |
| "grad_norm": 0.6286901831626892, |
| "learning_rate": 2.7508311424505665e-05, |
| "loss": 1.8672, |
| "num_input_tokens_seen": 4211081216, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.2515, |
| "grad_norm": 0.7128638029098511, |
| "learning_rate": 2.7435458571084247e-05, |
| "loss": 1.5799, |
| "num_input_tokens_seen": 4219469824, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.252, |
| "grad_norm": 0.6798509359359741, |
| "learning_rate": 2.7362491053693564e-05, |
| "loss": 1.6685, |
| "num_input_tokens_seen": 4227858432, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.2525, |
| "grad_norm": 0.8275137543678284, |
| "learning_rate": 2.7289409997583002e-05, |
| "loss": 1.6989, |
| "num_input_tokens_seen": 4236247040, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.253, |
| "grad_norm": 0.5991443991661072, |
| "learning_rate": 2.7216216529752836e-05, |
| "loss": 1.509, |
| "num_input_tokens_seen": 4244635648, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.2535, |
| "grad_norm": 0.7469322681427002, |
| "learning_rate": 2.7142911778936913e-05, |
| "loss": 1.4821, |
| "num_input_tokens_seen": 4253024256, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.254, |
| "grad_norm": 0.7251424789428711, |
| "learning_rate": 2.7069496875585145e-05, |
| "loss": 1.6557, |
| "num_input_tokens_seen": 4261412864, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.2545, |
| "grad_norm": 0.5844877362251282, |
| "learning_rate": 2.6995972951846177e-05, |
| "loss": 1.6354, |
| "num_input_tokens_seen": 4269801472, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.255, |
| "grad_norm": 0.6762341260910034, |
| "learning_rate": 2.692234114154986e-05, |
| "loss": 1.6662, |
| "num_input_tokens_seen": 4278190080, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.2555, |
| "grad_norm": 0.6216310262680054, |
| "learning_rate": 2.68486025801898e-05, |
| "loss": 1.6086, |
| "num_input_tokens_seen": 4286578688, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.6491274833679199, |
| "learning_rate": 2.6774758404905833e-05, |
| "loss": 1.6203, |
| "num_input_tokens_seen": 4294967296, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2565, |
| "grad_norm": 0.5345763564109802, |
| "learning_rate": 2.670080975446648e-05, |
| "loss": 1.7299, |
| "num_input_tokens_seen": 4303355904, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.257, |
| "grad_norm": 0.60438072681427, |
| "learning_rate": 2.662675776925142e-05, |
| "loss": 1.5607, |
| "num_input_tokens_seen": 4311744512, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.2575, |
| "grad_norm": 0.48150795698165894, |
| "learning_rate": 2.6552603591233875e-05, |
| "loss": 1.5468, |
| "num_input_tokens_seen": 4320133120, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.258, |
| "grad_norm": 0.7575556635856628, |
| "learning_rate": 2.647834836396299e-05, |
| "loss": 1.5371, |
| "num_input_tokens_seen": 4328521728, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.2585, |
| "grad_norm": 0.8684462308883667, |
| "learning_rate": 2.6403993232546235e-05, |
| "loss": 1.3988, |
| "num_input_tokens_seen": 4336910336, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.259, |
| "grad_norm": 1.1402355432510376, |
| "learning_rate": 2.6329539343631725e-05, |
| "loss": 1.5219, |
| "num_input_tokens_seen": 4345298944, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.2595, |
| "grad_norm": 0.7717565894126892, |
| "learning_rate": 2.625498784539052e-05, |
| "loss": 1.6181, |
| "num_input_tokens_seen": 4353687552, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.6470482349395752, |
| "learning_rate": 2.618033988749895e-05, |
| "loss": 1.8013, |
| "num_input_tokens_seen": 4362076160, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2605, |
| "grad_norm": 0.9011819958686829, |
| "learning_rate": 2.6105596621120873e-05, |
| "loss": 1.5728, |
| "num_input_tokens_seen": 4370464768, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.261, |
| "grad_norm": 0.9903879761695862, |
| "learning_rate": 2.6030759198889915e-05, |
| "loss": 1.5021, |
| "num_input_tokens_seen": 4378853376, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.2615, |
| "grad_norm": 0.6056469678878784, |
| "learning_rate": 2.595582877489171e-05, |
| "loss": 1.5722, |
| "num_input_tokens_seen": 4387241984, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.262, |
| "grad_norm": 0.7082294225692749, |
| "learning_rate": 2.588080650464608e-05, |
| "loss": 1.5256, |
| "num_input_tokens_seen": 4395630592, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.2625, |
| "grad_norm": 0.8824757933616638, |
| "learning_rate": 2.580569354508925e-05, |
| "loss": 1.6759, |
| "num_input_tokens_seen": 4404019200, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.263, |
| "grad_norm": 0.6927574276924133, |
| "learning_rate": 2.573049105455597e-05, |
| "loss": 1.5, |
| "num_input_tokens_seen": 4412407808, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.2635, |
| "grad_norm": 0.8164727687835693, |
| "learning_rate": 2.5655200192761668e-05, |
| "loss": 1.4772, |
| "num_input_tokens_seen": 4420796416, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 0.6587943434715271, |
| "learning_rate": 2.557982212078459e-05, |
| "loss": 1.6043, |
| "num_input_tokens_seen": 4429185024, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.2645, |
| "grad_norm": 0.6168190836906433, |
| "learning_rate": 2.550435800104783e-05, |
| "loss": 1.5051, |
| "num_input_tokens_seen": 4437573632, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.265, |
| "grad_norm": 0.6624361872673035, |
| "learning_rate": 2.5428808997301486e-05, |
| "loss": 1.6477, |
| "num_input_tokens_seen": 4445962240, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2655, |
| "grad_norm": 0.6016016006469727, |
| "learning_rate": 2.535317627460465e-05, |
| "loss": 1.7103, |
| "num_input_tokens_seen": 4454350848, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.266, |
| "grad_norm": 0.7597566246986389, |
| "learning_rate": 2.5277460999307462e-05, |
| "loss": 1.5454, |
| "num_input_tokens_seen": 4462739456, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.2665, |
| "grad_norm": 0.5779781937599182, |
| "learning_rate": 2.5201664339033138e-05, |
| "loss": 1.74, |
| "num_input_tokens_seen": 4471128064, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.267, |
| "grad_norm": 0.7097752094268799, |
| "learning_rate": 2.5125787462659937e-05, |
| "loss": 1.6777, |
| "num_input_tokens_seen": 4479516672, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.2675, |
| "grad_norm": 0.6358765959739685, |
| "learning_rate": 2.504983154030316e-05, |
| "loss": 1.69, |
| "num_input_tokens_seen": 4487905280, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.268, |
| "grad_norm": 0.8313521146774292, |
| "learning_rate": 2.4973797743297103e-05, |
| "loss": 1.427, |
| "num_input_tokens_seen": 4496293888, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2685, |
| "grad_norm": 1.0910331010818481, |
| "learning_rate": 2.489768724417695e-05, |
| "loss": 1.7318, |
| "num_input_tokens_seen": 4504682496, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.269, |
| "grad_norm": 0.7883654236793518, |
| "learning_rate": 2.4821501216660778e-05, |
| "loss": 1.7768, |
| "num_input_tokens_seen": 4513071104, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.2695, |
| "grad_norm": 0.814295768737793, |
| "learning_rate": 2.474524083563136e-05, |
| "loss": 1.6363, |
| "num_input_tokens_seen": 4521459712, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.8142704367637634, |
| "learning_rate": 2.4668907277118114e-05, |
| "loss": 1.8271, |
| "num_input_tokens_seen": 4529848320, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.2705, |
| "grad_norm": 0.6927081942558289, |
| "learning_rate": 2.459250171827894e-05, |
| "loss": 1.5607, |
| "num_input_tokens_seen": 4538236928, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.271, |
| "grad_norm": 0.7037166357040405, |
| "learning_rate": 2.4516025337382078e-05, |
| "loss": 1.606, |
| "num_input_tokens_seen": 4546625536, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2715, |
| "grad_norm": 0.7414228916168213, |
| "learning_rate": 2.443947931378792e-05, |
| "loss": 1.6102, |
| "num_input_tokens_seen": 4555014144, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.734017014503479, |
| "learning_rate": 2.4362864827930855e-05, |
| "loss": 1.6308, |
| "num_input_tokens_seen": 4563402752, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.2725, |
| "grad_norm": 0.8329829573631287, |
| "learning_rate": 2.4286183061301016e-05, |
| "loss": 1.6925, |
| "num_input_tokens_seen": 4571791360, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.273, |
| "grad_norm": 0.598017692565918, |
| "learning_rate": 2.4209435196426112e-05, |
| "loss": 1.4737, |
| "num_input_tokens_seen": 4580179968, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.2735, |
| "grad_norm": 0.6858911514282227, |
| "learning_rate": 2.4132622416853164e-05, |
| "loss": 1.6603, |
| "num_input_tokens_seen": 4588568576, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.274, |
| "grad_norm": 0.6639436483383179, |
| "learning_rate": 2.405574590713025e-05, |
| "loss": 1.5032, |
| "num_input_tokens_seen": 4596957184, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.2745, |
| "grad_norm": 0.6341187357902527, |
| "learning_rate": 2.3978806852788253e-05, |
| "loss": 1.7347, |
| "num_input_tokens_seen": 4605345792, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.275, |
| "grad_norm": 0.5443724393844604, |
| "learning_rate": 2.390180644032257e-05, |
| "loss": 1.6864, |
| "num_input_tokens_seen": 4613734400, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.2755, |
| "grad_norm": 0.6698442101478577, |
| "learning_rate": 2.382474585717481e-05, |
| "loss": 1.7269, |
| "num_input_tokens_seen": 4622123008, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.276, |
| "grad_norm": 0.508804976940155, |
| "learning_rate": 2.37476262917145e-05, |
| "loss": 1.6529, |
| "num_input_tokens_seen": 4630511616, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2765, |
| "grad_norm": 0.6291599869728088, |
| "learning_rate": 2.3670448933220732e-05, |
| "loss": 1.6619, |
| "num_input_tokens_seen": 4638900224, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.277, |
| "grad_norm": 0.591014564037323, |
| "learning_rate": 2.3593214971863857e-05, |
| "loss": 1.6256, |
| "num_input_tokens_seen": 4647288832, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.2775, |
| "grad_norm": 0.5783071517944336, |
| "learning_rate": 2.3515925598687097e-05, |
| "loss": 1.74, |
| "num_input_tokens_seen": 4655677440, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.278, |
| "grad_norm": 0.539421558380127, |
| "learning_rate": 2.3438582005588192e-05, |
| "loss": 1.6832, |
| "num_input_tokens_seen": 4664066048, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.2785, |
| "grad_norm": 0.5550200343132019, |
| "learning_rate": 2.3361185385301042e-05, |
| "loss": 1.6972, |
| "num_input_tokens_seen": 4672454656, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.279, |
| "grad_norm": 0.6679721474647522, |
| "learning_rate": 2.328373693137726e-05, |
| "loss": 1.6456, |
| "num_input_tokens_seen": 4680843264, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.2795, |
| "grad_norm": 0.49843743443489075, |
| "learning_rate": 2.3206237838167825e-05, |
| "loss": 1.6452, |
| "num_input_tokens_seen": 4689231872, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.6135469079017639, |
| "learning_rate": 2.312868930080462e-05, |
| "loss": 1.6034, |
| "num_input_tokens_seen": 4697620480, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2805, |
| "grad_norm": 0.5259789824485779, |
| "learning_rate": 2.3051092515182022e-05, |
| "loss": 1.5512, |
| "num_input_tokens_seen": 4706009088, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.281, |
| "grad_norm": 0.6396327614784241, |
| "learning_rate": 2.2973448677938466e-05, |
| "loss": 1.6086, |
| "num_input_tokens_seen": 4714397696, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.2815, |
| "grad_norm": 0.523544430732727, |
| "learning_rate": 2.289575898643796e-05, |
| "loss": 1.5549, |
| "num_input_tokens_seen": 4722786304, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.282, |
| "grad_norm": 0.5481888651847839, |
| "learning_rate": 2.2818024638751655e-05, |
| "loss": 1.6328, |
| "num_input_tokens_seen": 4731174912, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.2825, |
| "grad_norm": 0.5783687233924866, |
| "learning_rate": 2.2740246833639366e-05, |
| "loss": 1.7459, |
| "num_input_tokens_seen": 4739563520, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.283, |
| "grad_norm": 0.5406476259231567, |
| "learning_rate": 2.266242677053105e-05, |
| "loss": 1.481, |
| "num_input_tokens_seen": 4747952128, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.2835, |
| "grad_norm": 0.7908449172973633, |
| "learning_rate": 2.2584565649508355e-05, |
| "loss": 1.6422, |
| "num_input_tokens_seen": 4756340736, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.284, |
| "grad_norm": 0.7007967829704285, |
| "learning_rate": 2.2506664671286087e-05, |
| "loss": 1.6323, |
| "num_input_tokens_seen": 4764729344, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.2845, |
| "grad_norm": 0.5286893248558044, |
| "learning_rate": 2.2428725037193697e-05, |
| "loss": 1.6692, |
| "num_input_tokens_seen": 4773117952, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.285, |
| "grad_norm": 0.7265971899032593, |
| "learning_rate": 2.2350747949156756e-05, |
| "loss": 1.6742, |
| "num_input_tokens_seen": 4781506560, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.2855, |
| "grad_norm": 0.592954158782959, |
| "learning_rate": 2.2272734609678426e-05, |
| "loss": 1.5685, |
| "num_input_tokens_seen": 4789895168, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.286, |
| "grad_norm": 0.532684862613678, |
| "learning_rate": 2.2194686221820905e-05, |
| "loss": 1.6014, |
| "num_input_tokens_seen": 4798283776, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.2865, |
| "grad_norm": 0.5546914339065552, |
| "learning_rate": 2.2116603989186895e-05, |
| "loss": 1.4608, |
| "num_input_tokens_seen": 4806672384, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.287, |
| "grad_norm": 0.654141366481781, |
| "learning_rate": 2.2038489115901e-05, |
| "loss": 1.4977, |
| "num_input_tokens_seen": 4815060992, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.2875, |
| "grad_norm": 0.48651596903800964, |
| "learning_rate": 2.196034280659122e-05, |
| "loss": 1.591, |
| "num_input_tokens_seen": 4823449600, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.6360925436019897, |
| "learning_rate": 2.1882166266370292e-05, |
| "loss": 1.5767, |
| "num_input_tokens_seen": 4831838208, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.2885, |
| "grad_norm": 0.48844876885414124, |
| "learning_rate": 2.1803960700817185e-05, |
| "loss": 1.8562, |
| "num_input_tokens_seen": 4840226816, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.289, |
| "grad_norm": 0.6248428821563721, |
| "learning_rate": 2.1725727315958473e-05, |
| "loss": 1.5894, |
| "num_input_tokens_seen": 4848615424, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.2895, |
| "grad_norm": 0.7215993404388428, |
| "learning_rate": 2.1647467318249715e-05, |
| "loss": 1.6851, |
| "num_input_tokens_seen": 4857004032, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.6206884980201721, |
| "learning_rate": 2.1569181914556904e-05, |
| "loss": 1.5265, |
| "num_input_tokens_seen": 4865392640, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2905, |
| "grad_norm": 0.5436145067214966, |
| "learning_rate": 2.1490872312137795e-05, |
| "loss": 1.7174, |
| "num_input_tokens_seen": 4873781248, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.291, |
| "grad_norm": 0.6190699338912964, |
| "learning_rate": 2.1412539718623337e-05, |
| "loss": 1.3116, |
| "num_input_tokens_seen": 4882169856, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.2915, |
| "grad_norm": 0.6792662143707275, |
| "learning_rate": 2.1334185341999024e-05, |
| "loss": 1.5902, |
| "num_input_tokens_seen": 4890558464, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.292, |
| "grad_norm": 0.5043581128120422, |
| "learning_rate": 2.125581039058627e-05, |
| "loss": 1.6654, |
| "num_input_tokens_seen": 4898947072, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2925, |
| "grad_norm": 0.5902267694473267, |
| "learning_rate": 2.117741607302378e-05, |
| "loss": 1.6671, |
| "num_input_tokens_seen": 4907335680, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.293, |
| "grad_norm": 0.4893746078014374, |
| "learning_rate": 2.109900359824892e-05, |
| "loss": 1.6003, |
| "num_input_tokens_seen": 4915724288, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.2935, |
| "grad_norm": 0.6055283546447754, |
| "learning_rate": 2.1020574175479035e-05, |
| "loss": 1.872, |
| "num_input_tokens_seen": 4924112896, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.294, |
| "grad_norm": 0.5110766291618347, |
| "learning_rate": 2.0942129014192854e-05, |
| "loss": 1.5215, |
| "num_input_tokens_seen": 4932501504, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.2945, |
| "grad_norm": 0.6477858424186707, |
| "learning_rate": 2.0863669324111807e-05, |
| "loss": 1.6343, |
| "num_input_tokens_seen": 4940890112, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.295, |
| "grad_norm": 0.5806403160095215, |
| "learning_rate": 2.0785196315181374e-05, |
| "loss": 1.4936, |
| "num_input_tokens_seen": 4949278720, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.2955, |
| "grad_norm": 0.638688862323761, |
| "learning_rate": 2.0706711197552427e-05, |
| "loss": 1.4357, |
| "num_input_tokens_seen": 4957667328, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 0.5387598872184753, |
| "learning_rate": 2.0628215181562567e-05, |
| "loss": 1.8016, |
| "num_input_tokens_seen": 4966055936, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.2965, |
| "grad_norm": 1.0725306272506714, |
| "learning_rate": 2.054970947771747e-05, |
| "loss": 1.8565, |
| "num_input_tokens_seen": 4974444544, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.297, |
| "grad_norm": 0.649124801158905, |
| "learning_rate": 2.0471195296672207e-05, |
| "loss": 1.5579, |
| "num_input_tokens_seen": 4982833152, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.2975, |
| "grad_norm": 0.7250737547874451, |
| "learning_rate": 2.0392673849212565e-05, |
| "loss": 1.5504, |
| "num_input_tokens_seen": 4991221760, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.298, |
| "grad_norm": 0.578586757183075, |
| "learning_rate": 2.0314146346236415e-05, |
| "loss": 1.5464, |
| "num_input_tokens_seen": 4999610368, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.2985, |
| "grad_norm": 0.6575354933738708, |
| "learning_rate": 2.0235613998734985e-05, |
| "loss": 1.818, |
| "num_input_tokens_seen": 5007998976, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.299, |
| "grad_norm": 0.4576837718486786, |
| "learning_rate": 2.0157078017774228e-05, |
| "loss": 1.7311, |
| "num_input_tokens_seen": 5016387584, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.2995, |
| "grad_norm": 0.6546321511268616, |
| "learning_rate": 2.0078539614476122e-05, |
| "loss": 1.8055, |
| "num_input_tokens_seen": 5024776192, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.6534265279769897, |
| "learning_rate": 2e-05, |
| "loss": 1.5457, |
| "num_input_tokens_seen": 5033164800, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3005, |
| "grad_norm": 0.687644898891449, |
| "learning_rate": 1.9921460385523884e-05, |
| "loss": 1.5855, |
| "num_input_tokens_seen": 5041553408, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.301, |
| "grad_norm": 0.5359103679656982, |
| "learning_rate": 1.9842921982225782e-05, |
| "loss": 1.5351, |
| "num_input_tokens_seen": 5049942016, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.3015, |
| "grad_norm": 0.4846513569355011, |
| "learning_rate": 1.9764386001265015e-05, |
| "loss": 1.518, |
| "num_input_tokens_seen": 5058330624, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.302, |
| "grad_norm": 0.59798663854599, |
| "learning_rate": 1.9685853653763592e-05, |
| "loss": 1.5217, |
| "num_input_tokens_seen": 5066719232, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3025, |
| "grad_norm": 0.45739227533340454, |
| "learning_rate": 1.960732615078744e-05, |
| "loss": 1.4634, |
| "num_input_tokens_seen": 5075107840, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.303, |
| "grad_norm": 0.5035345554351807, |
| "learning_rate": 1.95288047033278e-05, |
| "loss": 1.5809, |
| "num_input_tokens_seen": 5083496448, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.3035, |
| "grad_norm": 0.4467843174934387, |
| "learning_rate": 1.9450290522282533e-05, |
| "loss": 1.5883, |
| "num_input_tokens_seen": 5091885056, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.6839386224746704, |
| "learning_rate": 1.9371784818437436e-05, |
| "loss": 1.5005, |
| "num_input_tokens_seen": 5100273664, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3045, |
| "grad_norm": 0.4985339343547821, |
| "learning_rate": 1.929328880244758e-05, |
| "loss": 1.5321, |
| "num_input_tokens_seen": 5108662272, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.305, |
| "grad_norm": 0.581303596496582, |
| "learning_rate": 1.9214803684818636e-05, |
| "loss": 1.5554, |
| "num_input_tokens_seen": 5117050880, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3055, |
| "grad_norm": 0.4922437369823456, |
| "learning_rate": 1.9136330675888192e-05, |
| "loss": 1.5549, |
| "num_input_tokens_seen": 5125439488, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.306, |
| "grad_norm": 0.5247146487236023, |
| "learning_rate": 1.905787098580715e-05, |
| "loss": 1.9025, |
| "num_input_tokens_seen": 5133828096, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.3065, |
| "grad_norm": 0.5503407120704651, |
| "learning_rate": 1.897942582452097e-05, |
| "loss": 1.6479, |
| "num_input_tokens_seen": 5142216704, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.307, |
| "grad_norm": 0.5089061260223389, |
| "learning_rate": 1.890099640175109e-05, |
| "loss": 1.6435, |
| "num_input_tokens_seen": 5150605312, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3075, |
| "grad_norm": 0.5309303998947144, |
| "learning_rate": 1.882258392697622e-05, |
| "loss": 1.5801, |
| "num_input_tokens_seen": 5158993920, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.308, |
| "grad_norm": 0.45839405059814453, |
| "learning_rate": 1.8744189609413733e-05, |
| "loss": 1.6478, |
| "num_input_tokens_seen": 5167382528, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.3085, |
| "grad_norm": 0.5259501934051514, |
| "learning_rate": 1.8665814658000982e-05, |
| "loss": 1.6075, |
| "num_input_tokens_seen": 5175771136, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.309, |
| "grad_norm": 0.3942556381225586, |
| "learning_rate": 1.8587460281376673e-05, |
| "loss": 1.7839, |
| "num_input_tokens_seen": 5184159744, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.3095, |
| "grad_norm": 0.5850613117218018, |
| "learning_rate": 1.8509127687862208e-05, |
| "loss": 1.5596, |
| "num_input_tokens_seen": 5192548352, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.523441731929779, |
| "learning_rate": 1.8430818085443106e-05, |
| "loss": 1.5867, |
| "num_input_tokens_seen": 5200936960, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.3105, |
| "grad_norm": 0.5120038986206055, |
| "learning_rate": 1.835253268175029e-05, |
| "loss": 1.5351, |
| "num_input_tokens_seen": 5209325568, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.311, |
| "grad_norm": 0.5440953969955444, |
| "learning_rate": 1.8274272684041537e-05, |
| "loss": 1.5445, |
| "num_input_tokens_seen": 5217714176, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.3115, |
| "grad_norm": 0.6131273508071899, |
| "learning_rate": 1.8196039299182818e-05, |
| "loss": 1.541, |
| "num_input_tokens_seen": 5226102784, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 0.4762137234210968, |
| "learning_rate": 1.8117833733629715e-05, |
| "loss": 1.5922, |
| "num_input_tokens_seen": 5234491392, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3125, |
| "grad_norm": 0.5485303401947021, |
| "learning_rate": 1.8039657193408788e-05, |
| "loss": 1.5588, |
| "num_input_tokens_seen": 5242880000, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.313, |
| "grad_norm": 0.4996958374977112, |
| "learning_rate": 1.7961510884099005e-05, |
| "loss": 1.591, |
| "num_input_tokens_seen": 5251268608, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.3135, |
| "grad_norm": 0.5148504972457886, |
| "learning_rate": 1.7883396010813116e-05, |
| "loss": 1.5337, |
| "num_input_tokens_seen": 5259657216, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.314, |
| "grad_norm": 0.4594258666038513, |
| "learning_rate": 1.7805313778179095e-05, |
| "loss": 1.7369, |
| "num_input_tokens_seen": 5268045824, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.3145, |
| "grad_norm": 0.5273151397705078, |
| "learning_rate": 1.772726539032158e-05, |
| "loss": 1.4446, |
| "num_input_tokens_seen": 5276434432, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.315, |
| "grad_norm": 0.5533626079559326, |
| "learning_rate": 1.764925205084325e-05, |
| "loss": 1.5265, |
| "num_input_tokens_seen": 5284823040, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.3155, |
| "grad_norm": 0.4903721213340759, |
| "learning_rate": 1.7571274962806316e-05, |
| "loss": 1.7801, |
| "num_input_tokens_seen": 5293211648, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.316, |
| "grad_norm": 0.49773064255714417, |
| "learning_rate": 1.7493335328713913e-05, |
| "loss": 1.701, |
| "num_input_tokens_seen": 5301600256, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.3165, |
| "grad_norm": 0.4572118818759918, |
| "learning_rate": 1.741543435049165e-05, |
| "loss": 1.6753, |
| "num_input_tokens_seen": 5309988864, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.317, |
| "grad_norm": 0.5031276941299438, |
| "learning_rate": 1.7337573229468958e-05, |
| "loss": 1.5555, |
| "num_input_tokens_seen": 5318377472, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.3175, |
| "grad_norm": 0.5255052447319031, |
| "learning_rate": 1.7259753166360644e-05, |
| "loss": 1.4977, |
| "num_input_tokens_seen": 5326766080, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.318, |
| "grad_norm": 0.5148425698280334, |
| "learning_rate": 1.7181975361248348e-05, |
| "loss": 1.5357, |
| "num_input_tokens_seen": 5335154688, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.3185, |
| "grad_norm": 0.5783670544624329, |
| "learning_rate": 1.7104241013562045e-05, |
| "loss": 1.6418, |
| "num_input_tokens_seen": 5343543296, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.319, |
| "grad_norm": 0.40350809693336487, |
| "learning_rate": 1.702655132206154e-05, |
| "loss": 1.6714, |
| "num_input_tokens_seen": 5351931904, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.3195, |
| "grad_norm": 0.49052226543426514, |
| "learning_rate": 1.6948907484817985e-05, |
| "loss": 1.5475, |
| "num_input_tokens_seen": 5360320512, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.45277076959609985, |
| "learning_rate": 1.687131069919538e-05, |
| "loss": 1.5783, |
| "num_input_tokens_seen": 5368709120, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3205, |
| "grad_norm": 0.5141287446022034, |
| "learning_rate": 1.679376216183218e-05, |
| "loss": 1.6031, |
| "num_input_tokens_seen": 5377097728, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.321, |
| "grad_norm": 0.37356165051460266, |
| "learning_rate": 1.6716263068622744e-05, |
| "loss": 1.5794, |
| "num_input_tokens_seen": 5385486336, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.3215, |
| "grad_norm": 0.4839518666267395, |
| "learning_rate": 1.6638814614698965e-05, |
| "loss": 1.6072, |
| "num_input_tokens_seen": 5393874944, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.322, |
| "grad_norm": 0.4161027669906616, |
| "learning_rate": 1.6561417994411808e-05, |
| "loss": 1.8477, |
| "num_input_tokens_seen": 5402263552, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.3225, |
| "grad_norm": 0.5627133846282959, |
| "learning_rate": 1.648407440131291e-05, |
| "loss": 1.629, |
| "num_input_tokens_seen": 5410652160, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.323, |
| "grad_norm": 0.45590704679489136, |
| "learning_rate": 1.640678502813615e-05, |
| "loss": 1.6007, |
| "num_input_tokens_seen": 5419040768, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.3235, |
| "grad_norm": 0.6110590696334839, |
| "learning_rate": 1.6329551066779278e-05, |
| "loss": 1.6822, |
| "num_input_tokens_seen": 5427429376, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.324, |
| "grad_norm": 0.442785382270813, |
| "learning_rate": 1.6252373708285505e-05, |
| "loss": 1.8112, |
| "num_input_tokens_seen": 5435817984, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.3245, |
| "grad_norm": 0.4794580340385437, |
| "learning_rate": 1.6175254142825196e-05, |
| "loss": 1.59, |
| "num_input_tokens_seen": 5444206592, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.325, |
| "grad_norm": 0.4918476641178131, |
| "learning_rate": 1.609819355967744e-05, |
| "loss": 1.6212, |
| "num_input_tokens_seen": 5452595200, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3255, |
| "grad_norm": 0.5037944316864014, |
| "learning_rate": 1.602119314721175e-05, |
| "loss": 1.4959, |
| "num_input_tokens_seen": 5460983808, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.326, |
| "grad_norm": 0.638948380947113, |
| "learning_rate": 1.5944254092869756e-05, |
| "loss": 1.6125, |
| "num_input_tokens_seen": 5469372416, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.3265, |
| "grad_norm": 0.46257832646369934, |
| "learning_rate": 1.5867377583146836e-05, |
| "loss": 1.7298, |
| "num_input_tokens_seen": 5477761024, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.327, |
| "grad_norm": 0.5825093984603882, |
| "learning_rate": 1.579056480357389e-05, |
| "loss": 1.6123, |
| "num_input_tokens_seen": 5486149632, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.3275, |
| "grad_norm": 0.4797838628292084, |
| "learning_rate": 1.571381693869899e-05, |
| "loss": 1.6064, |
| "num_input_tokens_seen": 5494538240, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 0.6016621589660645, |
| "learning_rate": 1.5637135172069155e-05, |
| "loss": 1.535, |
| "num_input_tokens_seen": 5502926848, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.3285, |
| "grad_norm": 0.48697134852409363, |
| "learning_rate": 1.5560520686212083e-05, |
| "loss": 1.7159, |
| "num_input_tokens_seen": 5511315456, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.329, |
| "grad_norm": 0.5528718829154968, |
| "learning_rate": 1.548397466261793e-05, |
| "loss": 1.5113, |
| "num_input_tokens_seen": 5519704064, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.3295, |
| "grad_norm": 0.48319679498672485, |
| "learning_rate": 1.5407498281721063e-05, |
| "loss": 1.6741, |
| "num_input_tokens_seen": 5528092672, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.4717366099357605, |
| "learning_rate": 1.53310927228819e-05, |
| "loss": 1.5487, |
| "num_input_tokens_seen": 5536481280, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3305, |
| "grad_norm": 0.4618384838104248, |
| "learning_rate": 1.5254759164368644e-05, |
| "loss": 1.7316, |
| "num_input_tokens_seen": 5544869888, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.331, |
| "grad_norm": 0.7245141267776489, |
| "learning_rate": 1.517849878333923e-05, |
| "loss": 1.7489, |
| "num_input_tokens_seen": 5553258496, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.3315, |
| "grad_norm": 0.41874098777770996, |
| "learning_rate": 1.5102312755823053e-05, |
| "loss": 1.6518, |
| "num_input_tokens_seen": 5561647104, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.332, |
| "grad_norm": 0.46870699524879456, |
| "learning_rate": 1.5026202256702909e-05, |
| "loss": 1.598, |
| "num_input_tokens_seen": 5570035712, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.3325, |
| "grad_norm": 0.37529900670051575, |
| "learning_rate": 1.4950168459696841e-05, |
| "loss": 1.6456, |
| "num_input_tokens_seen": 5578424320, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.333, |
| "grad_norm": 0.4923308789730072, |
| "learning_rate": 1.4874212537340067e-05, |
| "loss": 1.4711, |
| "num_input_tokens_seen": 5586812928, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3335, |
| "grad_norm": 0.3929249048233032, |
| "learning_rate": 1.4798335660966869e-05, |
| "loss": 1.5761, |
| "num_input_tokens_seen": 5595201536, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.334, |
| "grad_norm": 0.4999372661113739, |
| "learning_rate": 1.4722539000692548e-05, |
| "loss": 1.5069, |
| "num_input_tokens_seen": 5603590144, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.3345, |
| "grad_norm": 0.44993501901626587, |
| "learning_rate": 1.4646823725395351e-05, |
| "loss": 1.6068, |
| "num_input_tokens_seen": 5611978752, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.335, |
| "grad_norm": 0.5363733172416687, |
| "learning_rate": 1.4571191002698517e-05, |
| "loss": 1.3344, |
| "num_input_tokens_seen": 5620367360, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.3355, |
| "grad_norm": 0.5620055198669434, |
| "learning_rate": 1.4495641998952172e-05, |
| "loss": 1.5378, |
| "num_input_tokens_seen": 5628755968, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.4853207468986511, |
| "learning_rate": 1.4420177879215419e-05, |
| "loss": 1.5294, |
| "num_input_tokens_seen": 5637144576, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3365, |
| "grad_norm": 0.4831787645816803, |
| "learning_rate": 1.434479980723833e-05, |
| "loss": 1.5064, |
| "num_input_tokens_seen": 5645533184, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.337, |
| "grad_norm": 0.6178323030471802, |
| "learning_rate": 1.4269508945444033e-05, |
| "loss": 1.5201, |
| "num_input_tokens_seen": 5653921792, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.3375, |
| "grad_norm": 0.4733405113220215, |
| "learning_rate": 1.4194306454910757e-05, |
| "loss": 1.6281, |
| "num_input_tokens_seen": 5662310400, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.338, |
| "grad_norm": 0.5506306886672974, |
| "learning_rate": 1.4119193495353925e-05, |
| "loss": 1.3539, |
| "num_input_tokens_seen": 5670699008, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.3385, |
| "grad_norm": 0.5859827995300293, |
| "learning_rate": 1.40441712251083e-05, |
| "loss": 1.6797, |
| "num_input_tokens_seen": 5679087616, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.339, |
| "grad_norm": 0.5389025211334229, |
| "learning_rate": 1.3969240801110088e-05, |
| "loss": 1.5045, |
| "num_input_tokens_seen": 5687476224, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.3395, |
| "grad_norm": 0.5681171417236328, |
| "learning_rate": 1.3894403378879132e-05, |
| "loss": 1.56, |
| "num_input_tokens_seen": 5695864832, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.5575128197669983, |
| "learning_rate": 1.3819660112501054e-05, |
| "loss": 1.4456, |
| "num_input_tokens_seen": 5704253440, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3405, |
| "grad_norm": 0.4929724335670471, |
| "learning_rate": 1.3745012154609492e-05, |
| "loss": 1.4229, |
| "num_input_tokens_seen": 5712642048, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.341, |
| "grad_norm": 0.44603395462036133, |
| "learning_rate": 1.3670460656368278e-05, |
| "loss": 1.6888, |
| "num_input_tokens_seen": 5721030656, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.3415, |
| "grad_norm": 0.4557688236236572, |
| "learning_rate": 1.3596006767453766e-05, |
| "loss": 1.6441, |
| "num_input_tokens_seen": 5729419264, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.342, |
| "grad_norm": 0.42381447553634644, |
| "learning_rate": 1.3521651636037017e-05, |
| "loss": 1.7471, |
| "num_input_tokens_seen": 5737807872, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.3425, |
| "grad_norm": 0.4877522587776184, |
| "learning_rate": 1.3447396408766134e-05, |
| "loss": 1.6108, |
| "num_input_tokens_seen": 5746196480, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.343, |
| "grad_norm": 0.5389087796211243, |
| "learning_rate": 1.3373242230748579e-05, |
| "loss": 1.4052, |
| "num_input_tokens_seen": 5754585088, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.3435, |
| "grad_norm": 0.5732712745666504, |
| "learning_rate": 1.3299190245533522e-05, |
| "loss": 1.6114, |
| "num_input_tokens_seen": 5762973696, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 0.37976858019828796, |
| "learning_rate": 1.3225241595094173e-05, |
| "loss": 1.6381, |
| "num_input_tokens_seen": 5771362304, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.3445, |
| "grad_norm": 0.6109358668327332, |
| "learning_rate": 1.3151397419810207e-05, |
| "loss": 1.4704, |
| "num_input_tokens_seen": 5779750912, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.345, |
| "grad_norm": 0.3784377872943878, |
| "learning_rate": 1.3077658858450137e-05, |
| "loss": 1.7119, |
| "num_input_tokens_seen": 5788139520, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3455, |
| "grad_norm": 0.534516453742981, |
| "learning_rate": 1.3004027048153826e-05, |
| "loss": 1.4831, |
| "num_input_tokens_seen": 5796528128, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.346, |
| "grad_norm": 0.44002264738082886, |
| "learning_rate": 1.2930503124414862e-05, |
| "loss": 1.6671, |
| "num_input_tokens_seen": 5804916736, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.3465, |
| "grad_norm": 0.47504231333732605, |
| "learning_rate": 1.2857088221063099e-05, |
| "loss": 1.7426, |
| "num_input_tokens_seen": 5813305344, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.347, |
| "grad_norm": 0.4503721296787262, |
| "learning_rate": 1.2783783470247164e-05, |
| "loss": 1.7178, |
| "num_input_tokens_seen": 5821693952, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.3475, |
| "grad_norm": 0.38480740785598755, |
| "learning_rate": 1.2710590002417008e-05, |
| "loss": 1.5611, |
| "num_input_tokens_seen": 5830082560, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.348, |
| "grad_norm": 0.4555624723434448, |
| "learning_rate": 1.2637508946306443e-05, |
| "loss": 1.5239, |
| "num_input_tokens_seen": 5838471168, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.3485, |
| "grad_norm": 0.38544774055480957, |
| "learning_rate": 1.2564541428915762e-05, |
| "loss": 1.5648, |
| "num_input_tokens_seen": 5846859776, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.349, |
| "grad_norm": 0.4004034399986267, |
| "learning_rate": 1.2491688575494337e-05, |
| "loss": 1.8762, |
| "num_input_tokens_seen": 5855248384, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3495, |
| "grad_norm": 0.4557759761810303, |
| "learning_rate": 1.2418951509523312e-05, |
| "loss": 1.7131, |
| "num_input_tokens_seen": 5863636992, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.4302028715610504, |
| "learning_rate": 1.2346331352698206e-05, |
| "loss": 1.4877, |
| "num_input_tokens_seen": 5872025600, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3505, |
| "grad_norm": 0.4745676517486572, |
| "learning_rate": 1.2273829224911685e-05, |
| "loss": 1.5291, |
| "num_input_tokens_seen": 5880414208, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.351, |
| "grad_norm": 0.4593994617462158, |
| "learning_rate": 1.2201446244236242e-05, |
| "loss": 1.496, |
| "num_input_tokens_seen": 5888802816, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.3515, |
| "grad_norm": 0.4248128831386566, |
| "learning_rate": 1.2129183526906971e-05, |
| "loss": 1.5912, |
| "num_input_tokens_seen": 5897191424, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.4090263545513153, |
| "learning_rate": 1.205704218730439e-05, |
| "loss": 1.6625, |
| "num_input_tokens_seen": 5905580032, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.3525, |
| "grad_norm": 0.4674663841724396, |
| "learning_rate": 1.1985023337937185e-05, |
| "loss": 1.6483, |
| "num_input_tokens_seen": 5913968640, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.353, |
| "grad_norm": 0.41158390045166016, |
| "learning_rate": 1.1913128089425103e-05, |
| "loss": 1.4624, |
| "num_input_tokens_seen": 5922357248, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.3535, |
| "grad_norm": 0.37938833236694336, |
| "learning_rate": 1.1841357550481817e-05, |
| "loss": 1.5231, |
| "num_input_tokens_seen": 5930745856, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.354, |
| "grad_norm": 0.5211839079856873, |
| "learning_rate": 1.1769712827897825e-05, |
| "loss": 1.6377, |
| "num_input_tokens_seen": 5939134464, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.3545, |
| "grad_norm": 0.4222390055656433, |
| "learning_rate": 1.1698195026523379e-05, |
| "loss": 1.5385, |
| "num_input_tokens_seen": 5947523072, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.355, |
| "grad_norm": 0.5263103246688843, |
| "learning_rate": 1.1626805249251444e-05, |
| "loss": 1.432, |
| "num_input_tokens_seen": 5955911680, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3555, |
| "grad_norm": 0.40875500440597534, |
| "learning_rate": 1.1555544597000693e-05, |
| "loss": 1.5782, |
| "num_input_tokens_seen": 5964300288, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.356, |
| "grad_norm": 0.4516502618789673, |
| "learning_rate": 1.1484414168698547e-05, |
| "loss": 1.7217, |
| "num_input_tokens_seen": 5972688896, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.3565, |
| "grad_norm": 0.4005357623100281, |
| "learning_rate": 1.1413415061264205e-05, |
| "loss": 1.5356, |
| "num_input_tokens_seen": 5981077504, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.357, |
| "grad_norm": 0.4757128059864044, |
| "learning_rate": 1.134254836959173e-05, |
| "loss": 1.5754, |
| "num_input_tokens_seen": 5989466112, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3575, |
| "grad_norm": 0.3795611560344696, |
| "learning_rate": 1.1271815186533156e-05, |
| "loss": 1.5715, |
| "num_input_tokens_seen": 5997854720, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.358, |
| "grad_norm": 0.48202642798423767, |
| "learning_rate": 1.1201216602881696e-05, |
| "loss": 1.474, |
| "num_input_tokens_seen": 6006243328, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.3585, |
| "grad_norm": 0.4397919178009033, |
| "learning_rate": 1.1130753707354836e-05, |
| "loss": 1.4755, |
| "num_input_tokens_seen": 6014631936, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.359, |
| "grad_norm": 0.4739425778388977, |
| "learning_rate": 1.106042758657758e-05, |
| "loss": 1.5371, |
| "num_input_tokens_seen": 6023020544, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.3595, |
| "grad_norm": 0.477103590965271, |
| "learning_rate": 1.0990239325065714e-05, |
| "loss": 1.6359, |
| "num_input_tokens_seen": 6031409152, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.40918999910354614, |
| "learning_rate": 1.0920190005209066e-05, |
| "loss": 1.5021, |
| "num_input_tokens_seen": 6039797760, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.3605, |
| "grad_norm": 0.5159934163093567, |
| "learning_rate": 1.085028070725479e-05, |
| "loss": 1.6418, |
| "num_input_tokens_seen": 6048186368, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.361, |
| "grad_norm": 0.7591975331306458, |
| "learning_rate": 1.0780512509290758e-05, |
| "loss": 1.7691, |
| "num_input_tokens_seen": 6056574976, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.3615, |
| "grad_norm": 0.5116597414016724, |
| "learning_rate": 1.0710886487228868e-05, |
| "loss": 1.6482, |
| "num_input_tokens_seen": 6064963584, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.362, |
| "grad_norm": 0.41808661818504333, |
| "learning_rate": 1.0641403714788537e-05, |
| "loss": 1.4123, |
| "num_input_tokens_seen": 6073352192, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.3625, |
| "grad_norm": 0.5217900276184082, |
| "learning_rate": 1.0572065263480046e-05, |
| "loss": 1.5236, |
| "num_input_tokens_seen": 6081740800, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.363, |
| "grad_norm": 0.4180975556373596, |
| "learning_rate": 1.0502872202588113e-05, |
| "loss": 1.6335, |
| "num_input_tokens_seen": 6090129408, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.3635, |
| "grad_norm": 0.513573944568634, |
| "learning_rate": 1.043382559915532e-05, |
| "loss": 1.5707, |
| "num_input_tokens_seen": 6098518016, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.364, |
| "grad_norm": 0.4156613051891327, |
| "learning_rate": 1.0364926517965693e-05, |
| "loss": 1.5941, |
| "num_input_tokens_seen": 6106906624, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.3645, |
| "grad_norm": 0.43025484681129456, |
| "learning_rate": 1.0296176021528326e-05, |
| "loss": 1.6518, |
| "num_input_tokens_seen": 6115295232, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.365, |
| "grad_norm": 0.4618057906627655, |
| "learning_rate": 1.0227575170060909e-05, |
| "loss": 1.4235, |
| "num_input_tokens_seen": 6123683840, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3655, |
| "grad_norm": 0.3291275203227997, |
| "learning_rate": 1.0159125021473421e-05, |
| "loss": 1.6211, |
| "num_input_tokens_seen": 6132072448, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.366, |
| "grad_norm": 0.41411274671554565, |
| "learning_rate": 1.009082663135185e-05, |
| "loss": 1.5563, |
| "num_input_tokens_seen": 6140461056, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.3665, |
| "grad_norm": 0.3771957457065582, |
| "learning_rate": 1.0022681052941856e-05, |
| "loss": 1.6889, |
| "num_input_tokens_seen": 6148849664, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.367, |
| "grad_norm": 0.4875394105911255, |
| "learning_rate": 9.95468933713255e-06, |
| "loss": 1.3924, |
| "num_input_tokens_seen": 6157238272, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.3675, |
| "grad_norm": 0.421825647354126, |
| "learning_rate": 9.886852532440312e-06, |
| "loss": 1.8188, |
| "num_input_tokens_seen": 6165626880, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.4703611433506012, |
| "learning_rate": 9.819171684992575e-06, |
| "loss": 1.6558, |
| "num_input_tokens_seen": 6174015488, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.3685, |
| "grad_norm": 0.49299588799476624, |
| "learning_rate": 9.751647838511747e-06, |
| "loss": 1.6531, |
| "num_input_tokens_seen": 6182404096, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.369, |
| "grad_norm": 0.4184141457080841, |
| "learning_rate": 9.684282034299053e-06, |
| "loss": 1.4939, |
| "num_input_tokens_seen": 6190792704, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.3695, |
| "grad_norm": 0.4581114947795868, |
| "learning_rate": 9.61707531121855e-06, |
| "loss": 1.4902, |
| "num_input_tokens_seen": 6199181312, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.37457022070884705, |
| "learning_rate": 9.550028705681024e-06, |
| "loss": 1.5622, |
| "num_input_tokens_seen": 6207569920, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.3705, |
| "grad_norm": 0.4025091826915741, |
| "learning_rate": 9.483143251628088e-06, |
| "loss": 1.6402, |
| "num_input_tokens_seen": 6215958528, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.371, |
| "grad_norm": 0.4053475856781006, |
| "learning_rate": 9.416419980516192e-06, |
| "loss": 1.6449, |
| "num_input_tokens_seen": 6224347136, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.3715, |
| "grad_norm": 0.3622041940689087, |
| "learning_rate": 9.349859921300704e-06, |
| "loss": 1.426, |
| "num_input_tokens_seen": 6232735744, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.372, |
| "grad_norm": 0.4312250316143036, |
| "learning_rate": 9.283464100420064e-06, |
| "loss": 1.6787, |
| "num_input_tokens_seen": 6241124352, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.3725, |
| "grad_norm": 0.39573124051094055, |
| "learning_rate": 9.217233541779995e-06, |
| "loss": 1.4883, |
| "num_input_tokens_seen": 6249512960, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.373, |
| "grad_norm": 0.4040946960449219, |
| "learning_rate": 9.15116926673763e-06, |
| "loss": 1.6978, |
| "num_input_tokens_seen": 6257901568, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.3735, |
| "grad_norm": 0.412345826625824, |
| "learning_rate": 9.085272294085803e-06, |
| "loss": 1.6549, |
| "num_input_tokens_seen": 6266290176, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.374, |
| "grad_norm": 0.4017808139324188, |
| "learning_rate": 9.019543640037363e-06, |
| "loss": 1.4813, |
| "num_input_tokens_seen": 6274678784, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.3745, |
| "grad_norm": 0.40174025297164917, |
| "learning_rate": 8.95398431820947e-06, |
| "loss": 1.5915, |
| "num_input_tokens_seen": 6283067392, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.37170112133026123, |
| "learning_rate": 8.888595339607961e-06, |
| "loss": 1.6085, |
| "num_input_tokens_seen": 6291456000, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3755, |
| "grad_norm": 0.3888947069644928, |
| "learning_rate": 8.82337771261177e-06, |
| "loss": 1.4901, |
| "num_input_tokens_seen": 6299844608, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 0.39092621207237244, |
| "learning_rate": 8.758332442957394e-06, |
| "loss": 1.4806, |
| "num_input_tokens_seen": 6308233216, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.3765, |
| "grad_norm": 0.4453209638595581, |
| "learning_rate": 8.693460533723346e-06, |
| "loss": 1.6993, |
| "num_input_tokens_seen": 6316621824, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.377, |
| "grad_norm": 0.3836767077445984, |
| "learning_rate": 8.62876298531472e-06, |
| "loss": 1.6511, |
| "num_input_tokens_seen": 6325010432, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.3775, |
| "grad_norm": 0.3998052775859833, |
| "learning_rate": 8.564240795447758e-06, |
| "loss": 1.5698, |
| "num_input_tokens_seen": 6333399040, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.378, |
| "grad_norm": 0.3738134503364563, |
| "learning_rate": 8.499894959134436e-06, |
| "loss": 1.6333, |
| "num_input_tokens_seen": 6341787648, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.3785, |
| "grad_norm": 0.36466184258461, |
| "learning_rate": 8.435726468667135e-06, |
| "loss": 1.5178, |
| "num_input_tokens_seen": 6350176256, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.379, |
| "grad_norm": 0.4085821807384491, |
| "learning_rate": 8.37173631360339e-06, |
| "loss": 1.6823, |
| "num_input_tokens_seen": 6358564864, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.3795, |
| "grad_norm": 0.3954522907733917, |
| "learning_rate": 8.307925480750535e-06, |
| "loss": 1.4361, |
| "num_input_tokens_seen": 6366953472, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.4430011808872223, |
| "learning_rate": 8.24429495415054e-06, |
| "loss": 1.6206, |
| "num_input_tokens_seen": 6375342080, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.3805, |
| "grad_norm": 0.4140196740627289, |
| "learning_rate": 8.180845715064851e-06, |
| "loss": 1.4822, |
| "num_input_tokens_seen": 6383730688, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.381, |
| "grad_norm": 0.3814021944999695, |
| "learning_rate": 8.117578741959232e-06, |
| "loss": 1.584, |
| "num_input_tokens_seen": 6392119296, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.3815, |
| "grad_norm": 0.3553796410560608, |
| "learning_rate": 8.054495010488658e-06, |
| "loss": 1.556, |
| "num_input_tokens_seen": 6400507904, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.382, |
| "grad_norm": 0.38275453448295593, |
| "learning_rate": 7.991595493482323e-06, |
| "loss": 1.4992, |
| "num_input_tokens_seen": 6408896512, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.3825, |
| "grad_norm": 0.36514538526535034, |
| "learning_rate": 7.928881160928572e-06, |
| "loss": 1.5722, |
| "num_input_tokens_seen": 6417285120, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.383, |
| "grad_norm": 0.4606564939022064, |
| "learning_rate": 7.86635297996001e-06, |
| "loss": 1.5389, |
| "num_input_tokens_seen": 6425673728, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.3835, |
| "grad_norm": 0.40744417905807495, |
| "learning_rate": 7.804011914838524e-06, |
| "loss": 1.4212, |
| "num_input_tokens_seen": 6434062336, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.42578741908073425, |
| "learning_rate": 7.741858926940475e-06, |
| "loss": 1.5838, |
| "num_input_tokens_seen": 6442450944, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.3845, |
| "grad_norm": 0.45914557576179504, |
| "learning_rate": 7.679894974741807e-06, |
| "loss": 1.633, |
| "num_input_tokens_seen": 6450839552, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.385, |
| "grad_norm": 0.40932121872901917, |
| "learning_rate": 7.618121013803319e-06, |
| "loss": 1.422, |
| "num_input_tokens_seen": 6459228160, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3855, |
| "grad_norm": 0.35300320386886597, |
| "learning_rate": 7.556537996755919e-06, |
| "loss": 1.5215, |
| "num_input_tokens_seen": 6467616768, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.386, |
| "grad_norm": 0.3876708149909973, |
| "learning_rate": 7.495146873285904e-06, |
| "loss": 1.6108, |
| "num_input_tokens_seen": 6476005376, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.3865, |
| "grad_norm": 0.3526962697505951, |
| "learning_rate": 7.433948590120326e-06, |
| "loss": 1.5735, |
| "num_input_tokens_seen": 6484393984, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.387, |
| "grad_norm": 0.377326101064682, |
| "learning_rate": 7.3729440910124464e-06, |
| "loss": 1.5143, |
| "num_input_tokens_seen": 6492782592, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.3875, |
| "grad_norm": 0.3809109628200531, |
| "learning_rate": 7.312134316727093e-06, |
| "loss": 1.5331, |
| "num_input_tokens_seen": 6501171200, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.388, |
| "grad_norm": 0.41740885376930237, |
| "learning_rate": 7.251520205026206e-06, |
| "loss": 1.7121, |
| "num_input_tokens_seen": 6509559808, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3885, |
| "grad_norm": 0.375685453414917, |
| "learning_rate": 7.191102690654384e-06, |
| "loss": 1.4729, |
| "num_input_tokens_seen": 6517948416, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.389, |
| "grad_norm": 0.3461897671222687, |
| "learning_rate": 7.130882705324422e-06, |
| "loss": 1.4276, |
| "num_input_tokens_seen": 6526337024, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.3895, |
| "grad_norm": 0.3789466619491577, |
| "learning_rate": 7.070861177703006e-06, |
| "loss": 1.5973, |
| "num_input_tokens_seen": 6534725632, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.40518102049827576, |
| "learning_rate": 7.01103903339633e-06, |
| "loss": 1.4118, |
| "num_input_tokens_seen": 6543114240, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3905, |
| "grad_norm": 0.3697455823421478, |
| "learning_rate": 6.95141719493587e-06, |
| "loss": 1.6321, |
| "num_input_tokens_seen": 6551502848, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.391, |
| "grad_norm": 0.39922866225242615, |
| "learning_rate": 6.891996581764124e-06, |
| "loss": 1.5606, |
| "num_input_tokens_seen": 6559891456, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.3915, |
| "grad_norm": 0.3575364947319031, |
| "learning_rate": 6.832778110220457e-06, |
| "loss": 1.5569, |
| "num_input_tokens_seen": 6568280064, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 0.36905914545059204, |
| "learning_rate": 6.773762693526967e-06, |
| "loss": 1.5744, |
| "num_input_tokens_seen": 6576668672, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.3925, |
| "grad_norm": 0.5625297427177429, |
| "learning_rate": 6.7149512417743725e-06, |
| "loss": 1.3769, |
| "num_input_tokens_seen": 6585057280, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.393, |
| "grad_norm": 0.3726418614387512, |
| "learning_rate": 6.656344661908003e-06, |
| "loss": 1.6744, |
| "num_input_tokens_seen": 6593445888, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3935, |
| "grad_norm": 0.39471179246902466, |
| "learning_rate": 6.597943857713849e-06, |
| "loss": 1.5823, |
| "num_input_tokens_seen": 6601834496, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.394, |
| "grad_norm": 0.4886147975921631, |
| "learning_rate": 6.539749729804539e-06, |
| "loss": 1.4887, |
| "num_input_tokens_seen": 6610223104, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.3945, |
| "grad_norm": 0.3547024130821228, |
| "learning_rate": 6.4817631756055086e-06, |
| "loss": 1.569, |
| "num_input_tokens_seen": 6618611712, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.395, |
| "grad_norm": 0.4050310552120209, |
| "learning_rate": 6.423985089341165e-06, |
| "loss": 1.5851, |
| "num_input_tokens_seen": 6627000320, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3955, |
| "grad_norm": 0.36963605880737305, |
| "learning_rate": 6.366416362021077e-06, |
| "loss": 1.4311, |
| "num_input_tokens_seen": 6635388928, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.396, |
| "grad_norm": 0.3320269286632538, |
| "learning_rate": 6.3090578814262256e-06, |
| "loss": 1.6496, |
| "num_input_tokens_seen": 6643777536, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3965, |
| "grad_norm": 0.4373522996902466, |
| "learning_rate": 6.251910532095349e-06, |
| "loss": 1.464, |
| "num_input_tokens_seen": 6652166144, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.397, |
| "grad_norm": 0.3273358941078186, |
| "learning_rate": 6.1949751953112565e-06, |
| "loss": 1.5889, |
| "num_input_tokens_seen": 6660554752, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.3975, |
| "grad_norm": 0.361979216337204, |
| "learning_rate": 6.138252749087286e-06, |
| "loss": 1.5708, |
| "num_input_tokens_seen": 6668943360, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.398, |
| "grad_norm": 0.3504394292831421, |
| "learning_rate": 6.081744068153714e-06, |
| "loss": 1.7071, |
| "num_input_tokens_seen": 6677331968, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.3985, |
| "grad_norm": 0.35012826323509216, |
| "learning_rate": 6.02545002394432e-06, |
| "loss": 1.5771, |
| "num_input_tokens_seen": 6685720576, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.399, |
| "grad_norm": 0.34323230385780334, |
| "learning_rate": 5.969371484582887e-06, |
| "loss": 1.5181, |
| "num_input_tokens_seen": 6694109184, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.3995, |
| "grad_norm": 0.3410869836807251, |
| "learning_rate": 5.913509314869874e-06, |
| "loss": 1.5847, |
| "num_input_tokens_seen": 6702497792, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.3655712604522705, |
| "learning_rate": 5.857864376269051e-06, |
| "loss": 1.5978, |
| "num_input_tokens_seen": 6710886400, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4005, |
| "grad_norm": 0.3299401104450226, |
| "learning_rate": 5.802437526894198e-06, |
| "loss": 1.5185, |
| "num_input_tokens_seen": 6719275008, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.401, |
| "grad_norm": 0.33015549182891846, |
| "learning_rate": 5.747229621495893e-06, |
| "loss": 1.4544, |
| "num_input_tokens_seen": 6727663616, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.4015, |
| "grad_norm": 0.28621819615364075, |
| "learning_rate": 5.692241511448342e-06, |
| "loss": 1.6958, |
| "num_input_tokens_seen": 6736052224, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.402, |
| "grad_norm": 0.3436781167984009, |
| "learning_rate": 5.637474044736227e-06, |
| "loss": 1.4529, |
| "num_input_tokens_seen": 6744440832, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.4025, |
| "grad_norm": 0.3146877884864807, |
| "learning_rate": 5.582928065941624e-06, |
| "loss": 1.6761, |
| "num_input_tokens_seen": 6752829440, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.403, |
| "grad_norm": 0.3468015789985657, |
| "learning_rate": 5.528604416231016e-06, |
| "loss": 1.5827, |
| "num_input_tokens_seen": 6761218048, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.4035, |
| "grad_norm": 0.30543383955955505, |
| "learning_rate": 5.474503933342272e-06, |
| "loss": 1.6216, |
| "num_input_tokens_seen": 6769606656, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.404, |
| "grad_norm": 0.33638039231300354, |
| "learning_rate": 5.4206274515717735e-06, |
| "loss": 1.7167, |
| "num_input_tokens_seen": 6777995264, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.4045, |
| "grad_norm": 0.2884032428264618, |
| "learning_rate": 5.366975801761507e-06, |
| "loss": 1.4414, |
| "num_input_tokens_seen": 6786383872, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.405, |
| "grad_norm": 0.3318146765232086, |
| "learning_rate": 5.313549811286294e-06, |
| "loss": 1.712, |
| "num_input_tokens_seen": 6794772480, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.4055, |
| "grad_norm": 0.3560062646865845, |
| "learning_rate": 5.260350304040987e-06, |
| "loss": 1.5902, |
| "num_input_tokens_seen": 6803161088, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.406, |
| "grad_norm": 0.31840780377388, |
| "learning_rate": 5.207378100427804e-06, |
| "loss": 1.5372, |
| "num_input_tokens_seen": 6811549696, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.4065, |
| "grad_norm": 0.3286549150943756, |
| "learning_rate": 5.154634017343662e-06, |
| "loss": 1.6759, |
| "num_input_tokens_seen": 6819938304, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.407, |
| "grad_norm": 0.3250563144683838, |
| "learning_rate": 5.102118868167565e-06, |
| "loss": 1.5406, |
| "num_input_tokens_seen": 6828326912, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.4075, |
| "grad_norm": 0.3316640555858612, |
| "learning_rate": 5.049833462748061e-06, |
| "loss": 1.7991, |
| "num_input_tokens_seen": 6836715520, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 0.3580614924430847, |
| "learning_rate": 4.997778607390809e-06, |
| "loss": 1.6432, |
| "num_input_tokens_seen": 6845104128, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.4085, |
| "grad_norm": 0.3245338499546051, |
| "learning_rate": 4.945955104846061e-06, |
| "loss": 1.6115, |
| "num_input_tokens_seen": 6853492736, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.409, |
| "grad_norm": 0.3292555510997772, |
| "learning_rate": 4.89436375429633e-06, |
| "loss": 1.3836, |
| "num_input_tokens_seen": 6861881344, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.4095, |
| "grad_norm": 0.33106541633605957, |
| "learning_rate": 4.843005351344065e-06, |
| "loss": 1.4818, |
| "num_input_tokens_seen": 6870269952, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.4454377293586731, |
| "learning_rate": 4.791880687999382e-06, |
| "loss": 1.4974, |
| "num_input_tokens_seen": 6878658560, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.4105, |
| "grad_norm": 0.3861941993236542, |
| "learning_rate": 4.740990552667823e-06, |
| "loss": 1.5086, |
| "num_input_tokens_seen": 6887047168, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.411, |
| "grad_norm": 0.3597433865070343, |
| "learning_rate": 4.6903357301382405e-06, |
| "loss": 1.515, |
| "num_input_tokens_seen": 6895435776, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.4115, |
| "grad_norm": 0.35469549894332886, |
| "learning_rate": 4.639917001570644e-06, |
| "loss": 1.635, |
| "num_input_tokens_seen": 6903824384, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.412, |
| "grad_norm": 0.33241182565689087, |
| "learning_rate": 4.589735144484217e-06, |
| "loss": 1.6323, |
| "num_input_tokens_seen": 6912212992, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.4125, |
| "grad_norm": 0.37007105350494385, |
| "learning_rate": 4.53979093274526e-06, |
| "loss": 1.6834, |
| "num_input_tokens_seen": 6920601600, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.413, |
| "grad_norm": 0.3709860146045685, |
| "learning_rate": 4.490085136555313e-06, |
| "loss": 1.4491, |
| "num_input_tokens_seen": 6928990208, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.4135, |
| "grad_norm": 0.3294796645641327, |
| "learning_rate": 4.440618522439237e-06, |
| "loss": 1.4501, |
| "num_input_tokens_seen": 6937378816, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.414, |
| "grad_norm": 0.3185144066810608, |
| "learning_rate": 4.391391853233404e-06, |
| "loss": 1.4515, |
| "num_input_tokens_seen": 6945767424, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.4145, |
| "grad_norm": 0.3276340961456299, |
| "learning_rate": 4.342405888073971e-06, |
| "loss": 1.6034, |
| "num_input_tokens_seen": 6954156032, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.415, |
| "grad_norm": 0.3218885362148285, |
| "learning_rate": 4.293661382385106e-06, |
| "loss": 1.4493, |
| "num_input_tokens_seen": 6962544640, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.4155, |
| "grad_norm": 0.318389356136322, |
| "learning_rate": 4.245159087867383e-06, |
| "loss": 1.7035, |
| "num_input_tokens_seen": 6970933248, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.36420193314552307, |
| "learning_rate": 4.196899752486192e-06, |
| "loss": 1.4633, |
| "num_input_tokens_seen": 6979321856, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.4165, |
| "grad_norm": 0.3152608275413513, |
| "learning_rate": 4.148884120460186e-06, |
| "loss": 1.4906, |
| "num_input_tokens_seen": 6987710464, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.417, |
| "grad_norm": 0.30179423093795776, |
| "learning_rate": 4.1011129322498e-06, |
| "loss": 1.6253, |
| "num_input_tokens_seen": 6996099072, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.4175, |
| "grad_norm": 0.28013914823532104, |
| "learning_rate": 4.05358692454586e-06, |
| "loss": 1.5175, |
| "num_input_tokens_seen": 7004487680, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.418, |
| "grad_norm": 0.344892293214798, |
| "learning_rate": 4.006306830258189e-06, |
| "loss": 1.6806, |
| "num_input_tokens_seen": 7012876288, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.4185, |
| "grad_norm": 0.31076744198799133, |
| "learning_rate": 3.9592733785043405e-06, |
| "loss": 1.4867, |
| "num_input_tokens_seen": 7021264896, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.419, |
| "grad_norm": 0.30278122425079346, |
| "learning_rate": 3.91248729459831e-06, |
| "loss": 1.6043, |
| "num_input_tokens_seen": 7029653504, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.4195, |
| "grad_norm": 1.5924491882324219, |
| "learning_rate": 3.865949300039404e-06, |
| "loss": 1.6819, |
| "num_input_tokens_seen": 7038042112, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.3173651695251465, |
| "learning_rate": 3.819660112501053e-06, |
| "loss": 1.5288, |
| "num_input_tokens_seen": 7046430720, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.4205, |
| "grad_norm": 0.390505850315094, |
| "learning_rate": 3.773620445819799e-06, |
| "loss": 1.7401, |
| "num_input_tokens_seen": 7054819328, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.421, |
| "grad_norm": 0.3141814172267914, |
| "learning_rate": 3.727831009984262e-06, |
| "loss": 1.5442, |
| "num_input_tokens_seen": 7063207936, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.4215, |
| "grad_norm": 0.32728394865989685, |
| "learning_rate": 3.682292511124179e-06, |
| "loss": 1.524, |
| "num_input_tokens_seen": 7071596544, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.422, |
| "grad_norm": 0.30296072363853455, |
| "learning_rate": 3.637005651499528e-06, |
| "loss": 1.5514, |
| "num_input_tokens_seen": 7079985152, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.4225, |
| "grad_norm": 0.3034377098083496, |
| "learning_rate": 3.5919711294897285e-06, |
| "loss": 1.6488, |
| "num_input_tokens_seen": 7088373760, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.423, |
| "grad_norm": 0.3028651773929596, |
| "learning_rate": 3.5471896395828064e-06, |
| "loss": 1.6122, |
| "num_input_tokens_seen": 7096762368, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.4235, |
| "grad_norm": 0.28566136956214905, |
| "learning_rate": 3.502661872364732e-06, |
| "loss": 1.6506, |
| "num_input_tokens_seen": 7105150976, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 0.32108139991760254, |
| "learning_rate": 3.4583885145087613e-06, |
| "loss": 1.4408, |
| "num_input_tokens_seen": 7113539584, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.4245, |
| "grad_norm": 0.33206671476364136, |
| "learning_rate": 3.414370248764849e-06, |
| "loss": 1.4943, |
| "num_input_tokens_seen": 7121928192, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.425, |
| "grad_norm": 0.27473020553588867, |
| "learning_rate": 3.3706077539490933e-06, |
| "loss": 1.5191, |
| "num_input_tokens_seen": 7130316800, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.4255, |
| "grad_norm": 0.291063517332077, |
| "learning_rate": 3.327101704933313e-06, |
| "loss": 1.341, |
| "num_input_tokens_seen": 7138705408, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.426, |
| "grad_norm": 0.35267290472984314, |
| "learning_rate": 3.2838527726345994e-06, |
| "loss": 1.3756, |
| "num_input_tokens_seen": 7147094016, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.4265, |
| "grad_norm": 0.29957467317581177, |
| "learning_rate": 3.240861624004983e-06, |
| "loss": 1.7059, |
| "num_input_tokens_seen": 7155482624, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.427, |
| "grad_norm": 0.29212555289268494, |
| "learning_rate": 3.198128922021162e-06, |
| "loss": 1.4891, |
| "num_input_tokens_seen": 7163871232, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.4275, |
| "grad_norm": 0.3003202974796295, |
| "learning_rate": 3.155655325674272e-06, |
| "loss": 1.5788, |
| "num_input_tokens_seen": 7172259840, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.428, |
| "grad_norm": 0.29497838020324707, |
| "learning_rate": 3.1134414899597033e-06, |
| "loss": 1.6972, |
| "num_input_tokens_seen": 7180648448, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.4285, |
| "grad_norm": 0.28923463821411133, |
| "learning_rate": 3.0714880658670165e-06, |
| "loss": 1.4985, |
| "num_input_tokens_seen": 7189037056, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.429, |
| "grad_norm": 0.31097179651260376, |
| "learning_rate": 3.0297957003699284e-06, |
| "loss": 1.5965, |
| "num_input_tokens_seen": 7197425664, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.4295, |
| "grad_norm": 0.2652358114719391, |
| "learning_rate": 2.9883650364162784e-06, |
| "loss": 1.4394, |
| "num_input_tokens_seen": 7205814272, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.29938805103302, |
| "learning_rate": 2.947196712918157e-06, |
| "loss": 1.6263, |
| "num_input_tokens_seen": 7214202880, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.4305, |
| "grad_norm": 0.2985369861125946, |
| "learning_rate": 2.906291364742042e-06, |
| "loss": 1.5659, |
| "num_input_tokens_seen": 7222591488, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.431, |
| "grad_norm": 0.31858527660369873, |
| "learning_rate": 2.8656496226990092e-06, |
| "loss": 1.6757, |
| "num_input_tokens_seen": 7230980096, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.4315, |
| "grad_norm": 0.3064689636230469, |
| "learning_rate": 2.8252721135349892e-06, |
| "loss": 1.495, |
| "num_input_tokens_seen": 7239368704, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.2998564839363098, |
| "learning_rate": 2.7851594599211297e-06, |
| "loss": 1.3919, |
| "num_input_tokens_seen": 7247757312, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.4325, |
| "grad_norm": 0.30185946822166443, |
| "learning_rate": 2.7453122804441636e-06, |
| "loss": 1.3706, |
| "num_input_tokens_seen": 7256145920, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.433, |
| "grad_norm": 0.313856303691864, |
| "learning_rate": 2.705731189596901e-06, |
| "loss": 1.6292, |
| "num_input_tokens_seen": 7264534528, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.4335, |
| "grad_norm": 0.32189980149269104, |
| "learning_rate": 2.6664167977687182e-06, |
| "loss": 1.7559, |
| "num_input_tokens_seen": 7272923136, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.434, |
| "grad_norm": 0.3159331977367401, |
| "learning_rate": 2.6273697112361786e-06, |
| "loss": 1.6213, |
| "num_input_tokens_seen": 7281311744, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.4345, |
| "grad_norm": 0.30264052748680115, |
| "learning_rate": 2.588590532153652e-06, |
| "loss": 1.5662, |
| "num_input_tokens_seen": 7289700352, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.435, |
| "grad_norm": 0.29510387778282166, |
| "learning_rate": 2.550079858544057e-06, |
| "loss": 1.5406, |
| "num_input_tokens_seen": 7298088960, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.4355, |
| "grad_norm": 0.3535095155239105, |
| "learning_rate": 2.511838284289625e-06, |
| "loss": 1.5925, |
| "num_input_tokens_seen": 7306477568, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.436, |
| "grad_norm": 0.3253929615020752, |
| "learning_rate": 2.473866399122733e-06, |
| "loss": 1.5054, |
| "num_input_tokens_seen": 7314866176, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.4365, |
| "grad_norm": 0.27736151218414307, |
| "learning_rate": 2.436164788616815e-06, |
| "loss": 1.6797, |
| "num_input_tokens_seen": 7323254784, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.437, |
| "grad_norm": 0.2774750590324402, |
| "learning_rate": 2.398734034177361e-06, |
| "loss": 1.3784, |
| "num_input_tokens_seen": 7331643392, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.4375, |
| "grad_norm": 1.0643863677978516, |
| "learning_rate": 2.3615747130329013e-06, |
| "loss": 1.3942, |
| "num_input_tokens_seen": 7340032000, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.438, |
| "grad_norm": 0.2871946096420288, |
| "learning_rate": 2.324687398226131e-06, |
| "loss": 1.5828, |
| "num_input_tokens_seen": 7348420608, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.4385, |
| "grad_norm": 0.28776809573173523, |
| "learning_rate": 2.288072658605087e-06, |
| "loss": 1.4781, |
| "num_input_tokens_seen": 7356809216, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.439, |
| "grad_norm": 0.31147900223731995, |
| "learning_rate": 2.2517310588143372e-06, |
| "loss": 1.6066, |
| "num_input_tokens_seen": 7365197824, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.4395, |
| "grad_norm": 0.32211750745773315, |
| "learning_rate": 2.215663159286314e-06, |
| "loss": 1.6086, |
| "num_input_tokens_seen": 7373586432, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.360286682844162, |
| "learning_rate": 2.1798695162326444e-06, |
| "loss": 1.5362, |
| "num_input_tokens_seen": 7381975040, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4405, |
| "grad_norm": 0.2793222665786743, |
| "learning_rate": 2.144350681635585e-06, |
| "loss": 1.6441, |
| "num_input_tokens_seen": 7390363648, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.441, |
| "grad_norm": 0.30778202414512634, |
| "learning_rate": 2.1091072032395e-06, |
| "loss": 1.6035, |
| "num_input_tokens_seen": 7398752256, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.4415, |
| "grad_norm": 0.35812097787857056, |
| "learning_rate": 2.0741396245424263e-06, |
| "loss": 1.5975, |
| "num_input_tokens_seen": 7407140864, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.442, |
| "grad_norm": 0.3686063289642334, |
| "learning_rate": 2.0394484847876894e-06, |
| "loss": 1.6201, |
| "num_input_tokens_seen": 7415529472, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.4425, |
| "grad_norm": 0.3779139220714569, |
| "learning_rate": 2.0050343189555743e-06, |
| "loss": 1.6497, |
| "num_input_tokens_seen": 7423918080, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.443, |
| "grad_norm": 0.298098623752594, |
| "learning_rate": 1.970897657755084e-06, |
| "loss": 1.4754, |
| "num_input_tokens_seen": 7432306688, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.4435, |
| "grad_norm": 0.3022516071796417, |
| "learning_rate": 1.937039027615779e-06, |
| "loss": 1.4341, |
| "num_input_tokens_seen": 7440695296, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.444, |
| "grad_norm": 0.30083125829696655, |
| "learning_rate": 1.903458950679613e-06, |
| "loss": 1.6386, |
| "num_input_tokens_seen": 7449083904, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.4445, |
| "grad_norm": 0.2998676598072052, |
| "learning_rate": 1.8701579447929076e-06, |
| "loss": 1.4833, |
| "num_input_tokens_seen": 7457472512, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.445, |
| "grad_norm": 0.28990113735198975, |
| "learning_rate": 1.837136523498373e-06, |
| "loss": 1.5555, |
| "num_input_tokens_seen": 7465861120, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.4455, |
| "grad_norm": 0.2804090082645416, |
| "learning_rate": 1.80439519602718e-06, |
| "loss": 1.5589, |
| "num_input_tokens_seen": 7474249728, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.446, |
| "grad_norm": 0.29142701625823975, |
| "learning_rate": 1.7719344672910942e-06, |
| "loss": 1.5012, |
| "num_input_tokens_seen": 7482638336, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4465, |
| "grad_norm": 0.2881058156490326, |
| "learning_rate": 1.7397548378747142e-06, |
| "loss": 1.6529, |
| "num_input_tokens_seen": 7491026944, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.447, |
| "grad_norm": 0.29974132776260376, |
| "learning_rate": 1.7078568040277276e-06, |
| "loss": 1.4558, |
| "num_input_tokens_seen": 7499415552, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.4475, |
| "grad_norm": 0.24919338524341583, |
| "learning_rate": 1.676240857657283e-06, |
| "loss": 1.6168, |
| "num_input_tokens_seen": 7507804160, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.27764326333999634, |
| "learning_rate": 1.6449074863203773e-06, |
| "loss": 1.4641, |
| "num_input_tokens_seen": 7516192768, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.4485, |
| "grad_norm": 0.28568482398986816, |
| "learning_rate": 1.6138571732163643e-06, |
| "loss": 1.5211, |
| "num_input_tokens_seen": 7524581376, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.449, |
| "grad_norm": 0.7067427039146423, |
| "learning_rate": 1.5830903971794765e-06, |
| "loss": 1.6592, |
| "num_input_tokens_seen": 7532969984, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.4495, |
| "grad_norm": 0.2689734399318695, |
| "learning_rate": 1.5526076326714635e-06, |
| "loss": 1.4476, |
| "num_input_tokens_seen": 7541358592, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.27099910378456116, |
| "learning_rate": 1.5224093497742654e-06, |
| "loss": 1.4756, |
| "num_input_tokens_seen": 7549747200, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4505, |
| "grad_norm": 0.2850476801395416, |
| "learning_rate": 1.4924960141827605e-06, |
| "loss": 1.5165, |
| "num_input_tokens_seen": 7558135808, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.451, |
| "grad_norm": 0.268863707780838, |
| "learning_rate": 1.4628680871975842e-06, |
| "loss": 1.5588, |
| "num_input_tokens_seen": 7566524416, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.4515, |
| "grad_norm": 0.26697346568107605, |
| "learning_rate": 1.4335260257180262e-06, |
| "loss": 1.596, |
| "num_input_tokens_seen": 7574913024, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.452, |
| "grad_norm": 0.28044283390045166, |
| "learning_rate": 1.4044702822349731e-06, |
| "loss": 1.4721, |
| "num_input_tokens_seen": 7583301632, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.4525, |
| "grad_norm": 0.27265068888664246, |
| "learning_rate": 1.3757013048239287e-06, |
| "loss": 1.7483, |
| "num_input_tokens_seen": 7591690240, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.453, |
| "grad_norm": 0.27618253231048584, |
| "learning_rate": 1.3472195371381202e-06, |
| "loss": 1.4239, |
| "num_input_tokens_seen": 7600078848, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.4535, |
| "grad_norm": 0.27610379457473755, |
| "learning_rate": 1.3190254184016294e-06, |
| "loss": 1.4987, |
| "num_input_tokens_seen": 7608467456, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.454, |
| "grad_norm": 0.2572946548461914, |
| "learning_rate": 1.2911193834026548e-06, |
| "loss": 1.4957, |
| "num_input_tokens_seen": 7616856064, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.4545, |
| "grad_norm": 0.2584414482116699, |
| "learning_rate": 1.2635018624867712e-06, |
| "loss": 1.6271, |
| "num_input_tokens_seen": 7625244672, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.455, |
| "grad_norm": 0.26803648471832275, |
| "learning_rate": 1.236173281550319e-06, |
| "loss": 1.6387, |
| "num_input_tokens_seen": 7633633280, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.4555, |
| "grad_norm": 0.25464460253715515, |
| "learning_rate": 1.209134062033821e-06, |
| "loss": 1.5111, |
| "num_input_tokens_seen": 7642021888, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 0.2775379717350006, |
| "learning_rate": 1.182384620915491e-06, |
| "loss": 1.7476, |
| "num_input_tokens_seen": 7650410496, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.4565, |
| "grad_norm": 0.27452683448791504, |
| "learning_rate": 1.1559253707048046e-06, |
| "loss": 1.5443, |
| "num_input_tokens_seen": 7658799104, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.457, |
| "grad_norm": 0.2475164234638214, |
| "learning_rate": 1.1297567194361303e-06, |
| "loss": 1.6505, |
| "num_input_tokens_seen": 7667187712, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.4575, |
| "grad_norm": 0.28584203124046326, |
| "learning_rate": 1.103879070662439e-06, |
| "loss": 1.5918, |
| "num_input_tokens_seen": 7675576320, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.458, |
| "grad_norm": 0.2768670618534088, |
| "learning_rate": 1.0782928234490941e-06, |
| "loss": 1.482, |
| "num_input_tokens_seen": 7683964928, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.4585, |
| "grad_norm": 0.27717721462249756, |
| "learning_rate": 1.0529983723676751e-06, |
| "loss": 1.6142, |
| "num_input_tokens_seen": 7692353536, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.459, |
| "grad_norm": 0.2514759302139282, |
| "learning_rate": 1.027996107489908e-06, |
| "loss": 1.5034, |
| "num_input_tokens_seen": 7700742144, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.4595, |
| "grad_norm": 0.2566506862640381, |
| "learning_rate": 1.0032864143816456e-06, |
| "loss": 1.5485, |
| "num_input_tokens_seen": 7709130752, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.275288462638855, |
| "learning_rate": 9.788696740969295e-07, |
| "loss": 1.5363, |
| "num_input_tokens_seen": 7717519360, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.4605, |
| "grad_norm": 0.26422953605651855, |
| "learning_rate": 9.547462631720906e-07, |
| "loss": 1.7154, |
| "num_input_tokens_seen": 7725907968, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.461, |
| "grad_norm": 0.2841811180114746, |
| "learning_rate": 9.30916553619976e-07, |
| "loss": 1.6462, |
| "num_input_tokens_seen": 7734296576, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.4615, |
| "grad_norm": 0.2585889995098114, |
| "learning_rate": 9.073809129241784e-07, |
| "loss": 1.5431, |
| "num_input_tokens_seen": 7742685184, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.462, |
| "grad_norm": 0.25092291831970215, |
| "learning_rate": 8.841397040333976e-07, |
| "loss": 1.5584, |
| "num_input_tokens_seen": 7751073792, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.4625, |
| "grad_norm": 0.2754204273223877, |
| "learning_rate": 8.611932853558236e-07, |
| "loss": 1.5088, |
| "num_input_tokens_seen": 7759462400, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.463, |
| "grad_norm": 0.29892218112945557, |
| "learning_rate": 8.38542010753618e-07, |
| "loss": 1.5045, |
| "num_input_tokens_seen": 7767851008, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.4635, |
| "grad_norm": 0.2517067492008209, |
| "learning_rate": 8.161862295374567e-07, |
| "loss": 1.4251, |
| "num_input_tokens_seen": 7776239616, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.28415587544441223, |
| "learning_rate": 7.941262864611387e-07, |
| "loss": 1.5208, |
| "num_input_tokens_seen": 7784628224, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.4645, |
| "grad_norm": 0.2550823986530304, |
| "learning_rate": 7.723625217162811e-07, |
| "loss": 1.5787, |
| "num_input_tokens_seen": 7793016832, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.465, |
| "grad_norm": 0.24390766024589539, |
| "learning_rate": 7.508952709270567e-07, |
| "loss": 1.6071, |
| "num_input_tokens_seen": 7801405440, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.4655, |
| "grad_norm": 0.2771441638469696, |
| "learning_rate": 7.29724865145025e-07, |
| "loss": 1.6024, |
| "num_input_tokens_seen": 7809794048, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.466, |
| "grad_norm": 0.3028735816478729, |
| "learning_rate": 7.088516308440386e-07, |
| "loss": 1.6315, |
| "num_input_tokens_seen": 7818182656, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.4665, |
| "grad_norm": 0.273967981338501, |
| "learning_rate": 6.882758899151886e-07, |
| "loss": 1.5286, |
| "num_input_tokens_seen": 7826571264, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.467, |
| "grad_norm": 0.2625696361064911, |
| "learning_rate": 6.679979596618546e-07, |
| "loss": 1.7165, |
| "num_input_tokens_seen": 7834959872, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.4675, |
| "grad_norm": 0.7894181609153748, |
| "learning_rate": 6.480181527948049e-07, |
| "loss": 1.4989, |
| "num_input_tokens_seen": 7843348480, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.468, |
| "grad_norm": 0.27667322754859924, |
| "learning_rate": 6.283367774273785e-07, |
| "loss": 1.6919, |
| "num_input_tokens_seen": 7851737088, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.4685, |
| "grad_norm": 0.2543598711490631, |
| "learning_rate": 6.089541370707297e-07, |
| "loss": 1.6136, |
| "num_input_tokens_seen": 7860125696, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.469, |
| "grad_norm": 0.2823677361011505, |
| "learning_rate": 5.898705306291508e-07, |
| "loss": 1.7088, |
| "num_input_tokens_seen": 7868514304, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.4695, |
| "grad_norm": 0.2542930543422699, |
| "learning_rate": 5.71086252395463e-07, |
| "loss": 1.728, |
| "num_input_tokens_seen": 7876902912, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.2800678014755249, |
| "learning_rate": 5.526015920464689e-07, |
| "loss": 1.5442, |
| "num_input_tokens_seen": 7885291520, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4705, |
| "grad_norm": 0.28233593702316284, |
| "learning_rate": 5.344168346385003e-07, |
| "loss": 1.5762, |
| "num_input_tokens_seen": 7893680128, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.471, |
| "grad_norm": 0.2772792875766754, |
| "learning_rate": 5.165322606030132e-07, |
| "loss": 1.498, |
| "num_input_tokens_seen": 7902068736, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.4715, |
| "grad_norm": 0.258087694644928, |
| "learning_rate": 4.98948145742264e-07, |
| "loss": 1.6592, |
| "num_input_tokens_seen": 7910457344, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 0.2860059142112732, |
| "learning_rate": 4.816647612250513e-07, |
| "loss": 1.5144, |
| "num_input_tokens_seen": 7918845952, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.4725, |
| "grad_norm": 0.24073940515518188, |
| "learning_rate": 4.646823735825523e-07, |
| "loss": 1.6956, |
| "num_input_tokens_seen": 7927234560, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.473, |
| "grad_norm": 0.25412750244140625, |
| "learning_rate": 4.4800124470418815e-07, |
| "loss": 1.5523, |
| "num_input_tokens_seen": 7935623168, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.4735, |
| "grad_norm": 0.2561289966106415, |
| "learning_rate": 4.3162163183360084e-07, |
| "loss": 1.5933, |
| "num_input_tokens_seen": 7944011776, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.474, |
| "grad_norm": 0.2627177834510803, |
| "learning_rate": 4.155437875646828e-07, |
| "loss": 1.5529, |
| "num_input_tokens_seen": 7952400384, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.4745, |
| "grad_norm": 0.2649383544921875, |
| "learning_rate": 3.997679598376891e-07, |
| "loss": 1.5151, |
| "num_input_tokens_seen": 7960788992, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.475, |
| "grad_norm": 0.25103694200515747, |
| "learning_rate": 3.842943919353914e-07, |
| "loss": 1.3731, |
| "num_input_tokens_seen": 7969177600, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.4755, |
| "grad_norm": 0.24840368330478668, |
| "learning_rate": 3.6912332247935224e-07, |
| "loss": 1.554, |
| "num_input_tokens_seen": 7977566208, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.476, |
| "grad_norm": 0.2701607048511505, |
| "learning_rate": 3.5425498542622784e-07, |
| "loss": 1.4967, |
| "num_input_tokens_seen": 7985954816, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.4765, |
| "grad_norm": 0.2613852620124817, |
| "learning_rate": 3.396896100641689e-07, |
| "loss": 1.5878, |
| "num_input_tokens_seen": 7994343424, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.477, |
| "grad_norm": 0.26147374510765076, |
| "learning_rate": 3.2542742100928114e-07, |
| "loss": 1.5783, |
| "num_input_tokens_seen": 8002732032, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.4775, |
| "grad_norm": 0.3100808560848236, |
| "learning_rate": 3.114686382021681e-07, |
| "loss": 1.5472, |
| "num_input_tokens_seen": 8011120640, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.478, |
| "grad_norm": 0.26423409581184387, |
| "learning_rate": 2.9781347690452266e-07, |
| "loss": 1.8478, |
| "num_input_tokens_seen": 8019509248, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.4785, |
| "grad_norm": 0.2365507185459137, |
| "learning_rate": 2.8446214769582534e-07, |
| "loss": 1.6472, |
| "num_input_tokens_seen": 8027897856, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.479, |
| "grad_norm": 0.2785324454307556, |
| "learning_rate": 2.714148564700914e-07, |
| "loss": 1.6811, |
| "num_input_tokens_seen": 8036286464, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.4795, |
| "grad_norm": 0.23417918384075165, |
| "learning_rate": 2.586718044326886e-07, |
| "loss": 1.4201, |
| "num_input_tokens_seen": 8044675072, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.2550223469734192, |
| "learning_rate": 2.462331880972468e-07, |
| "loss": 1.4727, |
| "num_input_tokens_seen": 8053063680, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.4805, |
| "grad_norm": 0.25741317868232727, |
| "learning_rate": 2.340991992826136e-07, |
| "loss": 1.5361, |
| "num_input_tokens_seen": 8061452288, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.481, |
| "grad_norm": 0.2440570592880249, |
| "learning_rate": 2.222700251099097e-07, |
| "loss": 1.6914, |
| "num_input_tokens_seen": 8069840896, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.4815, |
| "grad_norm": 0.4941231906414032, |
| "learning_rate": 2.107458479996316e-07, |
| "loss": 1.7785, |
| "num_input_tokens_seen": 8078229504, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.482, |
| "grad_norm": 0.2466343194246292, |
| "learning_rate": 1.9952684566884927e-07, |
| "loss": 1.4094, |
| "num_input_tokens_seen": 8086618112, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.4825, |
| "grad_norm": 0.2508993148803711, |
| "learning_rate": 1.88613191128455e-07, |
| "loss": 1.5626, |
| "num_input_tokens_seen": 8095006720, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.483, |
| "grad_norm": 0.27574288845062256, |
| "learning_rate": 1.780050526805055e-07, |
| "loss": 1.6807, |
| "num_input_tokens_seen": 8103395328, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.4835, |
| "grad_norm": 0.26518452167510986, |
| "learning_rate": 1.6770259391561518e-07, |
| "loss": 1.6383, |
| "num_input_tokens_seen": 8111783936, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.484, |
| "grad_norm": 0.2572641968727112, |
| "learning_rate": 1.577059737104447e-07, |
| "loss": 1.6402, |
| "num_input_tokens_seen": 8120172544, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.4845, |
| "grad_norm": 0.27201879024505615, |
| "learning_rate": 1.4801534622524316e-07, |
| "loss": 1.5114, |
| "num_input_tokens_seen": 8128561152, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.485, |
| "grad_norm": 0.23598560690879822, |
| "learning_rate": 1.3863086090147415e-07, |
| "loss": 1.5498, |
| "num_input_tokens_seen": 8136949760, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.4855, |
| "grad_norm": 0.2551763653755188, |
| "learning_rate": 1.2955266245951338e-07, |
| "loss": 1.5513, |
| "num_input_tokens_seen": 8145338368, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.486, |
| "grad_norm": 0.2625666558742523, |
| "learning_rate": 1.2078089089640809e-07, |
| "loss": 1.6809, |
| "num_input_tokens_seen": 8153726976, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.4865, |
| "grad_norm": 0.26251545548439026, |
| "learning_rate": 1.1231568148372562e-07, |
| "loss": 1.5135, |
| "num_input_tokens_seen": 8162115584, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.487, |
| "grad_norm": 0.2552337050437927, |
| "learning_rate": 1.0415716476547045e-07, |
| "loss": 1.6377, |
| "num_input_tokens_seen": 8170504192, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.4875, |
| "grad_norm": 0.25301122665405273, |
| "learning_rate": 9.630546655606365e-08, |
| "loss": 1.6496, |
| "num_input_tokens_seen": 8178892800, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 0.23959580063819885, |
| "learning_rate": 8.876070793840008e-08, |
| "loss": 1.4887, |
| "num_input_tokens_seen": 8187281408, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.4885, |
| "grad_norm": 0.2698057293891907, |
| "learning_rate": 8.15230052619942e-08, |
| "loss": 1.5558, |
| "num_input_tokens_seen": 8195670016, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.489, |
| "grad_norm": 0.2517794370651245, |
| "learning_rate": 7.459247014117488e-08, |
| "loss": 1.6568, |
| "num_input_tokens_seen": 8204058624, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.4895, |
| "grad_norm": 0.32929736375808716, |
| "learning_rate": 6.796920945336682e-08, |
| "loss": 1.6056, |
| "num_input_tokens_seen": 8212447232, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.2451072484254837, |
| "learning_rate": 6.165332533744072e-08, |
| "loss": 1.3545, |
| "num_input_tokens_seen": 8220835840, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.4905, |
| "grad_norm": 0.24773673713207245, |
| "learning_rate": 5.5644915192145654e-08, |
| "loss": 1.6526, |
| "num_input_tokens_seen": 8229224448, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.491, |
| "grad_norm": 0.2363719791173935, |
| "learning_rate": 4.9944071674599135e-08, |
| "loss": 1.5398, |
| "num_input_tokens_seen": 8237613056, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.4915, |
| "grad_norm": 0.24789544939994812, |
| "learning_rate": 4.4550882698857214e-08, |
| "loss": 1.5177, |
| "num_input_tokens_seen": 8246001664, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.492, |
| "grad_norm": 0.24309134483337402, |
| "learning_rate": 3.946543143456882e-08, |
| "loss": 1.5559, |
| "num_input_tokens_seen": 8254390272, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.4925, |
| "grad_norm": 0.2546921968460083, |
| "learning_rate": 3.468779630568353e-08, |
| "loss": 1.5036, |
| "num_input_tokens_seen": 8262778880, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.493, |
| "grad_norm": 0.2471814751625061, |
| "learning_rate": 3.021805098924136e-08, |
| "loss": 1.5532, |
| "num_input_tokens_seen": 8271167488, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.4935, |
| "grad_norm": 0.24231931567192078, |
| "learning_rate": 2.6056264414249245e-08, |
| "loss": 1.5185, |
| "num_input_tokens_seen": 8279556096, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.494, |
| "grad_norm": 0.24174270033836365, |
| "learning_rate": 2.220250076060193e-08, |
| "loss": 1.5999, |
| "num_input_tokens_seen": 8287944704, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.4945, |
| "grad_norm": 0.24908506870269775, |
| "learning_rate": 1.8656819458100496e-08, |
| "loss": 1.5319, |
| "num_input_tokens_seen": 8296333312, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.495, |
| "grad_norm": 0.25362494587898254, |
| "learning_rate": 1.541927518554198e-08, |
| "loss": 1.6403, |
| "num_input_tokens_seen": 8304721920, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.4955, |
| "grad_norm": 0.2601455748081207, |
| "learning_rate": 1.2489917869860091e-08, |
| "loss": 1.6017, |
| "num_input_tokens_seen": 8313110528, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.250872403383255, |
| "learning_rate": 9.868792685368001e-09, |
| "loss": 1.5581, |
| "num_input_tokens_seen": 8321499136, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.4965, |
| "grad_norm": 0.2510140538215637, |
| "learning_rate": 7.55594005306337e-09, |
| "loss": 1.5565, |
| "num_input_tokens_seen": 8329887744, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.497, |
| "grad_norm": 0.2628908157348633, |
| "learning_rate": 5.551395639988855e-09, |
| "loss": 1.7027, |
| "num_input_tokens_seen": 8338276352, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.4975, |
| "grad_norm": 0.24786627292633057, |
| "learning_rate": 3.855190358703631e-09, |
| "loss": 1.5539, |
| "num_input_tokens_seen": 8346664960, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.498, |
| "grad_norm": 0.24326151609420776, |
| "learning_rate": 2.467350366788246e-09, |
| "loss": 1.5463, |
| "num_input_tokens_seen": 8355053568, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.4985, |
| "grad_norm": 0.2904972732067108, |
| "learning_rate": 1.3878970664538138e-09, |
| "loss": 1.4752, |
| "num_input_tokens_seen": 8363442176, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.499, |
| "grad_norm": 0.24948014318943024, |
| "learning_rate": 6.168471042067303e-10, |
| "loss": 1.4927, |
| "num_input_tokens_seen": 8371830784, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.4995, |
| "grad_norm": 0.24558107554912567, |
| "learning_rate": 1.5421237058887984e-10, |
| "loss": 1.5189, |
| "num_input_tokens_seen": 8380219392, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.2598268687725067, |
| "learning_rate": 0.0, |
| "loss": 1.5883, |
| "num_input_tokens_seen": 8388608000, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1000, |
| "num_input_tokens_seen": 8388608000, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.902112919650304e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|