| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 916, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.043668122270742356, |
| "grad_norm": 2.421576976776123, |
| "learning_rate": 1.0869565217391306e-06, |
| "loss": 0.5167, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08733624454148471, |
| "grad_norm": 2.160121440887451, |
| "learning_rate": 2.173913043478261e-06, |
| "loss": 0.5039, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.13100436681222707, |
| "grad_norm": 0.915518045425415, |
| "learning_rate": 3.2608695652173914e-06, |
| "loss": 0.4632, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.17467248908296942, |
| "grad_norm": 0.7904559969902039, |
| "learning_rate": 4.347826086956522e-06, |
| "loss": 0.3982, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2183406113537118, |
| "grad_norm": 0.6008340716362, |
| "learning_rate": 5.4347826086956525e-06, |
| "loss": 0.4161, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.26200873362445415, |
| "grad_norm": 0.6038509011268616, |
| "learning_rate": 6.521739130434783e-06, |
| "loss": 0.438, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3056768558951965, |
| "grad_norm": 0.609183132648468, |
| "learning_rate": 7.608695652173914e-06, |
| "loss": 0.3848, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.34934497816593885, |
| "grad_norm": 0.65712571144104, |
| "learning_rate": 8.695652173913044e-06, |
| "loss": 0.3993, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3930131004366812, |
| "grad_norm": 0.6194190979003906, |
| "learning_rate": 9.782608695652175e-06, |
| "loss": 0.3769, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4366812227074236, |
| "grad_norm": 0.4761613607406616, |
| "learning_rate": 9.99947842870608e-06, |
| "loss": 0.3808, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.48034934497816595, |
| "grad_norm": 0.4921339750289917, |
| "learning_rate": 9.997359731816998e-06, |
| "loss": 0.4205, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5240174672489083, |
| "grad_norm": 0.48024195432662964, |
| "learning_rate": 9.99361200124597e-06, |
| "loss": 0.38, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5676855895196506, |
| "grad_norm": 0.6233803629875183, |
| "learning_rate": 9.988236458673974e-06, |
| "loss": 0.3953, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.611353711790393, |
| "grad_norm": 0.5606607794761658, |
| "learning_rate": 9.981234856414306e-06, |
| "loss": 0.3865, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6550218340611353, |
| "grad_norm": 0.49620741605758667, |
| "learning_rate": 9.972609476841368e-06, |
| "loss": 0.3899, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6986899563318777, |
| "grad_norm": 0.5842658281326294, |
| "learning_rate": 9.962363131646649e-06, |
| "loss": 0.3792, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.74235807860262, |
| "grad_norm": 0.5468127727508545, |
| "learning_rate": 9.950499160922184e-06, |
| "loss": 0.4015, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7860262008733624, |
| "grad_norm": 0.5464998483657837, |
| "learning_rate": 9.937021432071754e-06, |
| "loss": 0.3533, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8296943231441049, |
| "grad_norm": 0.5048817992210388, |
| "learning_rate": 9.921934338550187e-06, |
| "loss": 0.3961, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8733624454148472, |
| "grad_norm": 0.47697556018829346, |
| "learning_rate": 9.905242798431196e-06, |
| "loss": 0.3438, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9170305676855895, |
| "grad_norm": 0.5746617913246155, |
| "learning_rate": 9.886952252804177e-06, |
| "loss": 0.4006, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9606986899563319, |
| "grad_norm": 0.5081667304039001, |
| "learning_rate": 9.867068664000538e-06, |
| "loss": 0.3679, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.0043668122270741, |
| "grad_norm": 0.4806345999240875, |
| "learning_rate": 9.845598513650104e-06, |
| "loss": 0.4113, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0480349344978166, |
| "grad_norm": 0.4791143238544464, |
| "learning_rate": 9.822548800568238e-06, |
| "loss": 0.341, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.091703056768559, |
| "grad_norm": 0.5520183444023132, |
| "learning_rate": 9.797927038474383e-06, |
| "loss": 0.298, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.1353711790393013, |
| "grad_norm": 0.486562579870224, |
| "learning_rate": 9.771741253542742e-06, |
| "loss": 0.2989, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.1790393013100438, |
| "grad_norm": 0.5037546753883362, |
| "learning_rate": 9.743999981785914e-06, |
| "loss": 0.3058, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.222707423580786, |
| "grad_norm": 0.5140413045883179, |
| "learning_rate": 9.714712266272339e-06, |
| "loss": 0.3164, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.2663755458515285, |
| "grad_norm": 0.4978218972682953, |
| "learning_rate": 9.683887654178446e-06, |
| "loss": 0.296, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.3100436681222707, |
| "grad_norm": 0.5410030484199524, |
| "learning_rate": 9.651536193676476e-06, |
| "loss": 0.2938, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3537117903930131, |
| "grad_norm": 0.5140953063964844, |
| "learning_rate": 9.617668430658991e-06, |
| "loss": 0.3249, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.3973799126637554, |
| "grad_norm": 0.4528365433216095, |
| "learning_rate": 9.582295405301131e-06, |
| "loss": 0.3356, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.4410480349344978, |
| "grad_norm": 0.49946603178977966, |
| "learning_rate": 9.545428648461756e-06, |
| "loss": 0.3037, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.48471615720524, |
| "grad_norm": 0.48589998483657837, |
| "learning_rate": 9.50708017792463e-06, |
| "loss": 0.3116, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.5283842794759825, |
| "grad_norm": 0.46080437302589417, |
| "learning_rate": 9.46726249448087e-06, |
| "loss": 0.296, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.572052401746725, |
| "grad_norm": 0.4566941559314728, |
| "learning_rate": 9.425988577853959e-06, |
| "loss": 0.3079, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.6157205240174672, |
| "grad_norm": 0.5628035068511963, |
| "learning_rate": 9.383271882468631e-06, |
| "loss": 0.2906, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.6593886462882095, |
| "grad_norm": 0.4313275218009949, |
| "learning_rate": 9.339126333065008e-06, |
| "loss": 0.2879, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.703056768558952, |
| "grad_norm": 0.4829094409942627, |
| "learning_rate": 9.293566320159432e-06, |
| "loss": 0.3609, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.7467248908296944, |
| "grad_norm": 0.7929471135139465, |
| "learning_rate": 9.24660669535346e-06, |
| "loss": 0.3263, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.7903930131004366, |
| "grad_norm": 0.4220748841762543, |
| "learning_rate": 9.198262766492554e-06, |
| "loss": 0.3092, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.8340611353711789, |
| "grad_norm": 0.4901680648326874, |
| "learning_rate": 9.14855029267605e-06, |
| "loss": 0.3152, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.8777292576419216, |
| "grad_norm": 0.47252029180526733, |
| "learning_rate": 9.097485479120027e-06, |
| "loss": 0.3223, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.9213973799126638, |
| "grad_norm": 0.46700412034988403, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.3144, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.965065502183406, |
| "grad_norm": 0.46521633863449097, |
| "learning_rate": 8.99136585239836e-06, |
| "loss": 0.3179, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.0087336244541483, |
| "grad_norm": 0.48223376274108887, |
| "learning_rate": 8.9363456319888e-06, |
| "loss": 0.3021, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.052401746724891, |
| "grad_norm": 0.446074903011322, |
| "learning_rate": 8.880042246075366e-06, |
| "loss": 0.2441, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.096069868995633, |
| "grad_norm": 0.45803341269493103, |
| "learning_rate": 8.82247404837222e-06, |
| "loss": 0.2733, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.1397379912663754, |
| "grad_norm": 0.429457426071167, |
| "learning_rate": 8.763659804895442e-06, |
| "loss": 0.2563, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.183406113537118, |
| "grad_norm": 0.6878861784934998, |
| "learning_rate": 8.703618687845697e-06, |
| "loss": 0.2458, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.2270742358078603, |
| "grad_norm": 0.4481293261051178, |
| "learning_rate": 8.64237026935852e-06, |
| "loss": 0.2261, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.2707423580786026, |
| "grad_norm": 0.45138078927993774, |
| "learning_rate": 8.579934515124202e-06, |
| "loss": 0.2408, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.314410480349345, |
| "grad_norm": 0.5058510303497314, |
| "learning_rate": 8.5163317778794e-06, |
| "loss": 0.2386, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.3580786026200875, |
| "grad_norm": 0.5651599168777466, |
| "learning_rate": 8.45158279077258e-06, |
| "loss": 0.2035, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.4017467248908297, |
| "grad_norm": 0.4735155999660492, |
| "learning_rate": 8.385708660605431e-06, |
| "loss": 0.2106, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.445414847161572, |
| "grad_norm": 0.44301047921180725, |
| "learning_rate": 8.318730860952523e-06, |
| "loss": 0.2164, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.489082969432314, |
| "grad_norm": 0.38600876927375793, |
| "learning_rate": 8.250671225161345e-06, |
| "loss": 0.2275, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.532751091703057, |
| "grad_norm": 0.49234113097190857, |
| "learning_rate": 8.181551939235115e-06, |
| "loss": 0.2254, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.576419213973799, |
| "grad_norm": 0.4783915877342224, |
| "learning_rate": 8.111395534600604e-06, |
| "loss": 0.2253, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.6200873362445414, |
| "grad_norm": 0.4308622479438782, |
| "learning_rate": 8.040224880763368e-06, |
| "loss": 0.2202, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.6637554585152836, |
| "grad_norm": 0.4942546784877777, |
| "learning_rate": 7.968063177852775e-06, |
| "loss": 0.2512, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.7074235807860263, |
| "grad_norm": 0.4427158832550049, |
| "learning_rate": 7.894933949059245e-06, |
| "loss": 0.237, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.7510917030567685, |
| "grad_norm": 0.46294692158699036, |
| "learning_rate": 7.820861032966199e-06, |
| "loss": 0.226, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.7947598253275108, |
| "grad_norm": 0.42187586426734924, |
| "learning_rate": 7.745868575779176e-06, |
| "loss": 0.2362, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.8384279475982535, |
| "grad_norm": 0.4270602762699127, |
| "learning_rate": 7.669981023454682e-06, |
| "loss": 0.2159, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.8820960698689957, |
| "grad_norm": 0.49507179856300354, |
| "learning_rate": 7.593223113731323e-06, |
| "loss": 0.2566, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.925764192139738, |
| "grad_norm": 0.4554119408130646, |
| "learning_rate": 7.515619868065833e-06, |
| "loss": 0.2648, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.96943231441048, |
| "grad_norm": 0.42243942618370056, |
| "learning_rate": 7.437196583476597e-06, |
| "loss": 0.2426, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.013100436681223, |
| "grad_norm": 0.4137606620788574, |
| "learning_rate": 7.357978824297362e-06, |
| "loss": 0.225, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.056768558951965, |
| "grad_norm": 0.433912992477417, |
| "learning_rate": 7.2779924138438065e-06, |
| "loss": 0.1688, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.1004366812227073, |
| "grad_norm": 0.4669990539550781, |
| "learning_rate": 7.197263425995682e-06, |
| "loss": 0.1763, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.14410480349345, |
| "grad_norm": 0.4027640223503113, |
| "learning_rate": 7.115818176697285e-06, |
| "loss": 0.1805, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.1877729257641922, |
| "grad_norm": 0.577460765838623, |
| "learning_rate": 7.033683215379002e-06, |
| "loss": 0.1709, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.2314410480349345, |
| "grad_norm": 0.43528082966804504, |
| "learning_rate": 6.950885316302773e-06, |
| "loss": 0.1558, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.2751091703056767, |
| "grad_norm": 0.5665518641471863, |
| "learning_rate": 6.867451469834237e-06, |
| "loss": 0.1935, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.3187772925764194, |
| "grad_norm": 0.3636087477207184, |
| "learning_rate": 6.7834088736444435e-06, |
| "loss": 0.1779, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.3624454148471616, |
| "grad_norm": 0.4824029505252838, |
| "learning_rate": 6.698784923843993e-06, |
| "loss": 0.1748, |
| "step": 770 |
| }, |
| { |
| "epoch": 3.406113537117904, |
| "grad_norm": 0.44956591725349426, |
| "learning_rate": 6.613607206052476e-06, |
| "loss": 0.1637, |
| "step": 780 |
| }, |
| { |
| "epoch": 3.449781659388646, |
| "grad_norm": 0.4280209243297577, |
| "learning_rate": 6.527903486406147e-06, |
| "loss": 0.1618, |
| "step": 790 |
| }, |
| { |
| "epoch": 3.493449781659389, |
| "grad_norm": 0.5125846862792969, |
| "learning_rate": 6.441701702506755e-06, |
| "loss": 0.2097, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.537117903930131, |
| "grad_norm": 0.4643654227256775, |
| "learning_rate": 6.355029954314468e-06, |
| "loss": 0.1765, |
| "step": 810 |
| }, |
| { |
| "epoch": 3.5807860262008733, |
| "grad_norm": 0.3958646357059479, |
| "learning_rate": 6.267916494987883e-06, |
| "loss": 0.1716, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.6244541484716155, |
| "grad_norm": 0.3993144929409027, |
| "learning_rate": 6.180389721674101e-06, |
| "loss": 0.1763, |
| "step": 830 |
| }, |
| { |
| "epoch": 3.668122270742358, |
| "grad_norm": 0.4378385841846466, |
| "learning_rate": 6.092478166251839e-06, |
| "loss": 0.1677, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.7117903930131004, |
| "grad_norm": 0.42186248302459717, |
| "learning_rate": 6.00421048603066e-06, |
| "loss": 0.1874, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.7554585152838427, |
| "grad_norm": 0.4455322027206421, |
| "learning_rate": 5.915615454409281e-06, |
| "loss": 0.1641, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.7991266375545854, |
| "grad_norm": 0.5567952990531921, |
| "learning_rate": 5.8267219514960625e-06, |
| "loss": 0.1714, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.8427947598253276, |
| "grad_norm": 0.463058739900589, |
| "learning_rate": 5.737558954694698e-06, |
| "loss": 0.1799, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.88646288209607, |
| "grad_norm": 0.4127854108810425, |
| "learning_rate": 5.648155529258195e-06, |
| "loss": 0.1721, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.930131004366812, |
| "grad_norm": 0.5975726246833801, |
| "learning_rate": 5.558540818814213e-06, |
| "loss": 0.1792, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.9737991266375547, |
| "grad_norm": 0.44827261567115784, |
| "learning_rate": 5.468744035864867e-06, |
| "loss": 0.1675, |
| "step": 910 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1832, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 8, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 143371959500800.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|