{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.987034713508992, "eval_steps": 500, "global_step": 1192, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1672940192388122, "grad_norm": 0.03940200433135033, "learning_rate": 0.0001917785234899329, "loss": 0.388, "step": 50 }, { "epoch": 0.3345880384776244, "grad_norm": 0.03866202384233475, "learning_rate": 0.00018338926174496644, "loss": 0.3472, "step": 100 }, { "epoch": 0.5018820577164367, "grad_norm": 0.040275849401950836, "learning_rate": 0.000175, "loss": 0.3506, "step": 150 }, { "epoch": 0.6691760769552488, "grad_norm": 0.03893669694662094, "learning_rate": 0.00016661073825503358, "loss": 0.3398, "step": 200 }, { "epoch": 0.8364700961940611, "grad_norm": 0.03782316669821739, "learning_rate": 0.0001582214765100671, "loss": 0.3371, "step": 250 }, { "epoch": 1.0033458803847763, "grad_norm": 0.03445591777563095, "learning_rate": 0.00014983221476510067, "loss": 0.3301, "step": 300 }, { "epoch": 1.1706398996235885, "grad_norm": 0.042704977095127106, "learning_rate": 0.00014144295302013425, "loss": 0.317, "step": 350 }, { "epoch": 1.3379339188624007, "grad_norm": 0.04385839030146599, "learning_rate": 0.0001330536912751678, "loss": 0.3148, "step": 400 }, { "epoch": 1.5052279381012128, "grad_norm": 0.04302438348531723, "learning_rate": 0.00012466442953020134, "loss": 0.3159, "step": 450 }, { "epoch": 1.6725219573400252, "grad_norm": 0.05040173605084419, "learning_rate": 0.0001162751677852349, "loss": 0.3271, "step": 500 }, { "epoch": 1.8398159765788373, "grad_norm": 0.05767366662621498, "learning_rate": 0.00010788590604026847, "loss": 0.3133, "step": 550 }, { "epoch": 2.0066917607695527, "grad_norm": 0.04548301920294762, "learning_rate": 9.949664429530202e-05, "loss": 0.315, "step": 600 }, { "epoch": 2.1739857800083646, "grad_norm": 0.05534046143293381, "learning_rate": 9.110738255033557e-05, "loss": 0.3001, "step": 650 }, { "epoch": 2.341279799247177, "grad_norm": 0.054814413189888, "learning_rate": 8.271812080536914e-05, "loss": 0.2985, "step": 700 }, { "epoch": 2.5085738184859894, "grad_norm": 0.0619826577603817, "learning_rate": 7.432885906040269e-05, "loss": 0.2939, "step": 750 }, { "epoch": 2.6758678377248013, "grad_norm": 0.06426603347063065, "learning_rate": 6.593959731543624e-05, "loss": 0.2968, "step": 800 }, { "epoch": 2.8431618569636137, "grad_norm": 0.05984446406364441, "learning_rate": 5.7550335570469805e-05, "loss": 0.3063, "step": 850 }, { "epoch": 3.010037641154329, "grad_norm": 0.06274156272411346, "learning_rate": 4.9161073825503354e-05, "loss": 0.3021, "step": 900 }, { "epoch": 3.1773316603931407, "grad_norm": 0.08778294175863266, "learning_rate": 4.077181208053692e-05, "loss": 0.2803, "step": 950 }, { "epoch": 3.344625679631953, "grad_norm": 0.07436411827802658, "learning_rate": 3.238255033557047e-05, "loss": 0.2835, "step": 1000 }, { "epoch": 3.5119196988707655, "grad_norm": 0.05694897472858429, "learning_rate": 2.3993288590604026e-05, "loss": 0.2859, "step": 1050 }, { "epoch": 3.6792137181095774, "grad_norm": 0.0587756410241127, "learning_rate": 1.5604026845637585e-05, "loss": 0.2863, "step": 1100 }, { "epoch": 3.84650773734839, "grad_norm": 0.0690668448805809, "learning_rate": 7.214765100671142e-06, "loss": 0.2954, "step": 1150 } ], "logging_steps": 50, "max_steps": 1192, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.779324622389903e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }