Instructions to use igzi/lora-multirc with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use igzi/lora-multirc with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
model = PeftModel.from_pretrained(base_model, "igzi/lora-multirc")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.09245562130177515, | |
| "eval_steps": 500, | |
| "global_step": 5000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0009245562130177514, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00019800000000000002, | |
| "loss": 1.6901, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0018491124260355029, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 0.000196, | |
| "loss": 0.4966, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0027736686390532543, | |
| "grad_norm": 67.5, | |
| "learning_rate": 0.000194, | |
| "loss": 0.8836, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0036982248520710057, | |
| "grad_norm": 28.875, | |
| "learning_rate": 0.000192, | |
| "loss": 0.6205, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.004622781065088758, | |
| "grad_norm": 0.023193359375, | |
| "learning_rate": 0.00019, | |
| "loss": 0.7316, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.005547337278106509, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 0.000188, | |
| "loss": 0.7953, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0064718934911242604, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 0.00018600000000000002, | |
| "loss": 0.5551, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.0073964497041420114, | |
| "grad_norm": 0.0303955078125, | |
| "learning_rate": 0.00018400000000000003, | |
| "loss": 0.8572, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.008321005917159764, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000182, | |
| "loss": 0.8262, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.009245562130177515, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00018, | |
| "loss": 0.4421, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.010170118343195266, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00017800000000000002, | |
| "loss": 0.5296, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.011094674556213017, | |
| "grad_norm": 0.025146484375, | |
| "learning_rate": 0.00017600000000000002, | |
| "loss": 0.5619, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.01201923076923077, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000174, | |
| "loss": 0.6676, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.012943786982248521, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000172, | |
| "loss": 0.5962, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.013868343195266272, | |
| "grad_norm": 0.0162353515625, | |
| "learning_rate": 0.00017, | |
| "loss": 0.7956, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.014792899408284023, | |
| "grad_norm": 18.625, | |
| "learning_rate": 0.000168, | |
| "loss": 0.84, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.015717455621301776, | |
| "grad_norm": 27.625, | |
| "learning_rate": 0.000166, | |
| "loss": 0.7259, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.01664201183431953, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000164, | |
| "loss": 0.5285, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.017566568047337278, | |
| "grad_norm": 16.125, | |
| "learning_rate": 0.000162, | |
| "loss": 0.8432, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.01849112426035503, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00016, | |
| "loss": 0.5795, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01941568047337278, | |
| "grad_norm": 3.3125, | |
| "learning_rate": 0.00015800000000000002, | |
| "loss": 0.5298, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.020340236686390532, | |
| "grad_norm": 3.703125, | |
| "learning_rate": 0.00015600000000000002, | |
| "loss": 0.5904, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.021264792899408285, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000154, | |
| "loss": 0.6087, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.022189349112426034, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 0.000152, | |
| "loss": 0.729, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.023113905325443787, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00015000000000000001, | |
| "loss": 0.3885, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.02403846153846154, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000148, | |
| "loss": 0.3173, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.02496301775147929, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000146, | |
| "loss": 0.5709, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.025887573964497042, | |
| "grad_norm": 0.1884765625, | |
| "learning_rate": 0.000144, | |
| "loss": 0.8465, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.026812130177514795, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000142, | |
| "loss": 0.4306, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.027736686390532544, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 0.00014, | |
| "loss": 0.549, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.028661242603550297, | |
| "grad_norm": 2.9375, | |
| "learning_rate": 0.000138, | |
| "loss": 0.3375, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.029585798816568046, | |
| "grad_norm": 0.03369140625, | |
| "learning_rate": 0.00013600000000000003, | |
| "loss": 0.5455, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.0305103550295858, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.000134, | |
| "loss": 0.5112, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.03143491124260355, | |
| "grad_norm": 1.78125, | |
| "learning_rate": 0.000132, | |
| "loss": 0.4484, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.0323594674556213, | |
| "grad_norm": 0.018310546875, | |
| "learning_rate": 0.00013000000000000002, | |
| "loss": 0.5034, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.03328402366863906, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00012800000000000002, | |
| "loss": 0.5168, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.034208579881656806, | |
| "grad_norm": 0.018798828125, | |
| "learning_rate": 0.000126, | |
| "loss": 0.2794, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.035133136094674555, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000124, | |
| "loss": 0.6018, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.036057692307692304, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000122, | |
| "loss": 0.597, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.03698224852071006, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 0.00012, | |
| "loss": 0.3998, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.03790680473372781, | |
| "grad_norm": 0.0252685546875, | |
| "learning_rate": 0.000118, | |
| "loss": 0.3998, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.03883136094674556, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.000116, | |
| "loss": 0.8371, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.039755917159763315, | |
| "grad_norm": 0.10693359375, | |
| "learning_rate": 0.00011399999999999999, | |
| "loss": 0.4418, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.040680473372781065, | |
| "grad_norm": 0.068359375, | |
| "learning_rate": 0.00011200000000000001, | |
| "loss": 0.427, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.041605029585798814, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00011000000000000002, | |
| "loss": 0.5198, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.04252958579881657, | |
| "grad_norm": 0.244140625, | |
| "learning_rate": 0.00010800000000000001, | |
| "loss": 0.4875, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.04345414201183432, | |
| "grad_norm": 3.375, | |
| "learning_rate": 0.00010600000000000002, | |
| "loss": 0.4774, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.04437869822485207, | |
| "grad_norm": 3.234375, | |
| "learning_rate": 0.00010400000000000001, | |
| "loss": 0.4572, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.045303254437869825, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.00010200000000000001, | |
| "loss": 0.4148, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.046227810650887574, | |
| "grad_norm": 0.1240234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6811, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.04715236686390532, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.8e-05, | |
| "loss": 0.428, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.04807692307692308, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 9.6e-05, | |
| "loss": 0.3754, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.04900147928994083, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.4e-05, | |
| "loss": 0.4773, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.04992603550295858, | |
| "grad_norm": 3.765625, | |
| "learning_rate": 9.200000000000001e-05, | |
| "loss": 0.7884, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.050850591715976334, | |
| "grad_norm": 0.019775390625, | |
| "learning_rate": 9e-05, | |
| "loss": 0.2355, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.051775147928994084, | |
| "grad_norm": 0.07421875, | |
| "learning_rate": 8.800000000000001e-05, | |
| "loss": 0.5385, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.05269970414201183, | |
| "grad_norm": 2.625, | |
| "learning_rate": 8.6e-05, | |
| "loss": 0.4285, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.05362426035502959, | |
| "grad_norm": 4.96875, | |
| "learning_rate": 8.4e-05, | |
| "loss": 0.6612, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.05454881656804734, | |
| "grad_norm": 0.0198974609375, | |
| "learning_rate": 8.2e-05, | |
| "loss": 0.283, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.05547337278106509, | |
| "grad_norm": 4.3125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.414, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.05639792899408284, | |
| "grad_norm": 4.625, | |
| "learning_rate": 7.800000000000001e-05, | |
| "loss": 0.5375, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.05732248520710059, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.6e-05, | |
| "loss": 0.6285, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.05824704142011834, | |
| "grad_norm": 0.0118408203125, | |
| "learning_rate": 7.4e-05, | |
| "loss": 0.5064, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.05917159763313609, | |
| "grad_norm": 0.058837890625, | |
| "learning_rate": 7.2e-05, | |
| "loss": 0.4763, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.06009615384615385, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7e-05, | |
| "loss": 0.5027, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.0610207100591716, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 6.800000000000001e-05, | |
| "loss": 0.6967, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.061945266272189346, | |
| "grad_norm": 3.328125, | |
| "learning_rate": 6.6e-05, | |
| "loss": 0.3514, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.0628698224852071, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.400000000000001e-05, | |
| "loss": 0.4051, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.06379437869822485, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.2e-05, | |
| "loss": 0.4403, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.0647189349112426, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6e-05, | |
| "loss": 0.4544, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.06564349112426035, | |
| "grad_norm": 0.033203125, | |
| "learning_rate": 5.8e-05, | |
| "loss": 0.4363, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.06656804733727811, | |
| "grad_norm": 0.205078125, | |
| "learning_rate": 5.6000000000000006e-05, | |
| "loss": 0.4371, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.06749260355029586, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 5.4000000000000005e-05, | |
| "loss": 0.4322, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.06841715976331361, | |
| "grad_norm": 0.04736328125, | |
| "learning_rate": 5.2000000000000004e-05, | |
| "loss": 0.4264, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.06934171597633136, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4408, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.07026627218934911, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.5493, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.07119082840236686, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.3345, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.07211538461538461, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.319, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.07303994082840237, | |
| "grad_norm": 0.041015625, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.6092, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.07396449704142012, | |
| "grad_norm": 2.828125, | |
| "learning_rate": 4e-05, | |
| "loss": 0.4057, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.07488905325443787, | |
| "grad_norm": 0.0281982421875, | |
| "learning_rate": 3.8e-05, | |
| "loss": 0.3609, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.07581360946745562, | |
| "grad_norm": 0.025390625, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.4486, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.07673816568047337, | |
| "grad_norm": 0.034423828125, | |
| "learning_rate": 3.4000000000000007e-05, | |
| "loss": 0.5546, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.07766272189349112, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 0.4731, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.07858727810650888, | |
| "grad_norm": 0.061279296875, | |
| "learning_rate": 3e-05, | |
| "loss": 0.4029, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.07951183431952663, | |
| "grad_norm": 0.0693359375, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 0.5766, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.08043639053254438, | |
| "grad_norm": 17.875, | |
| "learning_rate": 2.6000000000000002e-05, | |
| "loss": 0.5435, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.08136094674556213, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.4748, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.08228550295857988, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 0.5257, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.08321005917159763, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4337, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.08413461538461539, | |
| "grad_norm": 0.048828125, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.5057, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.08505917159763314, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.5384, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.08598372781065089, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4000000000000001e-05, | |
| "loss": 0.4339, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.08690828402366864, | |
| "grad_norm": 4.25, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.6224, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.08783284023668639, | |
| "grad_norm": 0.0191650390625, | |
| "learning_rate": 1e-05, | |
| "loss": 0.3908, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.08875739644970414, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.5687, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.08968195266272189, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6e-06, | |
| "loss": 0.5476, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.09060650887573965, | |
| "grad_norm": 0.0145263671875, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.4753, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.0915310650887574, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.5388, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.09245562130177515, | |
| "grad_norm": 4.34375, | |
| "learning_rate": 0.0, | |
| "loss": 0.4994, | |
| "step": 5000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 5000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.979888803807232e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |