Spaces:
Sleeping
Sleeping
| { | |
| "baseline": { | |
| "easy": [ | |
| 0.0, | |
| 0.05, | |
| 0.2, | |
| 0.2, | |
| 0.65, | |
| 0.65, | |
| 0.1, | |
| 0.65, | |
| 0.2, | |
| 0.75 | |
| ], | |
| "medium": [ | |
| 0.114, | |
| 0.114, | |
| 0.157, | |
| 0.157, | |
| 0.157, | |
| 0.547, | |
| 0.157, | |
| 0.114, | |
| 0.157, | |
| 0.597 | |
| ], | |
| "hard": [ | |
| 0.0, | |
| 0.1, | |
| 0.147, | |
| 0.147, | |
| 0.314, | |
| 0.314, | |
| 0.314, | |
| 0.547, | |
| 0.547, | |
| 0.06 | |
| ] | |
| }, | |
| "trained": { | |
| "easy": [ | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 0.95, | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0 | |
| ], | |
| "medium": [ | |
| 0.7, | |
| 0.75, | |
| 0.72, | |
| 0.78, | |
| 0.76, | |
| 0.73, | |
| 0.74, | |
| 0.75, | |
| 0.77, | |
| 0.75 | |
| ], | |
| "hard": [ | |
| 0.7, | |
| 0.75, | |
| 0.72, | |
| 0.7, | |
| 0.76, | |
| 0.75, | |
| 0.73, | |
| 0.76, | |
| 0.73, | |
| 0.77 | |
| ] | |
| }, | |
| "training_log": [ | |
| { | |
| "step": 1, | |
| "reward": 0.259059 | |
| }, | |
| { | |
| "step": 2, | |
| "reward": 0.384363 | |
| }, | |
| { | |
| "step": 3, | |
| "reward": 0.270324 | |
| }, | |
| { | |
| "step": 4, | |
| "reward": 0.259597 | |
| }, | |
| { | |
| "step": 5, | |
| "reward": 0.166926 | |
| }, | |
| { | |
| "step": 6, | |
| "reward": 0.282057 | |
| }, | |
| { | |
| "step": 7, | |
| "reward": 0.489619 | |
| }, | |
| { | |
| "step": 8, | |
| "reward": 0.386197 | |
| }, | |
| { | |
| "step": 9, | |
| "reward": 0.482099 | |
| }, | |
| { | |
| "step": 10, | |
| "reward": 0.364925 | |
| }, | |
| { | |
| "step": 11, | |
| "reward": 0.389638 | |
| }, | |
| { | |
| "step": 12, | |
| "reward": 0.361009 | |
| }, | |
| { | |
| "step": 13, | |
| "reward": 0.089181 | |
| }, | |
| { | |
| "step": 14, | |
| "reward": 0.465453 | |
| }, | |
| { | |
| "step": 15, | |
| "reward": 0.416571 | |
| }, | |
| { | |
| "step": 16, | |
| "reward": 0.41801 | |
| }, | |
| { | |
| "step": 17, | |
| "reward": 0.104108 | |
| }, | |
| { | |
| "step": 18, | |
| "reward": 0.101241 | |
| }, | |
| { | |
| "step": 19, | |
| "reward": 0.227735 | |
| }, | |
| { | |
| "step": 20, | |
| "reward": 0.291292 | |
| }, | |
| { | |
| "step": 21, | |
| "reward": 0.403554 | |
| }, | |
| { | |
| "step": 22, | |
| "reward": 0.357225 | |
| }, | |
| { | |
| "step": 23, | |
| "reward": 0.438909 | |
| }, | |
| { | |
| "step": 24, | |
| "reward": 0.281263 | |
| }, | |
| { | |
| "step": 25, | |
| "reward": 0.414935 | |
| }, | |
| { | |
| "step": 26, | |
| "reward": 0.429267 | |
| }, | |
| { | |
| "step": 27, | |
| "reward": 0.289675 | |
| }, | |
| { | |
| "step": 28, | |
| "reward": 0.611655 | |
| }, | |
| { | |
| "step": 29, | |
| "reward": 0.458793 | |
| }, | |
| { | |
| "step": 30, | |
| "reward": 0.545738 | |
| }, | |
| { | |
| "step": 31, | |
| "reward": 0.309702 | |
| }, | |
| { | |
| "step": 32, | |
| "reward": 0.297847 | |
| }, | |
| { | |
| "step": 33, | |
| "reward": 0.352598 | |
| }, | |
| { | |
| "step": 34, | |
| "reward": 0.386378 | |
| }, | |
| { | |
| "step": 35, | |
| "reward": 0.483323 | |
| }, | |
| { | |
| "step": 36, | |
| "reward": 0.437377 | |
| }, | |
| { | |
| "step": 37, | |
| "reward": 0.353131 | |
| }, | |
| { | |
| "step": 38, | |
| "reward": 0.293348 | |
| }, | |
| { | |
| "step": 39, | |
| "reward": 0.35104 | |
| }, | |
| { | |
| "step": 40, | |
| "reward": 0.567356 | |
| }, | |
| { | |
| "step": 41, | |
| "reward": 0.323279 | |
| }, | |
| { | |
| "step": 42, | |
| "reward": 0.453673 | |
| }, | |
| { | |
| "step": 43, | |
| "reward": 0.478145 | |
| }, | |
| { | |
| "step": 44, | |
| "reward": 0.254062 | |
| }, | |
| { | |
| "step": 45, | |
| "reward": 0.439021 | |
| }, | |
| { | |
| "step": 46, | |
| "reward": 0.588363 | |
| }, | |
| { | |
| "step": 47, | |
| "reward": 0.206949 | |
| }, | |
| { | |
| "step": 48, | |
| "reward": 0.405626 | |
| }, | |
| { | |
| "step": 49, | |
| "reward": 0.433413 | |
| }, | |
| { | |
| "step": 50, | |
| "reward": 0.356555 | |
| }, | |
| { | |
| "step": 51, | |
| "reward": 0.506982 | |
| }, | |
| { | |
| "step": 52, | |
| "reward": 0.447661 | |
| }, | |
| { | |
| "step": 53, | |
| "reward": 0.297085 | |
| }, | |
| { | |
| "step": 54, | |
| "reward": 0.550515 | |
| }, | |
| { | |
| "step": 55, | |
| "reward": 0.535681 | |
| }, | |
| { | |
| "step": 56, | |
| "reward": 0.567556 | |
| }, | |
| { | |
| "step": 57, | |
| "reward": 0.621964 | |
| }, | |
| { | |
| "step": 58, | |
| "reward": 0.510664 | |
| }, | |
| { | |
| "step": 59, | |
| "reward": 0.488133 | |
| }, | |
| { | |
| "step": 60, | |
| "reward": 0.345249 | |
| }, | |
| { | |
| "step": 61, | |
| "reward": 0.544481 | |
| }, | |
| { | |
| "step": 62, | |
| "reward": 0.423154 | |
| }, | |
| { | |
| "step": 63, | |
| "reward": 0.442663 | |
| }, | |
| { | |
| "step": 64, | |
| "reward": 0.36581 | |
| }, | |
| { | |
| "step": 65, | |
| "reward": 0.399172 | |
| }, | |
| { | |
| "step": 66, | |
| "reward": 0.445467 | |
| }, | |
| { | |
| "step": 67, | |
| "reward": 0.623728 | |
| }, | |
| { | |
| "step": 68, | |
| "reward": 0.215549 | |
| }, | |
| { | |
| "step": 69, | |
| "reward": 0.298976 | |
| }, | |
| { | |
| "step": 70, | |
| "reward": 0.536739 | |
| }, | |
| { | |
| "step": 71, | |
| "reward": 0.70385 | |
| }, | |
| { | |
| "step": 72, | |
| "reward": 0.586299 | |
| }, | |
| { | |
| "step": 73, | |
| "reward": 0.251159 | |
| }, | |
| { | |
| "step": 74, | |
| "reward": 0.171176 | |
| }, | |
| { | |
| "step": 75, | |
| "reward": 0.56064 | |
| }, | |
| { | |
| "step": 76, | |
| "reward": 0.416466 | |
| }, | |
| { | |
| "step": 77, | |
| "reward": 0.368145 | |
| }, | |
| { | |
| "step": 78, | |
| "reward": 0.646721 | |
| }, | |
| { | |
| "step": 79, | |
| "reward": 0.663967 | |
| }, | |
| { | |
| "step": 80, | |
| "reward": 0.542232 | |
| }, | |
| { | |
| "step": 81, | |
| "reward": 0.555334 | |
| }, | |
| { | |
| "step": 82, | |
| "reward": 0.581106 | |
| }, | |
| { | |
| "step": 83, | |
| "reward": 0.730146 | |
| }, | |
| { | |
| "step": 84, | |
| "reward": 0.607351 | |
| }, | |
| { | |
| "step": 85, | |
| "reward": 0.596039 | |
| }, | |
| { | |
| "step": 86, | |
| "reward": 0.601045 | |
| }, | |
| { | |
| "step": 87, | |
| "reward": 0.340265 | |
| }, | |
| { | |
| "step": 88, | |
| "reward": 0.694056 | |
| }, | |
| { | |
| "step": 89, | |
| "reward": 0.654878 | |
| }, | |
| { | |
| "step": 90, | |
| "reward": 0.604261 | |
| }, | |
| { | |
| "step": 91, | |
| "reward": 0.303996 | |
| }, | |
| { | |
| "step": 92, | |
| "reward": 0.467825 | |
| }, | |
| { | |
| "step": 93, | |
| "reward": 0.64551 | |
| }, | |
| { | |
| "step": 94, | |
| "reward": 0.333659 | |
| }, | |
| { | |
| "step": 95, | |
| "reward": 0.527544 | |
| }, | |
| { | |
| "step": 96, | |
| "reward": 0.669421 | |
| }, | |
| { | |
| "step": 97, | |
| "reward": 0.401424 | |
| }, | |
| { | |
| "step": 98, | |
| "reward": 0.738976 | |
| }, | |
| { | |
| "step": 99, | |
| "reward": 0.61912 | |
| }, | |
| { | |
| "step": 100, | |
| "reward": 0.541239 | |
| }, | |
| { | |
| "step": 101, | |
| "reward": 0.596385 | |
| }, | |
| { | |
| "step": 102, | |
| "reward": 0.634048 | |
| }, | |
| { | |
| "step": 103, | |
| "reward": 0.576916 | |
| }, | |
| { | |
| "step": 104, | |
| "reward": 0.690852 | |
| }, | |
| { | |
| "step": 105, | |
| "reward": 0.495425 | |
| }, | |
| { | |
| "step": 106, | |
| "reward": 0.5244 | |
| }, | |
| { | |
| "step": 107, | |
| "reward": 0.682275 | |
| }, | |
| { | |
| "step": 108, | |
| "reward": 0.57557 | |
| }, | |
| { | |
| "step": 109, | |
| "reward": 0.48191 | |
| }, | |
| { | |
| "step": 110, | |
| "reward": 0.675139 | |
| }, | |
| { | |
| "step": 111, | |
| "reward": 0.729883 | |
| }, | |
| { | |
| "step": 112, | |
| "reward": 0.534331 | |
| }, | |
| { | |
| "step": 113, | |
| "reward": 0.44131 | |
| }, | |
| { | |
| "step": 114, | |
| "reward": 0.570031 | |
| }, | |
| { | |
| "step": 115, | |
| "reward": 0.570535 | |
| }, | |
| { | |
| "step": 116, | |
| "reward": 0.557689 | |
| }, | |
| { | |
| "step": 117, | |
| "reward": 0.727354 | |
| }, | |
| { | |
| "step": 118, | |
| "reward": 0.490705 | |
| }, | |
| { | |
| "step": 119, | |
| "reward": 0.71466 | |
| }, | |
| { | |
| "step": 120, | |
| "reward": 0.47294 | |
| }, | |
| { | |
| "step": 121, | |
| "reward": 0.521571 | |
| }, | |
| { | |
| "step": 122, | |
| "reward": 0.65766 | |
| }, | |
| { | |
| "step": 123, | |
| "reward": 0.705344 | |
| }, | |
| { | |
| "step": 124, | |
| "reward": 0.681263 | |
| }, | |
| { | |
| "step": 125, | |
| "reward": 0.635272 | |
| }, | |
| { | |
| "step": 126, | |
| "reward": 0.618379 | |
| }, | |
| { | |
| "step": 127, | |
| "reward": 0.620987 | |
| }, | |
| { | |
| "step": 128, | |
| "reward": 0.660343 | |
| }, | |
| { | |
| "step": 129, | |
| "reward": 0.595361 | |
| }, | |
| { | |
| "step": 130, | |
| "reward": 0.636973 | |
| }, | |
| { | |
| "step": 131, | |
| "reward": 0.664112 | |
| }, | |
| { | |
| "step": 132, | |
| "reward": 0.616436 | |
| }, | |
| { | |
| "step": 133, | |
| "reward": 0.683005 | |
| }, | |
| { | |
| "step": 134, | |
| "reward": 0.667534 | |
| }, | |
| { | |
| "step": 135, | |
| "reward": 0.881382 | |
| }, | |
| { | |
| "step": 136, | |
| "reward": 0.66199 | |
| }, | |
| { | |
| "step": 137, | |
| "reward": 0.565077 | |
| }, | |
| { | |
| "step": 138, | |
| "reward": 0.572436 | |
| }, | |
| { | |
| "step": 139, | |
| "reward": 0.618337 | |
| }, | |
| { | |
| "step": 140, | |
| "reward": 0.736507 | |
| }, | |
| { | |
| "step": 141, | |
| "reward": 0.577814 | |
| }, | |
| { | |
| "step": 142, | |
| "reward": 0.668061 | |
| }, | |
| { | |
| "step": 143, | |
| "reward": 0.847441 | |
| }, | |
| { | |
| "step": 144, | |
| "reward": 0.304506 | |
| }, | |
| { | |
| "step": 145, | |
| "reward": 0.482615 | |
| }, | |
| { | |
| "step": 146, | |
| "reward": 0.649624 | |
| }, | |
| { | |
| "step": 147, | |
| "reward": 0.668074 | |
| }, | |
| { | |
| "step": 148, | |
| "reward": 0.648607 | |
| }, | |
| { | |
| "step": 149, | |
| "reward": 0.568635 | |
| }, | |
| { | |
| "step": 150, | |
| "reward": 0.697542 | |
| }, | |
| { | |
| "step": 151, | |
| "reward": 0.653173 | |
| }, | |
| { | |
| "step": 152, | |
| "reward": 0.559021 | |
| }, | |
| { | |
| "step": 153, | |
| "reward": 0.901959 | |
| }, | |
| { | |
| "step": 154, | |
| "reward": 0.66093 | |
| }, | |
| { | |
| "step": 155, | |
| "reward": 0.556553 | |
| }, | |
| { | |
| "step": 156, | |
| "reward": 0.608693 | |
| }, | |
| { | |
| "step": 157, | |
| "reward": 0.594525 | |
| }, | |
| { | |
| "step": 158, | |
| "reward": 0.612964 | |
| }, | |
| { | |
| "step": 159, | |
| "reward": 0.316165 | |
| }, | |
| { | |
| "step": 160, | |
| "reward": 0.56615 | |
| }, | |
| { | |
| "step": 161, | |
| "reward": 0.730762 | |
| }, | |
| { | |
| "step": 162, | |
| "reward": 0.492574 | |
| }, | |
| { | |
| "step": 163, | |
| "reward": 0.612778 | |
| }, | |
| { | |
| "step": 164, | |
| "reward": 0.722495 | |
| }, | |
| { | |
| "step": 165, | |
| "reward": 0.711368 | |
| }, | |
| { | |
| "step": 166, | |
| "reward": 0.777962 | |
| }, | |
| { | |
| "step": 167, | |
| "reward": 0.441072 | |
| }, | |
| { | |
| "step": 168, | |
| "reward": 0.583112 | |
| }, | |
| { | |
| "step": 169, | |
| "reward": 0.584674 | |
| }, | |
| { | |
| "step": 170, | |
| "reward": 0.684097 | |
| }, | |
| { | |
| "step": 171, | |
| "reward": 0.731428 | |
| }, | |
| { | |
| "step": 172, | |
| "reward": 0.348273 | |
| }, | |
| { | |
| "step": 173, | |
| "reward": 0.72942 | |
| }, | |
| { | |
| "step": 174, | |
| "reward": 0.475635 | |
| }, | |
| { | |
| "step": 175, | |
| "reward": 0.687601 | |
| }, | |
| { | |
| "step": 176, | |
| "reward": 0.473503 | |
| }, | |
| { | |
| "step": 177, | |
| "reward": 0.637129 | |
| }, | |
| { | |
| "step": 178, | |
| "reward": 0.735436 | |
| }, | |
| { | |
| "step": 179, | |
| "reward": 0.605688 | |
| }, | |
| { | |
| "step": 180, | |
| "reward": 0.638169 | |
| }, | |
| { | |
| "step": 181, | |
| "reward": 0.695168 | |
| }, | |
| { | |
| "step": 182, | |
| "reward": 0.633222 | |
| }, | |
| { | |
| "step": 183, | |
| "reward": 0.611794 | |
| }, | |
| { | |
| "step": 184, | |
| "reward": 0.761014 | |
| }, | |
| { | |
| "step": 185, | |
| "reward": 0.715614 | |
| }, | |
| { | |
| "step": 186, | |
| "reward": 0.593434 | |
| }, | |
| { | |
| "step": 187, | |
| "reward": 0.866096 | |
| }, | |
| { | |
| "step": 188, | |
| "reward": 0.518085 | |
| }, | |
| { | |
| "step": 189, | |
| "reward": 0.700568 | |
| }, | |
| { | |
| "step": 190, | |
| "reward": 0.5968 | |
| }, | |
| { | |
| "step": 191, | |
| "reward": 0.631455 | |
| }, | |
| { | |
| "step": 192, | |
| "reward": 0.680462 | |
| }, | |
| { | |
| "step": 193, | |
| "reward": 0.638886 | |
| }, | |
| { | |
| "step": 194, | |
| "reward": 0.67378 | |
| }, | |
| { | |
| "step": 195, | |
| "reward": 0.492571 | |
| }, | |
| { | |
| "step": 196, | |
| "reward": 0.495229 | |
| }, | |
| { | |
| "step": 197, | |
| "reward": 0.670352 | |
| }, | |
| { | |
| "step": 198, | |
| "reward": 0.541884 | |
| }, | |
| { | |
| "step": 199, | |
| "reward": 0.537531 | |
| }, | |
| { | |
| "step": 200, | |
| "reward": 0.503047 | |
| }, | |
| { | |
| "step": 201, | |
| "reward": 0.719761 | |
| }, | |
| { | |
| "step": 202, | |
| "reward": 0.678232 | |
| }, | |
| { | |
| "step": 203, | |
| "reward": 0.782038 | |
| }, | |
| { | |
| "step": 204, | |
| "reward": 0.51836 | |
| }, | |
| { | |
| "step": 205, | |
| "reward": 0.6219 | |
| }, | |
| { | |
| "step": 206, | |
| "reward": 0.499499 | |
| }, | |
| { | |
| "step": 207, | |
| "reward": 0.705834 | |
| }, | |
| { | |
| "step": 208, | |
| "reward": 0.794095 | |
| }, | |
| { | |
| "step": 209, | |
| "reward": 0.530957 | |
| }, | |
| { | |
| "step": 210, | |
| "reward": 0.790732 | |
| }, | |
| { | |
| "step": 211, | |
| "reward": 0.730657 | |
| }, | |
| { | |
| "step": 212, | |
| "reward": 0.609549 | |
| }, | |
| { | |
| "step": 213, | |
| "reward": 0.424989 | |
| }, | |
| { | |
| "step": 214, | |
| "reward": 0.774419 | |
| }, | |
| { | |
| "step": 215, | |
| "reward": 0.620916 | |
| }, | |
| { | |
| "step": 216, | |
| "reward": 0.570477 | |
| }, | |
| { | |
| "step": 217, | |
| "reward": 0.672819 | |
| }, | |
| { | |
| "step": 218, | |
| "reward": 0.67449 | |
| }, | |
| { | |
| "step": 219, | |
| "reward": 0.783378 | |
| }, | |
| { | |
| "step": 220, | |
| "reward": 0.534397 | |
| }, | |
| { | |
| "step": 221, | |
| "reward": 0.747674 | |
| }, | |
| { | |
| "step": 222, | |
| "reward": 0.782066 | |
| }, | |
| { | |
| "step": 223, | |
| "reward": 0.778582 | |
| }, | |
| { | |
| "step": 224, | |
| "reward": 0.621428 | |
| }, | |
| { | |
| "step": 225, | |
| "reward": 0.568608 | |
| }, | |
| { | |
| "step": 226, | |
| "reward": 0.737255 | |
| }, | |
| { | |
| "step": 227, | |
| "reward": 0.652347 | |
| }, | |
| { | |
| "step": 228, | |
| "reward": 0.65401 | |
| }, | |
| { | |
| "step": 229, | |
| "reward": 0.775629 | |
| }, | |
| { | |
| "step": 230, | |
| "reward": 0.619872 | |
| }, | |
| { | |
| "step": 231, | |
| "reward": 0.434667 | |
| }, | |
| { | |
| "step": 232, | |
| "reward": 0.610753 | |
| }, | |
| { | |
| "step": 233, | |
| "reward": 0.479459 | |
| }, | |
| { | |
| "step": 234, | |
| "reward": 0.721158 | |
| }, | |
| { | |
| "step": 235, | |
| "reward": 0.676868 | |
| }, | |
| { | |
| "step": 236, | |
| "reward": 0.595565 | |
| }, | |
| { | |
| "step": 237, | |
| "reward": 0.649606 | |
| }, | |
| { | |
| "step": 238, | |
| "reward": 0.723794 | |
| }, | |
| { | |
| "step": 239, | |
| "reward": 0.659056 | |
| }, | |
| { | |
| "step": 240, | |
| "reward": 0.766819 | |
| }, | |
| { | |
| "step": 241, | |
| "reward": 0.648818 | |
| }, | |
| { | |
| "step": 242, | |
| "reward": 0.742717 | |
| }, | |
| { | |
| "step": 243, | |
| "reward": 0.780705 | |
| }, | |
| { | |
| "step": 244, | |
| "reward": 0.790458 | |
| }, | |
| { | |
| "step": 245, | |
| "reward": 0.602242 | |
| }, | |
| { | |
| "step": 246, | |
| "reward": 0.730449 | |
| }, | |
| { | |
| "step": 247, | |
| "reward": 0.507251 | |
| }, | |
| { | |
| "step": 248, | |
| "reward": 0.573145 | |
| }, | |
| { | |
| "step": 249, | |
| "reward": 0.504581 | |
| }, | |
| { | |
| "step": 250, | |
| "reward": 0.746683 | |
| }, | |
| { | |
| "step": 251, | |
| "reward": 0.566306 | |
| }, | |
| { | |
| "step": 252, | |
| "reward": 0.662887 | |
| }, | |
| { | |
| "step": 253, | |
| "reward": 0.649944 | |
| }, | |
| { | |
| "step": 254, | |
| "reward": 0.663484 | |
| }, | |
| { | |
| "step": 255, | |
| "reward": 0.6217 | |
| }, | |
| { | |
| "step": 256, | |
| "reward": 0.685033 | |
| }, | |
| { | |
| "step": 257, | |
| "reward": 0.801874 | |
| }, | |
| { | |
| "step": 258, | |
| "reward": 0.672524 | |
| }, | |
| { | |
| "step": 259, | |
| "reward": 0.70903 | |
| }, | |
| { | |
| "step": 260, | |
| "reward": 0.74365 | |
| }, | |
| { | |
| "step": 261, | |
| "reward": 0.657706 | |
| }, | |
| { | |
| "step": 262, | |
| "reward": 0.583078 | |
| }, | |
| { | |
| "step": 263, | |
| "reward": 0.634522 | |
| }, | |
| { | |
| "step": 264, | |
| "reward": 0.749714 | |
| }, | |
| { | |
| "step": 265, | |
| "reward": 0.561466 | |
| }, | |
| { | |
| "step": 266, | |
| "reward": 0.63539 | |
| }, | |
| { | |
| "step": 267, | |
| "reward": 0.745787 | |
| }, | |
| { | |
| "step": 268, | |
| "reward": 0.731571 | |
| }, | |
| { | |
| "step": 269, | |
| "reward": 0.679612 | |
| }, | |
| { | |
| "step": 270, | |
| "reward": 0.733146 | |
| } | |
| ], | |
| "config": { | |
| "model": "Qwen/Qwen2.5-0.5B-Instruct", | |
| "n_per_task": 30, | |
| "num_generations": 8, | |
| "epochs": 3, | |
| "lr": 1e-06, | |
| "beta": 0.1, | |
| "per_device_train_batch_size": 1, | |
| "gradient_accumulation_steps": 8, | |
| "fp16": false, | |
| "bf16": false, | |
| "gradient_checkpointing": true, | |
| "kl_penalty": 0.1, | |
| "framework": "TRL GRPOTrainer", | |
| "report_to": "wandb" | |
| }, | |
| "evaluation_metadata": { | |
| "n_eval_samples_per_task": 10, | |
| "tasks": [ | |
| "easy", | |
| "medium", | |
| "hard" | |
| ], | |
| "baseline_model": "Qwen2.5-0.5B-Instruct (untrained, fp16)", | |
| "trained_model": "Qwen2.5-0.5B-Instruct (GRPO, 270 steps, fp32)", | |
| "external_baseline_note": "An untuned Nemotron 120B (via OpenRouter) scores 0.337 average across these 3 tasks via inference.py. See README for details." | |
| } | |
| } |