exec-assist / results.json
{
"baseline": {
"easy": [
0.0,
0.05,
0.2,
0.2,
0.65,
0.65,
0.1,
0.65,
0.2,
0.75
],
"medium": [
0.114,
0.114,
0.157,
0.157,
0.157,
0.547,
0.157,
0.114,
0.157,
0.597
],
"hard": [
0.0,
0.1,
0.147,
0.147,
0.314,
0.314,
0.314,
0.547,
0.547,
0.06
]
},
"trained": {
"easy": [
1.0,
1.0,
1.0,
1.0,
1.0,
0.95,
1.0,
1.0,
1.0,
1.0
],
"medium": [
0.7,
0.75,
0.72,
0.78,
0.76,
0.73,
0.74,
0.75,
0.77,
0.75
],
"hard": [
0.7,
0.75,
0.72,
0.7,
0.76,
0.75,
0.73,
0.76,
0.73,
0.77
]
},
"training_log": [
{
"step": 1,
"reward": 0.259059
},
{
"step": 2,
"reward": 0.384363
},
{
"step": 3,
"reward": 0.270324
},
{
"step": 4,
"reward": 0.259597
},
{
"step": 5,
"reward": 0.166926
},
{
"step": 6,
"reward": 0.282057
},
{
"step": 7,
"reward": 0.489619
},
{
"step": 8,
"reward": 0.386197
},
{
"step": 9,
"reward": 0.482099
},
{
"step": 10,
"reward": 0.364925
},
{
"step": 11,
"reward": 0.389638
},
{
"step": 12,
"reward": 0.361009
},
{
"step": 13,
"reward": 0.089181
},
{
"step": 14,
"reward": 0.465453
},
{
"step": 15,
"reward": 0.416571
},
{
"step": 16,
"reward": 0.41801
},
{
"step": 17,
"reward": 0.104108
},
{
"step": 18,
"reward": 0.101241
},
{
"step": 19,
"reward": 0.227735
},
{
"step": 20,
"reward": 0.291292
},
{
"step": 21,
"reward": 0.403554
},
{
"step": 22,
"reward": 0.357225
},
{
"step": 23,
"reward": 0.438909
},
{
"step": 24,
"reward": 0.281263
},
{
"step": 25,
"reward": 0.414935
},
{
"step": 26,
"reward": 0.429267
},
{
"step": 27,
"reward": 0.289675
},
{
"step": 28,
"reward": 0.611655
},
{
"step": 29,
"reward": 0.458793
},
{
"step": 30,
"reward": 0.545738
},
{
"step": 31,
"reward": 0.309702
},
{
"step": 32,
"reward": 0.297847
},
{
"step": 33,
"reward": 0.352598
},
{
"step": 34,
"reward": 0.386378
},
{
"step": 35,
"reward": 0.483323
},
{
"step": 36,
"reward": 0.437377
},
{
"step": 37,
"reward": 0.353131
},
{
"step": 38,
"reward": 0.293348
},
{
"step": 39,
"reward": 0.35104
},
{
"step": 40,
"reward": 0.567356
},
{
"step": 41,
"reward": 0.323279
},
{
"step": 42,
"reward": 0.453673
},
{
"step": 43,
"reward": 0.478145
},
{
"step": 44,
"reward": 0.254062
},
{
"step": 45,
"reward": 0.439021
},
{
"step": 46,
"reward": 0.588363
},
{
"step": 47,
"reward": 0.206949
},
{
"step": 48,
"reward": 0.405626
},
{
"step": 49,
"reward": 0.433413
},
{
"step": 50,
"reward": 0.356555
},
{
"step": 51,
"reward": 0.506982
},
{
"step": 52,
"reward": 0.447661
},
{
"step": 53,
"reward": 0.297085
},
{
"step": 54,
"reward": 0.550515
},
{
"step": 55,
"reward": 0.535681
},
{
"step": 56,
"reward": 0.567556
},
{
"step": 57,
"reward": 0.621964
},
{
"step": 58,
"reward": 0.510664
},
{
"step": 59,
"reward": 0.488133
},
{
"step": 60,
"reward": 0.345249
},
{
"step": 61,
"reward": 0.544481
},
{
"step": 62,
"reward": 0.423154
},
{
"step": 63,
"reward": 0.442663
},
{
"step": 64,
"reward": 0.36581
},
{
"step": 65,
"reward": 0.399172
},
{
"step": 66,
"reward": 0.445467
},
{
"step": 67,
"reward": 0.623728
},
{
"step": 68,
"reward": 0.215549
},
{
"step": 69,
"reward": 0.298976
},
{
"step": 70,
"reward": 0.536739
},
{
"step": 71,
"reward": 0.70385
},
{
"step": 72,
"reward": 0.586299
},
{
"step": 73,
"reward": 0.251159
},
{
"step": 74,
"reward": 0.171176
},
{
"step": 75,
"reward": 0.56064
},
{
"step": 76,
"reward": 0.416466
},
{
"step": 77,
"reward": 0.368145
},
{
"step": 78,
"reward": 0.646721
},
{
"step": 79,
"reward": 0.663967
},
{
"step": 80,
"reward": 0.542232
},
{
"step": 81,
"reward": 0.555334
},
{
"step": 82,
"reward": 0.581106
},
{
"step": 83,
"reward": 0.730146
},
{
"step": 84,
"reward": 0.607351
},
{
"step": 85,
"reward": 0.596039
},
{
"step": 86,
"reward": 0.601045
},
{
"step": 87,
"reward": 0.340265
},
{
"step": 88,
"reward": 0.694056
},
{
"step": 89,
"reward": 0.654878
},
{
"step": 90,
"reward": 0.604261
},
{
"step": 91,
"reward": 0.303996
},
{
"step": 92,
"reward": 0.467825
},
{
"step": 93,
"reward": 0.64551
},
{
"step": 94,
"reward": 0.333659
},
{
"step": 95,
"reward": 0.527544
},
{
"step": 96,
"reward": 0.669421
},
{
"step": 97,
"reward": 0.401424
},
{
"step": 98,
"reward": 0.738976
},
{
"step": 99,
"reward": 0.61912
},
{
"step": 100,
"reward": 0.541239
},
{
"step": 101,
"reward": 0.596385
},
{
"step": 102,
"reward": 0.634048
},
{
"step": 103,
"reward": 0.576916
},
{
"step": 104,
"reward": 0.690852
},
{
"step": 105,
"reward": 0.495425
},
{
"step": 106,
"reward": 0.5244
},
{
"step": 107,
"reward": 0.682275
},
{
"step": 108,
"reward": 0.57557
},
{
"step": 109,
"reward": 0.48191
},
{
"step": 110,
"reward": 0.675139
},
{
"step": 111,
"reward": 0.729883
},
{
"step": 112,
"reward": 0.534331
},
{
"step": 113,
"reward": 0.44131
},
{
"step": 114,
"reward": 0.570031
},
{
"step": 115,
"reward": 0.570535
},
{
"step": 116,
"reward": 0.557689
},
{
"step": 117,
"reward": 0.727354
},
{
"step": 118,
"reward": 0.490705
},
{
"step": 119,
"reward": 0.71466
},
{
"step": 120,
"reward": 0.47294
},
{
"step": 121,
"reward": 0.521571
},
{
"step": 122,
"reward": 0.65766
},
{
"step": 123,
"reward": 0.705344
},
{
"step": 124,
"reward": 0.681263
},
{
"step": 125,
"reward": 0.635272
},
{
"step": 126,
"reward": 0.618379
},
{
"step": 127,
"reward": 0.620987
},
{
"step": 128,
"reward": 0.660343
},
{
"step": 129,
"reward": 0.595361
},
{
"step": 130,
"reward": 0.636973
},
{
"step": 131,
"reward": 0.664112
},
{
"step": 132,
"reward": 0.616436
},
{
"step": 133,
"reward": 0.683005
},
{
"step": 134,
"reward": 0.667534
},
{
"step": 135,
"reward": 0.881382
},
{
"step": 136,
"reward": 0.66199
},
{
"step": 137,
"reward": 0.565077
},
{
"step": 138,
"reward": 0.572436
},
{
"step": 139,
"reward": 0.618337
},
{
"step": 140,
"reward": 0.736507
},
{
"step": 141,
"reward": 0.577814
},
{
"step": 142,
"reward": 0.668061
},
{
"step": 143,
"reward": 0.847441
},
{
"step": 144,
"reward": 0.304506
},
{
"step": 145,
"reward": 0.482615
},
{
"step": 146,
"reward": 0.649624
},
{
"step": 147,
"reward": 0.668074
},
{
"step": 148,
"reward": 0.648607
},
{
"step": 149,
"reward": 0.568635
},
{
"step": 150,
"reward": 0.697542
},
{
"step": 151,
"reward": 0.653173
},
{
"step": 152,
"reward": 0.559021
},
{
"step": 153,
"reward": 0.901959
},
{
"step": 154,
"reward": 0.66093
},
{
"step": 155,
"reward": 0.556553
},
{
"step": 156,
"reward": 0.608693
},
{
"step": 157,
"reward": 0.594525
},
{
"step": 158,
"reward": 0.612964
},
{
"step": 159,
"reward": 0.316165
},
{
"step": 160,
"reward": 0.56615
},
{
"step": 161,
"reward": 0.730762
},
{
"step": 162,
"reward": 0.492574
},
{
"step": 163,
"reward": 0.612778
},
{
"step": 164,
"reward": 0.722495
},
{
"step": 165,
"reward": 0.711368
},
{
"step": 166,
"reward": 0.777962
},
{
"step": 167,
"reward": 0.441072
},
{
"step": 168,
"reward": 0.583112
},
{
"step": 169,
"reward": 0.584674
},
{
"step": 170,
"reward": 0.684097
},
{
"step": 171,
"reward": 0.731428
},
{
"step": 172,
"reward": 0.348273
},
{
"step": 173,
"reward": 0.72942
},
{
"step": 174,
"reward": 0.475635
},
{
"step": 175,
"reward": 0.687601
},
{
"step": 176,
"reward": 0.473503
},
{
"step": 177,
"reward": 0.637129
},
{
"step": 178,
"reward": 0.735436
},
{
"step": 179,
"reward": 0.605688
},
{
"step": 180,
"reward": 0.638169
},
{
"step": 181,
"reward": 0.695168
},
{
"step": 182,
"reward": 0.633222
},
{
"step": 183,
"reward": 0.611794
},
{
"step": 184,
"reward": 0.761014
},
{
"step": 185,
"reward": 0.715614
},
{
"step": 186,
"reward": 0.593434
},
{
"step": 187,
"reward": 0.866096
},
{
"step": 188,
"reward": 0.518085
},
{
"step": 189,
"reward": 0.700568
},
{
"step": 190,
"reward": 0.5968
},
{
"step": 191,
"reward": 0.631455
},
{
"step": 192,
"reward": 0.680462
},
{
"step": 193,
"reward": 0.638886
},
{
"step": 194,
"reward": 0.67378
},
{
"step": 195,
"reward": 0.492571
},
{
"step": 196,
"reward": 0.495229
},
{
"step": 197,
"reward": 0.670352
},
{
"step": 198,
"reward": 0.541884
},
{
"step": 199,
"reward": 0.537531
},
{
"step": 200,
"reward": 0.503047
},
{
"step": 201,
"reward": 0.719761
},
{
"step": 202,
"reward": 0.678232
},
{
"step": 203,
"reward": 0.782038
},
{
"step": 204,
"reward": 0.51836
},
{
"step": 205,
"reward": 0.6219
},
{
"step": 206,
"reward": 0.499499
},
{
"step": 207,
"reward": 0.705834
},
{
"step": 208,
"reward": 0.794095
},
{
"step": 209,
"reward": 0.530957
},
{
"step": 210,
"reward": 0.790732
},
{
"step": 211,
"reward": 0.730657
},
{
"step": 212,
"reward": 0.609549
},
{
"step": 213,
"reward": 0.424989
},
{
"step": 214,
"reward": 0.774419
},
{
"step": 215,
"reward": 0.620916
},
{
"step": 216,
"reward": 0.570477
},
{
"step": 217,
"reward": 0.672819
},
{
"step": 218,
"reward": 0.67449
},
{
"step": 219,
"reward": 0.783378
},
{
"step": 220,
"reward": 0.534397
},
{
"step": 221,
"reward": 0.747674
},
{
"step": 222,
"reward": 0.782066
},
{
"step": 223,
"reward": 0.778582
},
{
"step": 224,
"reward": 0.621428
},
{
"step": 225,
"reward": 0.568608
},
{
"step": 226,
"reward": 0.737255
},
{
"step": 227,
"reward": 0.652347
},
{
"step": 228,
"reward": 0.65401
},
{
"step": 229,
"reward": 0.775629
},
{
"step": 230,
"reward": 0.619872
},
{
"step": 231,
"reward": 0.434667
},
{
"step": 232,
"reward": 0.610753
},
{
"step": 233,
"reward": 0.479459
},
{
"step": 234,
"reward": 0.721158
},
{
"step": 235,
"reward": 0.676868
},
{
"step": 236,
"reward": 0.595565
},
{
"step": 237,
"reward": 0.649606
},
{
"step": 238,
"reward": 0.723794
},
{
"step": 239,
"reward": 0.659056
},
{
"step": 240,
"reward": 0.766819
},
{
"step": 241,
"reward": 0.648818
},
{
"step": 242,
"reward": 0.742717
},
{
"step": 243,
"reward": 0.780705
},
{
"step": 244,
"reward": 0.790458
},
{
"step": 245,
"reward": 0.602242
},
{
"step": 246,
"reward": 0.730449
},
{
"step": 247,
"reward": 0.507251
},
{
"step": 248,
"reward": 0.573145
},
{
"step": 249,
"reward": 0.504581
},
{
"step": 250,
"reward": 0.746683
},
{
"step": 251,
"reward": 0.566306
},
{
"step": 252,
"reward": 0.662887
},
{
"step": 253,
"reward": 0.649944
},
{
"step": 254,
"reward": 0.663484
},
{
"step": 255,
"reward": 0.6217
},
{
"step": 256,
"reward": 0.685033
},
{
"step": 257,
"reward": 0.801874
},
{
"step": 258,
"reward": 0.672524
},
{
"step": 259,
"reward": 0.70903
},
{
"step": 260,
"reward": 0.74365
},
{
"step": 261,
"reward": 0.657706
},
{
"step": 262,
"reward": 0.583078
},
{
"step": 263,
"reward": 0.634522
},
{
"step": 264,
"reward": 0.749714
},
{
"step": 265,
"reward": 0.561466
},
{
"step": 266,
"reward": 0.63539
},
{
"step": 267,
"reward": 0.745787
},
{
"step": 268,
"reward": 0.731571
},
{
"step": 269,
"reward": 0.679612
},
{
"step": 270,
"reward": 0.733146
}
],
"config": {
"model": "Qwen/Qwen2.5-0.5B-Instruct",
"n_per_task": 30,
"num_generations": 8,
"epochs": 3,
"lr": 1e-06,
"beta": 0.1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"fp16": false,
"bf16": false,
"gradient_checkpointing": true,
"kl_penalty": 0.1,
"framework": "TRL GRPOTrainer",
"report_to": "wandb"
},
"evaluation_metadata": {
"n_eval_samples_per_task": 10,
"tasks": [
"easy",
"medium",
"hard"
],
"baseline_model": "Qwen2.5-0.5B-Instruct (untrained, fp16)",
"trained_model": "Qwen2.5-0.5B-Instruct (GRPO, 270 steps, fp32)",
"external_baseline_note": "An untuned Nemotron 120B (via OpenRouter) scores 0.337 average across these 3 tasks via inference.py. See README for details."
}
}
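
Below is a minimal sketch (not part of the uploaded file) of how this results.json could be summarized once downloaded locally: it averages the ten per-split evaluation scores for the baseline and trained checkpoints and the 270 logged GRPO rewards. The local file path is an assumption.

import json
import statistics

# Load the results file shown above (assumed to be saved locally as results.json).
with open("results.json") as f:
    results = json.load(f)

# Average the 10 evaluation scores per difficulty for both checkpoints.
for split in ("easy", "medium", "hard"):
    base = statistics.mean(results["baseline"][split])
    trained = statistics.mean(results["trained"][split])
    print(f"{split:>6}: baseline {base:.3f} -> trained {trained:.3f}")

# Mean reward over the logged GRPO steps, plus the last-10-step average
# as a rough indication of where training ended up.
rewards = [entry["reward"] for entry in results["training_log"]]
print(f"mean reward over {len(rewards)} steps: {statistics.mean(rewards):.3f}")
print(f"last 10 steps: {statistics.mean(rewards[-10:]):.3f}")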
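
For reference, a hedged sketch of how the "config" block might map onto TRL's GRPOConfig (the config names "TRL GRPOTrainer" as the framework). Argument names follow recent TRL releases and may differ across versions; output_dir is a hypothetical placeholder, and the model, dataset, and reward functions that GRPOTrainer itself requires are not shown here.

from trl import GRPOConfig

# Rough mapping of the "config" block above onto TRL's GRPOConfig.
# output_dir is a placeholder; beta is the KL-penalty coefficient.
grpo_args = GRPOConfig(
    output_dir="grpo-qwen2.5-0.5b",  # hypothetical
    num_train_epochs=3,
    learning_rate=1e-6,
    beta=0.1,
    num_generations=8,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    fp16=False,
    bf16=False,
    gradient_checkpointing=True,
    report_to="wandb",
)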