Spaces:
Paused
Paused
况兑
eval: greedy decode + numeric strict; system: force full decimals; regressions: A/B/C/noisy
e45d7fc
#!/usr/bin/env bash
#
# Full regression sweep: runs eval_simple.py once per dataset file against a
# single adapter directory and stops at the first evaluation that exits
# non-zero.
#
# Usage: bash <this-script> [ADAPTER_DIR]
#   ADAPTER_DIR  adapter passed to eval_simple.py via --adapter
#                (default: ./runs/overfit10_gold)
#
# Requires bash: `pipefail` is not available in every /bin/sh.
# Dataset paths are relative, so run this from the directory that holds them.
set -euo pipefail

# Adapter under test; the optional first argument overrides the default.
AD=${1:-./runs/overfit10_gold}

# Dataset files handed to eval_simple.py via --data, in run order.
datasets=(
  subset10.numeric.jsonl
  subset10.perturbed.chat.jsonl
  subset10.perturbed.chat.norm.jsonl
  subset10.noisy.chat.jsonl
)

for D in "${datasets[@]}"; do
  echo "==> $D"
  # On failure, name the offending dataset on stderr and propagate the
  # evaluator's own exit status instead of dying silently under `set -e`.
  python eval_simple.py --adapter "$AD" --data "$D" || {
    rc=$?
    printf 'FAIL: regression failed on %s (exit %d)\n' "$D" "$rc" >&2
    exit "$rc"
  }
done

# Only reached when every evaluation above exited with status 0.
echo "OK: full regression passed."