"""Build SFT JSONL data from replay trajectories.""" from __future__ import annotations import argparse import json try: from .slm_policy import replay_to_sft_rows, write_sft_jsonl except ImportError: # pragma: no cover from slm_policy import replay_to_sft_rows, write_sft_jsonl def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--replay", nargs="+", default=["artifacts/trajectories.jsonl"]) parser.add_argument("--output", default="artifacts/sft/browser_actions.jsonl") parser.add_argument( "--include-failures", action="store_true", help="Include failed episodes. By default only successful episodes are exported.", ) parser.add_argument( "--include-noop", action="store_true", help="Include noop actions. By default noop/recovery actions are excluded from SFT.", ) args = parser.parse_args() excluded = ("ask_oracle",) if args.include_noop else ("ask_oracle", "noop") rows = replay_to_sft_rows( args.replay, success_only=not args.include_failures, excluded_action_types=excluded, ) write_sft_jsonl(rows, args.output) print(json.dumps({"output": args.output, "rows": len(rows)}, indent=2)) if __name__ == "__main__": main()