"""Direct Preference Optimization (DPO) module for LKAlert alert-timing alignment.

The goal of this stage is to align HazardHead so that it prefers timely
alerts (TTA ∈ [1.5, 5.0]s) over predictions that are too early, too late,
or false alarms.

Stage flow:
    1. make_dpo_pairs.py — build preference pair manifests from the SFT
       manifests.
    2. trainer.py — DPO fine-tune HazardHead on top of the frozen SFT model.
"""

# Re-export the public names from the submodules so they can be imported
# from this module's namespace directly.
from .dataset import (
    DPODataset,
    dpo_collate_fn,
)
from .trainer import (
    DPOModel,
    DPOTrainer,
    compute_dpo_loss,
)

# Names exposed by ``from <this module> import *``.
__all__ = [
    "DPODataset",
    "dpo_collate_fn",
    "DPOModel",
    "DPOTrainer",
    "compute_dpo_loss",
]