"""DPO (Direct Preference Optimization) module for LKAlert alert-timing alignment.

Aligns HazardHead to prefer timely alerts (TTA ∈ [1.5, 5.0]s) over
too-early, too-late, or false-alarm predictions.

Stage flow:
    1. make_dpo_pairs.py — build preference pair manifests from SFT manifests
    2. trainer.py — DPO fine-tune HazardHead on top of frozen SFT model
"""

from .dataset import DPODataset, dpo_collate_fn
from .trainer import DPOModel, DPOTrainer, compute_dpo_loss

# Public API of this package: the names bound by a star-import and the
# ones re-exported from the `dataset` and `trainer` submodules above.
__all__ = [
    "DPODataset",
    "dpo_collate_fn",
    "DPOModel",
    "DPOTrainer",
    "compute_dpo_loss",
]