VLAlert / training /DPO /__init__.py
AsianPlayer's picture
Add VLAlert code
1e05592 verified
Raw
History Blame Contribute Delete
601 Bytes
"""
DPO (Direct Preference Optimization) module for LKAlert alert-timing alignment.

Aligns HazardHead to prefer timely alerts (TTA ∈ [1.5, 5.0]s) over
too-early, too-late, or false-alarm predictions.

Stage flow:
    1. make_dpo_pairs.py — build preference pair manifests from SFT manifests
    2. trainer.py — DPO fine-tune HazardHead on top of the frozen SFT model
"""
from .dataset import DPODataset, dpo_collate_fn
from .trainer import DPOModel, DPOTrainer, compute_dpo_loss
# Names exposed by `from <package> import *`: the symbols re-exported above
# from the sibling `.dataset` and `.trainer` modules.
__all__ = [
    # from .dataset
    "DPODataset",
    "dpo_collate_fn",
    # from .trainer
    "DPOModel",
    "DPOTrainer",
    "compute_dpo_loss",
]