Add VLAlert code

1e05592 verified 16 days ago

9.91 kB

	#!/usr/bin/env python3
	"""
	Post-processing transforms for PolicyModel logits.

	Three composable operations (all numpy-based, CPU-cheap):
	1. Per-class temperature scaling — L-BFGS-fit on train split (3 params)
	2. Temporal smoothing — EMA over per-video sorted-by-TTA sequence
	3. Non-ego bias — reduces ALERT when OBSERVE probability dominates

	Applied in order: raw_logits → per-class-T → smoothing → non-ego-bias → (+alert_bias on the ALERT logit)
	"""
	from __future__ import annotations

	from collections import defaultdict
	from typing import Sequence, Tuple

	import numpy as np
	import torch
	import torch.nn.functional as F


	# ─────────────────────────── 1. Per-class temperature ─────────────────────────
	def fit_per_class_temperature(
	    logits: np.ndarray,
	    labels: np.ndarray,
	    init: Tuple[float, float, float] = (1.0, 1.0, 1.0),
	    max_iter: int = 200,
	    device: str = "cpu",
	) -> np.ndarray:
	    """
	    Fit positive temperatures T = (T_SILENT, T_OBSERVE, T_ALERT)
	    by minimizing cross-entropy on (logits / T) with L-BFGS.

	    The temperatures are parameterized as exp(log_T) so they stay positive,
	    and are confined to [0.1, 10.0]. The same bounds are used inside the
	    objective and on the returned value, so the returned T is exactly the
	    one whose loss was minimized.

	    logits   : [N, 3] float
	    labels   : [N] int in {0, 1, 2}
	    init     : starting temperatures; must be finite and > 0
	    max_iter : maximum number of L-BFGS iterations
	    device   : torch device used for the fit
	    Returns T : [3] float32 (positive). With N == 0 there is nothing to fit
	                and the (bounded) init is returned unchanged.
	    Raises ValueError if init contains a non-finite or non-positive value.
	    """
	    t_min, t_max = 0.1, 10.0

	    init_arr = np.asarray(init, dtype=np.float64)
	    if not np.all(np.isfinite(init_arr)) or np.any(init_arr <= 0.0):
	        raise ValueError(
	            f"init temperatures must be finite and positive, got {init!r}"
	        )
	    # Start inside the feasible box; clamp() has zero gradient outside it.
	    init_arr = np.clip(init_arr, t_min, t_max)

	    lg = torch.as_tensor(logits, dtype=torch.float32, device=device)
	    lb = torch.as_tensor(labels, dtype=torch.long, device=device)
	    if lg.numel() == 0:
	        # Mean cross-entropy over zero samples is NaN; skip the optimizer.
	        return init_arr.astype(np.float32)

	    log_T = torch.tensor(np.log(init_arr), dtype=torch.float32,
	                         device=device, requires_grad=True)
	    opt = torch.optim.LBFGS([log_T], lr=0.1, max_iter=max_iter,
	                            tolerance_grad=1e-7, line_search_fn="strong_wolfe")

	    def closure():
	        opt.zero_grad()
	        T = log_T.exp().clamp(min=t_min, max=t_max)
	        cal = lg / T.unsqueeze(0)  # broadcast [1, 3] over [N, 3]
	        loss = F.cross_entropy(cal, lb)
	        loss.backward()
	        return loss

	    opt.step(closure)
	    T = log_T.detach().exp().clamp(min=t_min, max=t_max).cpu().numpy()
	    return T.astype(np.float32)


	def apply_per_class_temperature(logits: np.ndarray, T: Sequence[float]) -> np.ndarray:
	    """
	    Divide every logit column by its class temperature.

	    logits : [N, 3]
	    T      : [3] temperatures, broadcast across the N rows
	    Returns the scaled logits as float32.
	    """
	    temps = np.asarray(T, dtype=np.float32)
	    row_of_temps = temps[np.newaxis, :]  # [1, 3] so it broadcasts over rows
	    scaled = logits / row_of_temps
	    return scaled.astype(np.float32)


	# ─────────────────────────── 2. Temporal smoothing ────────────────────────────
	def temporal_smooth(
	    logits: np.ndarray,
	    video_ids: Sequence[str],
	    ttas: np.ndarray,
	    window: int = 3,
	    mode: str = "ema",   # "ema" | "mean"
	    alpha: float = 0.5,  # EMA weight on current frame
	) -> np.ndarray:
	    """
	    For each video, sort its samples by tta DESC (earliest = largest tta first)
	    and apply causal smoothing. Samples with a negative tta (e.g. tta == -1 for
	    non_ego / safe_neg) have no meaningful temporal order → kept as-is.
	    Videos with fewer than two timed samples are also left untouched.

	    logits   : [N, 3]
	    video_ids: list[str] length N
	    ttas     : [N] float; negative values ⇒ no smoothing
	    window   : for mode="mean", the sliding window size (must be >= 1)
	    alpha    : for mode="ema", weight of current frame (past gets 1-alpha)

	    Returns a new array of the same shape; the input is not modified.
	    Raises ValueError for an unknown mode, for window < 1 with mode="mean",
	    or when logits / video_ids / ttas differ in length.
	    """
	    # Validate eagerly: inside the per-video loop these checks would be
	    # skipped whenever no video happens to have two or more timed samples.
	    if mode not in ("ema", "mean"):
	        raise ValueError(f"Unknown mode: {mode}")
	    if mode == "mean" and window < 1:
	        # A window below 1 would average an empty slice and yield NaN.
	        raise ValueError(f"window must be >= 1 for mode='mean', got {window}")
	    if not (len(logits) == len(video_ids) == len(ttas)):
	        raise ValueError(
	            "logits, video_ids and ttas must have the same length, got "
	            f"{len(logits)}, {len(video_ids)}, {len(ttas)}"
	        )

	    out = logits.copy()
	    by_vid: dict[str, list[int]] = defaultdict(list)
	    for i, (vid, tta) in enumerate(zip(video_ids, ttas)):
	        if tta >= 0:  # only samples with a non-negative tta have a time axis
	            by_vid[vid].append(i)

	    for idxs in by_vid.values():
	        if len(idxs) < 2:
	            continue
	        # largest tta first = earliest in time (stable for equal ttas)
	        order = sorted(idxs, key=lambda i: -float(ttas[i]))
	        seq = np.stack([logits[i] for i in order])  # [n, 3]

	        if mode == "mean":
	            sm = seq.copy()
	            for pos in range(1, len(seq)):
	                lo = max(0, pos - window + 1)
	                sm[pos] = seq[lo:pos + 1].mean(axis=0)
	        else:  # "ema"
	            sm = np.empty_like(seq)
	            sm[0] = seq[0]
	            for pos in range(1, len(seq)):
	                sm[pos] = alpha * seq[pos] + (1.0 - alpha) * sm[pos - 1]

	        # scatter the smoothed rows back to their original positions
	        for row, i in zip(sm, order):
	            out[i] = row
	    return out


	# ─────────────────────────── 3. Non-ego bias ──────────────────────────────────
	def non_ego_bias(logits: np.ndarray, alpha: float = 0.5) -> np.ndarray:
	    """
	    Reduce ALERT logit when OBSERVE probability dominates.
	    Uses only information available at inference (no ground-truth leakage).

	    Mechanism:
	        p = softmax(logits)
	        logits[:, 2] -= alpha * p[:, 1]

	    logits : [N, 3]
	    alpha  : magnitude of the nudge (intended range [0, 2]); 0 disables it.

	    Always returns a new array, including when alpha == 0, so the caller
	    can edit the result in place without touching the input.
	    """
	    out = logits.copy()
	    if alpha == 0.0:
	        return out
	    # numerically stable softmax: subtract the row max before exponentiating
	    p = np.exp(logits - logits.max(axis=1, keepdims=True))
	    p /= p.sum(axis=1, keepdims=True)
	    out[:, 2] = out[:, 2] - alpha * p[:, 1]
	    return out


	# ─────────────────────────── All-in-one helper ────────────────────────────────
	def apply_postproc(
	    logits: np.ndarray,
	    video_ids: Sequence[str],
	    ttas: np.ndarray,
	    T_per_class: Sequence[float] | None = None,
	    smooth_window: int = 1,  # > 1 enables mode="mean"; not consulted for "ema"
	    smooth_mode: str = "ema",
	    smooth_alpha: float = 0.5,
	    non_ego_alpha: float = 0.0,
	    alert_bias: float = 0.0,
	) -> np.ndarray:
	    """
	    Run the post-processing chain on a float32 copy of ``logits``.

	    Stages, in order (each one is skipped when its switch is off):
	      1. per-class temperature — when T_per_class is not None
	      2. temporal smoothing    — when smooth_window > 1 or smooth_mode == "ema"
	      3. non-ego bias          — when non_ego_alpha > 0
	      4. ALERT offset          — when alert_bias != 0 (added to column 2)

	    NOTE(review): with the default smooth_mode="ema", stage 2 runs even when
	    smooth_window == 1; "window == 1 ⇒ no smoothing" only holds for
	    smooth_mode="mean". Confirm this is the intended default.

	    Returns the processed logits, [N, 3] float32. The input is not modified.
	    """
	    scores = logits.astype(np.float32).copy()

	    if T_per_class is not None:
	        scores = apply_per_class_temperature(scores, T_per_class)

	    wants_smoothing = smooth_mode == "ema" or smooth_window > 1
	    if wants_smoothing:
	        scores = temporal_smooth(
	            scores,
	            video_ids,
	            ttas,
	            window=smooth_window,
	            mode=smooth_mode,
	            alpha=smooth_alpha,
	        )

	    if non_ego_alpha > 0.0:
	        scores = non_ego_bias(scores, alpha=non_ego_alpha)

	    if alert_bias != 0.0:
	        alert_column = scores[:, 2]
	        scores[:, 2] = alert_column + alert_bias

	    return scores


	# ─────────────────────────── Metric computation ───────────────────────────────
	def compute_metrics(
	    logits: np.ndarray,  # [N, 3]
	    labels: np.ndarray,  # [N] in {0,1,2}
	    cats: np.ndarray,    # [N] str
	    ttas: np.ndarray,    # [N] float
	    video_ids: Sequence[str],
	) -> dict:
	    """
	    Compute policy, binary-ALERT and lead-time metrics from logits.

	    Described upstream as a mirror of
	    baseline.comparison.eval_all.compute_all_metrics.

	    logits    : [N, 3]; the argmax is the predicted class, column 2 = ALERT
	    labels    : [N] int in {0, 1, 2}
	    cats      : [N] str; "ego_positive", "safe_neg" and "non_ego" are used
	    ttas      : [N] float; read only for "ego_positive" samples
	    video_ids : length-N sequence, groups ego samples for lead time

	    Array-likes (e.g. plain lists) are accepted for logits/labels/cats/ttas;
	    they are converted to arrays so the element-wise masks are well defined.

	    Every ratio with an empty denominator is reported as 0.0. binary_ap
	    uses scikit-learn when it is importable and an equivalent NumPy
	    computation otherwise; if scikit-learn rejects the input with a
	    ValueError, a RuntimeWarning is issued and 0.0 is reported.

	    Returns a flat dict of floats plus the "n_samples" / "n_ego_videos" counts.
	    """
	    import warnings

	    logits = np.asarray(logits)
	    labels = np.asarray(labels)
	    cats = np.asarray(cats)
	    ttas = np.asarray(ttas)

	    preds = logits.argmax(axis=1)
	    # softmax only for prob_alert (for binary AP)
	    p = np.exp(logits - logits.max(axis=1, keepdims=True))
	    p /= p.sum(axis=1, keepdims=True)
	    prob_alert = p[:, 2]

	    ego_mask = cats == "ego_positive"
	    safe_mask = cats == "safe_neg"
	    ne_mask = cats == "non_ego"
	    pred_alert = preds == 2
	    true_alert = labels == 2

	    ego_alert = _safe_ratio((pred_alert & ego_mask & true_alert).sum(),
	                            (ego_mask & true_alert).sum())
	    non_ego_noalert = _safe_ratio((~pred_alert & ne_mask).sum(), ne_mask.sum())
	    safe_silent = _safe_ratio(((preds == 0) & safe_mask).sum(), safe_mask.sum())
	    fa = _safe_ratio((pred_alert & safe_mask).sum(), safe_mask.sum())
	    burden_non_ego = _safe_ratio((pred_alert & ne_mask).sum(), ne_mask.sum())
	    # PolicyScore v3 (safety-first):
	    #   0.65 * ego_recall + 0.25 * safe_silent - 0.15 * safe_alert
	    policy_score = 0.65 * ego_alert + 0.25 * safe_silent - 0.15 * fa

	    # binary AP / precision / recall / F1 (ALERT vs. rest)
	    binary_true = true_alert.astype(int)
	    try:
	        from sklearn.metrics import average_precision_score
	    except ImportError:
	        ap = _binary_average_precision(binary_true, prob_alert)
	    else:
	        try:
	            ap = float(average_precision_score(binary_true, prob_alert))
	        except ValueError as exc:
	            # Best effort: degenerate input must not abort the whole report,
	            # but the failure should not pass unnoticed either.
	            warnings.warn(
	                f"average_precision_score failed ({exc}); "
	                "reporting binary_ap = 0.0",
	                RuntimeWarning,
	                stacklevel=2,
	            )
	            ap = 0.0
	    tp = int((pred_alert & true_alert).sum())
	    fp = int((pred_alert & ~true_alert).sum())
	    fn = int((~pred_alert & true_alert).sum())
	    prec = _safe_ratio(tp, tp + fp)
	    rec = _safe_ratio(tp, tp + fn)
	    f1 = _safe_ratio(2 * prec * rec, prec + rec)

	    # lead time (OBSERVE∪ALERT response, per ego video)
	    lead_times, alert_lead_times, n_videos = _lead_time_stats(
	        video_ids, ttas, preds, np.where(ego_mask)[0]
	    )
	    mean_lead = float(np.mean(lead_times)) if lead_times else 0.0
	    mean_lead_alrt = float(np.mean(alert_lead_times)) if alert_lead_times else 0.0
	    cov_any = _safe_ratio(len(lead_times), n_videos)
	    cov_alert = _safe_ratio(len(alert_lead_times), n_videos)

	    return {
	        "policy_score": policy_score,
	        "ego_alert_recall": ego_alert,
	        "non_ego_noalert": non_ego_noalert,
	        "safe_neg_silent": safe_silent,
	        "safe_neg_alert_leak": fa,
	        "burden_non_ego": burden_non_ego,
	        "binary_ap": ap,
	        "binary_precision": prec,
	        "binary_recall": rec,
	        "binary_f1": f1,
	        "lead_time_mean": mean_lead,
	        "lead_time_coverage": cov_any,
	        "alert_lead_time_mean": mean_lead_alrt,
	        "alert_lead_time_coverage": cov_alert,
	        "n_samples": int(len(labels)),
	        "n_ego_videos": n_videos,
	    }


	def _safe_ratio(num, den) -> float:
	    """Return num / den as a float, or 0.0 when den is not positive."""
	    return float(num) / float(den) if den > 0 else 0.0


	def _binary_average_precision(y_true: np.ndarray, scores: np.ndarray) -> float:
	    """
	    Average precision sum_k (R_k - R_{k-1}) * P_k over distinct score thresholds.

	    Used only when scikit-learn is not importable. Returns 0.0 when there
	    are no samples, no positives, or a non-finite score.
	    """
	    n_pos = int(y_true.sum())
	    if y_true.size == 0 or n_pos == 0 or not np.all(np.isfinite(scores)):
	        return 0.0
	    order = np.argsort(-scores, kind="mergesort")  # stable, descending
	    ranked_scores = scores[order]
	    ranked_hits = y_true[order]
	    # one threshold per distinct score: the last index of each run of ties
	    cuts = np.r_[np.where(np.diff(ranked_scores))[0], ranked_hits.size - 1]
	    tps = np.cumsum(ranked_hits)[cuts]
	    precision = tps / (cuts + 1.0)
	    recall = tps / float(n_pos)
	    return float(np.sum(np.diff(np.r_[0.0, recall]) * precision))


	def _lead_time_stats(video_ids, ttas, preds, ego_idx):
	    """
	    Per ego video, find the largest tta with any response and with an ALERT.

	    Returns (lead_times, alert_lead_times, n_videos): one entry per video
	    that has at least one OBSERVE/ALERT (resp. ALERT) prediction, plus the
	    number of distinct videos among the ego samples.
	    """
	    by_video: dict[str, list] = defaultdict(list)
	    for i in ego_idx:
	        by_video[video_ids[i]].append((float(ttas[i]), int(preds[i])))

	    lead_times = []
	    alert_lead_times = []
	    for items in by_video.values():
	        # the earliest response in time is the one with the largest tta
	        responded = [tta for tta, pred in items if pred in (1, 2)]
	        alerted = [tta for tta, pred in items if pred == 2]
	        if responded:
	            lead_times.append(max(responded))
	        if alerted:
	            alert_lead_times.append(max(alerted))
	    return lead_times, alert_lead_times, len(by_video)