Spaces:

Rthur2003
/

crowncode-backend

Sleeping

App Files Files Community

Rthur2003 committed on Apr 16

Commit

1c2de42

1 Parent(s): bb6655d

feat: refactor training history figure generation to use GradientBoostingClassifier and improve metric tracking

Browse files

Files changed (1) hide show

app/training/generate_deep_figures.py +34 -28

app/training/generate_deep_figures.py CHANGED Viewed

@@ -106,57 +106,63 @@ def _cv_predict(model, X_scaled, y):
 # ── 1. Training history (XGBoost boosting-round learning curve) ──────────
 def fig_training_history(model, scaler, X, y):
     """Plot per-round train/validation metrics from a fresh XGBoost fit.

     A new ``XGBClassifier`` is built from ``model.get_params()`` and fitted
     on a stratified 80/20 split of the scaled data with both parts passed
     as ``eval_set``, so log loss, error rate and ROC-AUC are recorded after
     every boosting round. The three curves are drawn side by side and the
     best validation round of each metric is marked.

     Args:
         model: Fitted or unfitted estimator whose ``get_params()`` supplies
             the hyper-parameters for the temporary classifier.
         scaler: Object with a ``transform`` method applied to ``X``.
         X: Feature matrix accepted by ``scaler.transform``.
         y: Target labels; also used for stratification.

     Returns:
         None. The figure is written to ``FIGURES_DIR / "training_history.png"``.
     """
     from sklearn.model_selection import train_test_split
     from xgboost import XGBClassifier

     X_scaled = scaler.transform(X)
     X_tr, X_val, y_tr, y_val = train_test_split(
         X_scaled, y, test_size=0.2, stratify=y, random_state=42,
     )
     params = model.get_params()
     # Cap the number of rounds at 500; fall back to 300 when unset/None.
     params["n_estimators"] = min(params.get("n_estimators", 300) or 300, 500)
     params["eval_metric"] = ["logloss", "error", "auc"]
     # Early stopping would truncate the curve, so it is not forwarded.
     params.pop("early_stopping_rounds", None)
     clf = XGBClassifier(**params)
     clf.fit(
         X_tr, y_tr,
         eval_set=[(X_tr, y_tr), (X_val, y_val)],
         verbose=False,
     )
     history = clf.evals_result()
     tr = history["validation_0"]  # first eval_set entry: training part
     vl = history["validation_1"]  # second eval_set entry: validation part
     fig, axes = plt.subplots(1, 3, figsize=(16, 5))
     x = np.arange(1, len(tr["logloss"]) + 1)
     # (axis, metric key, y-label, True when a lower value is better)
     panels = [
         (axes[0], "logloss", "Log Loss", True),
         (axes[1], "error", "Error Rate", True),
         (axes[2], "auc", "ROC-AUC", False),
     ]
     for ax, metric, title, lower_better in panels:
         ax.plot(x, tr[metric], color=PALETTE["primary"], lw=2.2, label="Eğitim / Train")
         ax.plot(x, vl[metric], color=PALETTE["error"], lw=2.2,
                 linestyle="--", label="Doğrulama / Validation")
         ax.set_xlabel("Boosting Round")
         ax.set_ylabel(title)
         ax.set_title(f"{title} — Boosting İlerlemesi", fontweight="bold")
         ax.legend(framealpha=0.85)
         # Best round: minimum for loss/error, maximum for AUC. Using argmax
         # for the error rate would highlight the worst round instead.
         pick = np.argmin if lower_better else np.argmax
         best_idx = int(pick(vl[metric]))
         ax.axvline(best_idx + 1, color=PALETTE["accent"], linestyle=":", alpha=0.7)
         ax.annotate(
             f"en iyi: {best_idx + 1}",
             xy=(best_idx + 1, vl[metric][best_idx]),
             xytext=(12, -12), textcoords="offset points",
             fontsize=9, color=PALETTE["fg"],
         )
     fig.suptitle("XGBoost Eğitim Geçmişi — Train vs Validation", fontsize=14, fontweight="bold")
     fig.tight_layout()
     # Save and close this specific figure rather than the implicit current one.
     fig.savefig(FIGURES_DIR / "training_history.png")
     plt.close(fig)

 # ── 1. Training history (boosting-round learning curve) ──────────
 def fig_training_history(model, scaler, X, y):
     """Plot per-stage train/validation metrics of a refitted boosting model.

     A clone of ``model`` is fitted on a stratified 80/20 split of the scaled
     data. ``staged_predict_proba`` is then used to compute log loss, error
     rate and ROC-AUC after every boosting stage for both parts. The three
     curves are drawn side by side and the best validation stage of each
     metric is marked.

     Args:
         model: Estimator that can be cloned and that exposes ``fit``,
             ``classes_`` and ``staged_predict_proba`` after fitting.
         scaler: Object with a ``transform`` method applied to ``X``.
         X: Feature matrix accepted by ``scaler.transform``.
         y: Target labels; also used for stratification. ROC-AUC is computed
             from probability column 1, so a binary target is assumed.

     Returns:
         None. The figure is written to ``FIGURES_DIR / "training_history.png"``.
     """
     from sklearn.base import clone
     from sklearn.metrics import log_loss, roc_auc_score
     from sklearn.model_selection import train_test_split

     X_scaled = scaler.transform(X)
     X_tr, X_val, y_tr, y_val = train_test_split(
         X_scaled, y, test_size=0.2, stratify=y, random_state=42,
     )
     # Fit a clone so the caller's model is left untouched.
     clf = clone(model)
     clf.fit(X_tr, y_tr)
     classes = clf.classes_

     def _staged_metrics(X_part, y_part):
         """Return (log loss, error rate, ROC-AUC) lists, one value per stage."""
         y_true = np.asarray(y_part)
         losses, errors, aucs = [], [], []
         for prob in clf.staged_predict_proba(X_part):
             losses.append(log_loss(y_true, prob, labels=classes))
             # Probability columns follow ``classes_``; map the winning column
             # back to its label instead of comparing raw column indices.
             predicted = classes[prob.argmax(axis=1)]
             errors.append(float(np.mean(predicted != y_true)))
             aucs.append(roc_auc_score(y_true, prob[:, 1]))
         return losses, errors, aucs

     tr_loss, tr_err, tr_auc = _staged_metrics(X_tr, y_tr)
     vl_loss, vl_err, vl_auc = _staged_metrics(X_val, y_val)
     fig, axes = plt.subplots(1, 3, figsize=(16, 5))
     x = np.arange(1, len(tr_loss) + 1)
     # (axis, train curve, validation curve, y-label, True when lower is better)
     panels = [
         (axes[0], tr_loss, vl_loss, "Log Loss", True),
         (axes[1], tr_err, vl_err, "Error Rate", True),
         (axes[2], tr_auc, vl_auc, "ROC-AUC", False),
     ]
     for ax, tr_vals, vl_vals, title, lower_better in panels:
         ax.plot(x, tr_vals, color=PALETTE["primary"], lw=2.2, label="Eğitim / Train")
         ax.plot(x, vl_vals, color=PALETTE["error"], lw=2.2,
                 linestyle="--", label="Doğrulama / Validation")
         ax.set_xlabel("Boosting Round")
         ax.set_ylabel(title)
         ax.set_title(f"{title} — Boosting İlerlemesi", fontweight="bold")
         ax.legend(framealpha=0.85)
         # Mark the validation optimum: minimum for loss/error, maximum for AUC.
         best_idx = int(np.argmin(vl_vals)) if lower_better else int(np.argmax(vl_vals))
         ax.axvline(best_idx + 1, color=PALETTE["accent"], linestyle=":", alpha=0.7)
         ax.annotate(
             f"en iyi: {best_idx + 1}",
             xy=(best_idx + 1, vl_vals[best_idx]),
             xytext=(12, -12), textcoords="offset points",
             fontsize=9, color=PALETTE["fg"],
         )
     model_name = type(model).__name__
     fig.suptitle(f"{model_name} Eğitim Geçmişi — Train vs Validation",
                  fontsize=14, fontweight="bold")
     fig.tight_layout()
     # Save and close this specific figure rather than the implicit current one.
     fig.savefig(FIGURES_DIR / "training_history.png")
     plt.close(fig)