Spaces:
Sleeping
Sleeping
feat: refactor training history figure generation to use GradientBoostingClassifier and improve metric tracking
Browse files
app/training/generate_deep_figures.py
CHANGED
|
@@ -106,57 +106,63 @@ def _cv_predict(model, X_scaled, y):
|
|
| 106 |
|
| 107 |
# ── 1. Training history (XGBoost boosting-round learning curve) ──────────
|
| 108 |
def fig_training_history(model, scaler, X, y):
|
| 109 |
-
"""Retrain
|
| 110 |
-
from
|
| 111 |
from sklearn.model_selection import train_test_split
|
|
|
|
| 112 |
|
| 113 |
X_scaled = scaler.transform(X)
|
| 114 |
X_tr, X_val, y_tr, y_val = train_test_split(
|
| 115 |
X_scaled, y, test_size=0.2, stratify=y, random_state=42,
|
| 116 |
)
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
params["n_estimators"] = min(params.get("n_estimators", 300) or 300, 500)
|
| 121 |
-
params["eval_metric"] = ["logloss", "error", "auc"]
|
| 122 |
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
verbose=False,
|
| 128 |
-
)
|
| 129 |
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
fig, axes = plt.subplots(1, 3, figsize=(16, 5))
|
| 135 |
-
x = np.arange(1, len(
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
(axes[0],
|
| 139 |
-
(axes[1],
|
| 140 |
-
(axes[2],
|
| 141 |
-
]
|
| 142 |
-
|
| 143 |
-
|
|
|
|
|
|
|
| 144 |
linestyle="--", label="Doğrulama / Validation")
|
| 145 |
ax.set_xlabel("Boosting Round")
|
| 146 |
ax.set_ylabel(title)
|
| 147 |
ax.set_title(f"{title} — Boosting İlerlemesi", fontweight="bold")
|
| 148 |
ax.legend(framealpha=0.85)
|
| 149 |
-
|
| 150 |
-
best_idx = int(np.argmin(vl["logloss"])) if metric == "logloss" else int(np.argmax(vl[metric]))
|
| 151 |
ax.axvline(best_idx + 1, color=PALETTE["accent"], linestyle=":", alpha=0.7)
|
| 152 |
ax.annotate(
|
| 153 |
f"en iyi: {best_idx + 1}",
|
| 154 |
-
xy=(best_idx + 1,
|
| 155 |
xytext=(12, -12), textcoords="offset points",
|
| 156 |
fontsize=9, color=PALETTE["fg"],
|
| 157 |
)
|
| 158 |
|
| 159 |
-
|
|
|
|
|
|
|
| 160 |
plt.tight_layout()
|
| 161 |
plt.savefig(FIGURES_DIR / "training_history.png")
|
| 162 |
plt.close()
|
|
|
|
| 106 |
|
| 107 |
# ── 1. Training history (boosting-round learning curve from staged predictions) ──────────
|
| 108 |
def fig_training_history(model, scaler, X, y):
    """Plot train/validation learning curves across boosting rounds.

    A fresh clone of ``model`` is fitted on an 80/20 stratified split of the
    scaled data, and ``staged_predict_proba`` is replayed on both splits to
    obtain log loss, error rate and ROC-AUC after every boosting round. The
    three curves are drawn side by side and written to
    ``FIGURES_DIR / "training_history.png"``.

    Args:
        model: Estimator to clone and refit. It must expose
            ``staged_predict_proba`` once fitted (e.g. scikit-learn's
            gradient boosting classifiers).
        scaler: Already-fitted transformer; only ``transform`` is called.
        X: Feature matrix accepted by ``scaler.transform``.
        y: Class labels aligned with ``X``.

    Raises:
        AttributeError: If the fitted clone has no ``staged_predict_proba``.

    NOTE: ROC-AUC is computed from probability column 1, i.e. the second
    entry of ``classes_`` is treated as the positive class (binary setup).
    """
    # Local imports follow the function's existing style; importing ``clone``
    # here keeps the function self-contained.
    from sklearn.base import clone
    from sklearn.metrics import log_loss, roc_auc_score
    from sklearn.model_selection import train_test_split

    X_scaled = scaler.transform(X)
    X_tr, X_val, y_tr, y_val = train_test_split(
        X_scaled, y, test_size=0.2, stratify=y, random_state=42,
    )

    clf = clone(model)
    clf.fit(X_tr, y_tr)

    # Checked after fitting so that estimators exposing the method only once
    # fitted are not rejected prematurely.
    if not hasattr(clf, "staged_predict_proba"):
        raise AttributeError(
            f"{type(model).__name__} does not provide staged_predict_proba; "
            "a staged boosting estimator is required for the training history figure"
        )

    classes = clf.classes_

    def _staged_curves(X_part, y_part):
        """Return per-round (log loss, error rate, ROC-AUC) lists for one split."""
        y_true = np.asarray(y_part)
        losses, errors, aucs = [], [], []
        for prob in clf.staged_predict_proba(X_part):
            # ``labels`` pins the probability-column order to ``classes_``.
            losses.append(log_loss(y_true, prob, labels=classes))
            # Map argmax column indices back to real labels before comparing,
            # so labels that are not 0..K-1 are scored correctly.
            predicted = classes[prob.argmax(axis=1)]
            errors.append(1.0 - np.mean(predicted == y_true))
            aucs.append(roc_auc_score(y_true, prob[:, 1]))
        return losses, errors, aucs

    tr_loss, tr_err, tr_auc = _staged_curves(X_tr, y_tr)
    vl_loss, vl_err, vl_auc = _staged_curves(X_val, y_val)

    fig, axes = plt.subplots(1, 3, figsize=(16, 5))
    x = np.arange(1, len(tr_loss) + 1)

    # (axis, train curve, validation curve, panel title, lower-is-better flag)
    panels = [
        (axes[0], tr_loss, vl_loss, "Log Loss", True),
        (axes[1], tr_err, vl_err, "Error Rate", True),
        (axes[2], tr_auc, vl_auc, "ROC-AUC", False),
    ]

    for ax, tr_vals, vl_vals, title, lower_better in panels:
        ax.plot(x, tr_vals, color=PALETTE["primary"], lw=2.2, label="Eğitim / Train")
        ax.plot(x, vl_vals, color=PALETTE["error"], lw=2.2,
                linestyle="--", label="Doğrulama / Validation")
        ax.set_xlabel("Boosting Round")
        ax.set_ylabel(title)
        ax.set_title(f"{title} — Boosting İlerlemesi", fontweight="bold")
        ax.legend(framealpha=0.85)

        # Mark the round with the best validation score (rounds are 1-based).
        best_idx = int(np.argmin(vl_vals)) if lower_better else int(np.argmax(vl_vals))
        ax.axvline(best_idx + 1, color=PALETTE["accent"], linestyle=":", alpha=0.7)
        ax.annotate(
            f"en iyi: {best_idx + 1}",
            xy=(best_idx + 1, vl_vals[best_idx]),
            xytext=(12, -12), textcoords="offset points",
            fontsize=9, color=PALETTE["fg"],
        )

    model_name = type(model).__name__
    fig.suptitle(f"{model_name} Eğitim Geçmişi — Train vs Validation",
                 fontsize=14, fontweight="bold")
    fig.tight_layout()
    # Save and close this specific figure rather than the implicit current one.
    fig.savefig(FIGURES_DIR / "training_history.png")
    plt.close(fig)
|