Rthur2003 committed on
Commit
1c2de42
·
1 Parent(s): bb6655d

feat: refactor training history figure generation to use GradientBoostingClassifier and improve metric tracking

Browse files
Files changed (1) hide show
  1. app/training/generate_deep_figures.py +34 -28
app/training/generate_deep_figures.py CHANGED
@@ -106,57 +106,63 @@ def _cv_predict(model, X_scaled, y):
106
 
107
  # ── 1. Training history (XGBoost boosting-round learning curve) ──────────
108
  def fig_training_history(model, scaler, X, y):
109
- """Retrain lightly with eval_set to capture boosting progression."""
110
- from xgboost import XGBClassifier
111
  from sklearn.model_selection import train_test_split
 
112
 
113
  X_scaled = scaler.transform(X)
114
  X_tr, X_val, y_tr, y_val = train_test_split(
115
  X_scaled, y, test_size=0.2, stratify=y, random_state=42,
116
  )
117
 
118
- params = model.get_params()
119
- # Reset early-stopping / n_estimators for a fresh fit with eval tracking
120
- params["n_estimators"] = min(params.get("n_estimators", 300) or 300, 500)
121
- params["eval_metric"] = ["logloss", "error", "auc"]
122
 
123
- clf = XGBClassifier(**{k: v for k, v in params.items() if k != "early_stopping_rounds"})
124
- clf.fit(
125
- X_tr, y_tr,
126
- eval_set=[(X_tr, y_tr), (X_val, y_val)],
127
- verbose=False,
128
- )
129
 
130
- history = clf.evals_result()
131
- tr = history["validation_0"]
132
- vl = history["validation_1"]
 
 
 
 
 
 
133
 
134
  fig, axes = plt.subplots(1, 3, figsize=(16, 5))
135
- x = np.arange(1, len(tr["logloss"]) + 1)
136
-
137
- for ax, metric, title in [
138
- (axes[0], "logloss", "Log Loss"),
139
- (axes[1], "error", "Error Rate"),
140
- (axes[2], "auc", "ROC-AUC"),
141
- ]:
142
- ax.plot(x, tr[metric], color=PALETTE["primary"], lw=2.2, label="Eğitim / Train")
143
- ax.plot(x, vl[metric], color=PALETTE["error"], lw=2.2,
 
 
144
  linestyle="--", label="Doğrulama / Validation")
145
  ax.set_xlabel("Boosting Round")
146
  ax.set_ylabel(title)
147
  ax.set_title(f"{title} — Boosting İlerlemesi", fontweight="bold")
148
  ax.legend(framealpha=0.85)
149
- # best round annotation
150
- best_idx = int(np.argmin(vl["logloss"])) if metric == "logloss" else int(np.argmax(vl[metric]))
151
  ax.axvline(best_idx + 1, color=PALETTE["accent"], linestyle=":", alpha=0.7)
152
  ax.annotate(
153
  f"en iyi: {best_idx + 1}",
154
- xy=(best_idx + 1, vl[metric][best_idx]),
155
  xytext=(12, -12), textcoords="offset points",
156
  fontsize=9, color=PALETTE["fg"],
157
  )
158
 
159
- fig.suptitle("XGBoost Eğitim Geçmişi — Train vs Validation", fontsize=14, fontweight="bold")
 
 
160
  plt.tight_layout()
161
  plt.savefig(FIGURES_DIR / "training_history.png")
162
  plt.close()
 
106
 
107
  # ── 1. Training history (XGBoost boosting-round learning curve) ──────────
108
  def fig_training_history(model, scaler, X, y):
109
+ """Retrain with staged_predict to capture boosting progression."""
110
+ from sklearn.ensemble import GradientBoostingClassifier
111
  from sklearn.model_selection import train_test_split
112
+ from sklearn.metrics import log_loss, roc_auc_score
113
 
114
  X_scaled = scaler.transform(X)
115
  X_tr, X_val, y_tr, y_val = train_test_split(
116
  X_scaled, y, test_size=0.2, stratify=y, random_state=42,
117
  )
118
 
119
+ clf = clone(model)
120
+ clf.fit(X_tr, y_tr)
 
 
121
 
122
+ n_est = clf.n_estimators_ if hasattr(clf, 'n_estimators_') else clf.n_estimators
123
+ tr_loss, vl_loss = [], []
124
+ tr_err, vl_err = [], []
125
+ tr_auc, vl_auc = [], []
 
 
126
 
127
+ for i, (tr_prob, vl_prob) in enumerate(
128
+ zip(clf.staged_predict_proba(X_tr), clf.staged_predict_proba(X_val))
129
+ ):
130
+ tr_loss.append(log_loss(y_tr, tr_prob))
131
+ vl_loss.append(log_loss(y_val, vl_prob))
132
+ tr_err.append(1.0 - (tr_prob.argmax(1) == y_tr).mean())
133
+ vl_err.append(1.0 - (vl_prob.argmax(1) == y_val).mean())
134
+ tr_auc.append(roc_auc_score(y_tr, tr_prob[:, 1]))
135
+ vl_auc.append(roc_auc_score(y_val, vl_prob[:, 1]))
136
 
137
  fig, axes = plt.subplots(1, 3, figsize=(16, 5))
138
+ x = np.arange(1, len(tr_loss) + 1)
139
+
140
+ panels = [
141
+ (axes[0], tr_loss, vl_loss, "Log Loss", True),
142
+ (axes[1], tr_err, vl_err, "Error Rate", True),
143
+ (axes[2], tr_auc, vl_auc, "ROC-AUC", False),
144
+ ]
145
+
146
+ for ax, tr_vals, vl_vals, title, lower_better in panels:
147
+ ax.plot(x, tr_vals, color=PALETTE["primary"], lw=2.2, label="Eğitim / Train")
148
+ ax.plot(x, vl_vals, color=PALETTE["error"], lw=2.2,
149
  linestyle="--", label="Doğrulama / Validation")
150
  ax.set_xlabel("Boosting Round")
151
  ax.set_ylabel(title)
152
  ax.set_title(f"{title} — Boosting İlerlemesi", fontweight="bold")
153
  ax.legend(framealpha=0.85)
154
+ best_idx = int(np.argmin(vl_vals)) if lower_better else int(np.argmax(vl_vals))
 
155
  ax.axvline(best_idx + 1, color=PALETTE["accent"], linestyle=":", alpha=0.7)
156
  ax.annotate(
157
  f"en iyi: {best_idx + 1}",
158
+ xy=(best_idx + 1, vl_vals[best_idx]),
159
  xytext=(12, -12), textcoords="offset points",
160
  fontsize=9, color=PALETTE["fg"],
161
  )
162
 
163
+ model_name = type(model).__name__
164
+ fig.suptitle(f"{model_name} Eğitim Geçmişi — Train vs Validation",
165
+ fontsize=14, fontweight="bold")
166
  plt.tight_layout()
167
  plt.savefig(FIGURES_DIR / "training_history.png")
168
  plt.close()