crowncode-backend / app /training /generate_figures.py
Rthur2003's picture
feat: update output directory for generated figures to docs/academic/figures
983f3ef
"""Generate academic-quality figures from training results.

Produces publication-ready figures in docs/academic/figures/:

- confusion_matrix.png
- roc_curves_comparison.png
- precision_recall_curves.png
- feature_importance_top20.png
- calibration_plot.png
- feature_distribution_ai_vs_human.png
- shap_summary.png
- model_comparison_bars.png

Usage:
    python -m app.training.generate_figures
"""
from __future__ import annotations
import csv
import json
import pickle
from pathlib import Path
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from sklearn.metrics import (
confusion_matrix, roc_curve, auc,
precision_recall_curve, average_precision_score,
)
from sklearn.calibration import calibration_curve
from sklearn.model_selection import StratifiedKFold, cross_val_predict
from sklearn.base import clone
# ── Paths ────────────────────────────────────────────────────────────────
# Everything is anchored on BACKEND, the third ancestor directory of this
# file (parents[2]). Models live inside it; the dataset and the generated
# figures live next to it, under BACKEND's parent directory.
BACKEND = Path(__file__).resolve().parents[2]
MODELS_DIR = BACKEND.joinpath("models")
DATASET_DIR = BACKEND.parent.joinpath("DataSet")
FIGURES_DIR = BACKEND.parent.joinpath("docs", "academic", "figures")
FEATURES_CSV = DATASET_DIR.joinpath("features.csv")
# ── Theme (AURIS parchment gold palette) ─────────────────────────────────
# Named colours shared by every figure in this module.
PALETTE = dict(
    bg="#faf6ed",
    fg="#3d2817",
    primary="#c99347",
    secondary="#7fb069",
    error="#a64b3c",
    grid="#d8c9a8",
    accent="#e7c77a",
)

# Global matplotlib defaults, applied once when the module is imported.
plt.rcParams.update({
    # Figure and axes share the same background colour.
    **dict.fromkeys(("figure.facecolor", "axes.facecolor"), PALETTE["bg"]),
    # Spines, labels, ticks and free text all use the foreground colour.
    **dict.fromkeys(
        (
            "axes.edgecolor",
            "axes.labelcolor",
            "xtick.color",
            "ytick.color",
            "text.color",
        ),
        PALETTE["fg"],
    ),
    # Grid is on by default and drawn faintly.
    "axes.grid": True,
    "grid.color": PALETTE["grid"],
    "grid.alpha": 0.4,
    # Typography.
    "font.family": "DejaVu Sans",
    "font.size": 11,
    # On-screen vs. saved resolution; saved files are cropped tightly.
    "figure.dpi": 100,
    "savefig.dpi": 150,
    "savefig.bbox": "tight",
})
def _load_artifacts():
    """Load the training artifacts that the figures are built from.

    Reads four files from ``MODELS_DIR``:

    - ``training_results.json``   -> ``results`` (parsed JSON)
    - ``auris_classifier_v1.pkl`` -> ``model`` (unpickled object)
    - ``feature_scaler_v1.pkl``   -> ``scaler`` (unpickled object)
    - ``feature_columns_v1.json`` -> ``feature_cols`` (parsed JSON)

    Returns:
        The tuple ``(results, model, scaler, feature_cols)``.

    Raises:
        OSError: If any of the files cannot be opened (e.g. it is missing).
    """

    def _read_json(name):
        # JSON is UTF-8 by specification; do not depend on the platform's
        # locale encoding (which is not UTF-8 on every system).
        with open(MODELS_DIR / name, "r", encoding="utf-8") as f:
            return json.load(f)

    def _read_pickle(name):
        # NOTE(security): unpickling can execute arbitrary code. These files
        # must only ever be artifacts produced by this project's own training
        # run, never files obtained from an untrusted source.
        with open(MODELS_DIR / name, "rb") as f:
            return pickle.load(f)

    results = _read_json("training_results.json")
    model = _read_pickle("auris_classifier_v1.pkl")
    scaler = _read_pickle("feature_scaler_v1.pkl")
    feature_cols = _read_json("feature_columns_v1.json")
    return results, model, scaler, feature_cols
def _load_csv_data(feature_cols, csv_path=None):
    """Read the feature matrix and integer labels from the features CSV.

    Args:
        feature_cols: Column names to extract, in the order they should
            appear as columns of ``X``.
        csv_path: Optional path of the CSV to read. Defaults to the
            module-level ``FEATURES_CSV``.

    Returns:
        ``(X, y)`` where ``X`` is a float array of shape
        ``(n_rows, len(feature_cols))`` and ``y`` is an integer array of
        length ``n_rows`` taken from the ``label_int`` column. Non-finite
        cells are replaced: NaN -> 0.0, +inf -> 1.0, -inf -> -1.0.

    Raises:
        KeyError: If a requested column or ``label_int`` is absent.
        ValueError: If a cell cannot be parsed as a number.
    """
    path = FEATURES_CSV if csv_path is None else csv_path
    # newline="" is what the csv module requires so that quoted fields with
    # embedded line breaks and \r\n line endings are parsed correctly.
    with open(path, "r", encoding="utf-8", newline="") as f:
        rows = list(csv.DictReader(f))

    # The explicit reshape keeps X two-dimensional even when the file holds
    # only a header (np.array([]) would otherwise be 1-D).
    X = np.array(
        [[float(r[c]) for c in feature_cols] for r in rows], dtype=float,
    ).reshape(len(rows), len(feature_cols))
    X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
    y = np.array([int(r["label_int"]) for r in rows], dtype=int)
    return X, y
def _get_cv_predictions(model, X_scaled, y, cache: dict) -> tuple:
    """Return out-of-fold predictions for *model*, memoised in *cache*.

    The cache is keyed on ``id(model)``; a hit returns the stored tuple
    without refitting. On a miss, a clone of the model is evaluated with a
    shuffled, seeded 5-fold stratified split and the positive-class
    probability (column 1 of ``predict_proba``) is thresholded at 0.5.

    Returns:
        ``(y, y_pred, y_prob)``.
    """
    cache_key = id(model)
    if cache_key not in cache:
        splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        fold_proba = cross_val_predict(
            clone(model),
            X_scaled,
            y,
            cv=splitter,
            method="predict_proba",
            n_jobs=-1,
        )
        y_prob = fold_proba[:, 1]
        # Strictly greater than 0.5: an exact tie is assigned to class 0.
        y_pred = (y_prob > 0.5).astype(int)
        cache[cache_key] = (y, y_pred, y_prob)
    return cache[cache_key]
def fig_confusion_matrix(results: dict, y_true: np.ndarray, y_pred: np.ndarray) -> None:
    """Draw the confusion matrix and save it as ``confusion_matrix.png``.

    Args:
        results: Parsed training results. ``results["_best_model"]``
            (default ``"XGBoost"``) names the model in the title, and that
            model's entry supplies the accuracy / F1 / AUC shown there.
        y_true: True labels.
        y_pred: Predicted labels.

    Note:
        The matrix itself is computed from ``y_true`` / ``y_pred``, while the
        title metrics are read from ``results``. Accuracy falls back to the
        value computed from the arguments; F1 and AUC fall back to 0.0.
    """
    best = results.get("_best_model", "XGBoost")
    data = results.get(best, {})

    # Pin the label set so the matrix is always 2x2 in a fixed order, even if
    # one class never occurs. The annotation loop and the two tick labels
    # below rely on that shape. Assumes 0 = human and 1 = AI.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    fig, ax = plt.subplots(figsize=(6.5, 5.5))
    cmap = LinearSegmentedColormap.from_list(
        "auris", [PALETTE["bg"], PALETTE["primary"]],
    )
    im = ax.imshow(cm, cmap=cmap, aspect="auto")

    acc = data.get("accuracy", (y_true == y_pred).mean())
    f1v = data.get("f1", 0.0)
    aucv = data.get("roc_auc", 0.0)
    ax.set_title(
        f"Karışıklık Matrisi — {best}\n"
        f"Accuracy: {acc:.1%} F1: {f1v:.3f} AUC: {aucv:.3f}",
        fontsize=13, fontweight="bold",
    )

    classes = ["İnsan / Human", "AI"]
    ax.set_xticks([0, 1])
    ax.set_yticks([0, 1])
    ax.set_xticklabels(classes)
    ax.set_yticklabels(classes)
    ax.set_xlabel("Tahmin / Predicted")
    ax.set_ylabel("Gerçek / Actual")

    # Annotate every cell with its count and its share of all samples.
    total = cm.sum()
    for (i, j), count in np.ndenumerate(cm):
        # Guard the percentage against an empty input (total == 0).
        pct = 100 * count / total if total else 0.0
        # Light text on the darker (well-populated) cells, dark text otherwise.
        color = PALETTE["bg"] if count > total * 0.25 else PALETTE["fg"]
        ax.text(
            j, i, f"{count}\n({pct:.1f}%)",
            ha="center", va="center",
            color=color, fontsize=13, fontweight="bold",
        )

    fig.colorbar(im, ax=ax, shrink=0.7)
    fig.savefig(FIGURES_DIR / "confusion_matrix.png")
    plt.close(fig)
    print(" ✓ confusion_matrix.png")
def fig_roc_comparison(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None:
    """Plot the best model's ROC curve and save ``roc_curves_comparison.png``.

    The curve and its AUC are computed from ``y_true`` / ``y_prob``. Every
    other model found in ``results`` is listed as a small text annotation
    with the ``roc_auc`` value stored in ``results`` (0 when absent).

    Args:
        results: Parsed training results; keys starting with ``"_"`` and
            non-dict values are treated as metadata, not as models.
        y_true: True labels.
        y_prob: Predicted probability of the positive class.
    """
    fig, ax = plt.subplots(figsize=(8, 6.5))
    best = results.get("_best_model", "XGBoost")

    fpr, tpr, _ = roc_curve(y_true, y_prob)
    roc_auc = auc(fpr, tpr)
    ax.plot(fpr, tpr, color=PALETTE["primary"], lw=3,
            label=f"{best} (AUC = {roc_auc:.4f})")
    ax.fill_between(fpr, tpr, alpha=0.15, color=PALETTE["primary"])
    ax.plot([0, 1], [0, 1], "k:", alpha=0.5, lw=1.5, label="Rastgele / Random")

    # List the remaining models as AUC annotations. Filter first, then number
    # the survivors, so the lines are stacked evenly: the vertical position
    # must not depend on where metadata keys or the best model happen to sit
    # in the results dict.
    others = [
        (name, data)
        for name, data in results.items()
        if not name.startswith("_") and isinstance(data, dict) and name != best
    ]
    for slot, (name, data) in enumerate(others):
        auc_v = data.get("roc_auc", 0)
        ax.annotate(
            f"{name}: AUC={auc_v:.3f}",
            xy=(0.45, 0.05 + 0.04 * slot),
            fontsize=8, alpha=0.7,
        )

    ax.set_xlabel("Yanlış Pozitif Oranı / False Positive Rate")
    ax.set_ylabel("Doğru Pozitif Oranı / True Positive Rate")
    ax.set_title("ROC Eğrisi — En İyi Model (5-fold CV)", fontsize=13, fontweight="bold")
    ax.legend(loc="lower right", framealpha=0.85)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1.02])
    fig.savefig(FIGURES_DIR / "roc_curves_comparison.png")
    plt.close(fig)
    print(" ✓ roc_curves_comparison.png")
def fig_pr_curves(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None:
    """Plot the precision-recall curve and save ``precision_recall_curves.png``.

    The legend shows the model name from ``results["_best_model"]`` (default
    ``"XGBoost"``) together with the average precision. A dotted horizontal
    line marks the mean of ``y_true`` as the baseline.
    """
    model_name = results.get("_best_model", "XGBoost")
    precision, recall, _ = precision_recall_curve(y_true, y_prob)
    avg_precision = average_precision_score(y_true, y_prob)
    positive_rate = y_true.mean()
    gold = PALETTE["primary"]

    fig, ax = plt.subplots(figsize=(8, 6.5))
    ax.plot(
        recall,
        precision,
        color=gold,
        lw=3,
        label=f"{model_name} (AP = {avg_precision:.4f})",
    )
    ax.fill_between(recall, precision, alpha=0.15, color=gold)
    ax.axhline(
        positive_rate,
        color="k",
        linestyle=":",
        alpha=0.5,
        label=f"Baseline = {positive_rate:.3f}",
    )

    ax.set_title(
        "Precision-Recall Eğrisi — En İyi Model",
        fontsize=13,
        fontweight="bold",
    )
    ax.set_xlabel("Duyarlılık / Recall")
    ax.set_ylabel("Kesinlik / Precision")
    ax.legend(loc="lower left", framealpha=0.85)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1.02])

    fig.savefig(FIGURES_DIR / "precision_recall_curves.png")
    plt.close(fig)
    print(" ✓ precision_recall_curves.png")
def fig_feature_importance(results: dict, top_n: int = 20) -> None:
    """Draw a horizontal bar chart of the most important features.

    Reads the ``{feature_name: importance}`` mapping stored under
    ``results["_feature_importance"]``, keeps the ``top_n`` largest values
    and saves the chart as ``feature_importance_top20.png``.

    Args:
        results: Parsed training results.
        top_n: Maximum number of features to show.

    Returns:
        None. When there is nothing to plot, a notice is printed and no file
        is written.
    """
    imp = results.get("_feature_importance", {})
    items = sorted(imp.items(), key=lambda x: x[1], reverse=True)[:top_n] if imp else []
    if not items:
        print(" ! no feature importances available, skipping feature_importance_top20.png")
        return

    names = [n for n, _ in items]
    vals = [v for _, v in items]

    fig, ax = plt.subplots(figsize=(9, 7))
    y_pos = np.arange(len(names))
    colors_grad = plt.cm.copper(np.linspace(0.3, 0.85, len(names)))
    ax.barh(y_pos, vals, color=colors_grad, edgecolor=PALETTE["fg"], linewidth=0.5)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(names, fontsize=10)
    # Largest importance at the top of the chart.
    ax.invert_yaxis()
    ax.set_xlabel("Normalize Önem / Normalized Importance")
    # Report the number of bars actually drawn, which is smaller than top_n
    # when fewer features are available.
    ax.set_title(f"En Önemli {len(names)} Özellik — {results.get('_best_model', 'XGBoost')}",
                 fontsize=13, fontweight="bold")

    # Value labels sit just past the end of each bar; the gap is 1% of the
    # longest bar and is the same for every label, so compute it once.
    label_gap = max(vals) * 0.01
    for i, v in enumerate(vals):
        ax.text(v + label_gap, i, f"{v:.4f}", va="center", fontsize=8)

    fig.savefig(FIGURES_DIR / "feature_importance_top20.png")
    plt.close(fig)
    print(" ✓ feature_importance_top20.png")
def fig_calibration(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None:
    """Plot a 10-bin reliability curve and save ``calibration_plot.png``.

    The curve compares the mean predicted probability in each bin with the
    observed fraction of positives; the gap to the diagonal is shaded. A text
    box reports the Brier score (mean squared difference between ``y_prob``
    and ``y_true``) and the sample count.
    """
    model_name = results.get("_best_model", "XGBoost")
    observed, predicted = calibration_curve(y_true, y_prob, n_bins=10)
    squared_error = (y_prob - y_true) ** 2
    brier = float(np.mean(squared_error))
    gold = PALETTE["primary"]

    fig, ax = plt.subplots(figsize=(7, 6.5))
    ax.plot(
        predicted,
        observed,
        "o-",
        color=gold,
        lw=3,
        markersize=8,
        label=f"{model_name}",
    )
    # Shade the area between the curve and the ideal y = x line.
    ax.fill_between(predicted, observed, predicted, alpha=0.15, color=gold)
    ax.plot([0, 1], [0, 1], "k:", alpha=0.5, label="Mükemmel / Perfect")

    # Summary box in the top-left corner, positioned in axes coordinates.
    box_style = {
        "boxstyle": "round,pad=0.5",
        "facecolor": PALETTE["bg"],
        "edgecolor": gold,
        "alpha": 0.85,
    }
    ax.text(
        0.04,
        0.94,
        f"Brier Score = {brier:.4f}\nN = {len(y_true)} (5-fold CV)",
        transform=ax.transAxes,
        fontsize=10,
        va="top",
        bbox=box_style,
    )

    ax.set_title(
        "Kalibrasyon Eğrisi — En İyi Model",
        fontsize=13,
        fontweight="bold",
    )
    ax.set_xlabel("Ortalama Tahmin Olasılığı / Mean Predicted Probability")
    ax.set_ylabel("Gerçek Pozitif Oranı / Fraction of Positives")
    ax.legend(loc="lower right", framealpha=0.85, fontsize=10)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])

    fig.savefig(FIGURES_DIR / "calibration_plot.png")
    plt.close(fig)
    print(" ✓ calibration_plot.png")
def _finite_values_by_label(rows, col):
    """Split the finite values of column *col* into ``(human, ai)`` lists."""
    human, ai = [], []
    for r in rows:
        try:
            v = float(r[col])
            # Parse the label the same way _load_csv_data does, so a row is
            # only counted when its label is a real integer.
            is_ai = int(r["label_int"]) == 1
        except (KeyError, ValueError, TypeError):
            # Missing column, unparsable text, or a short row (DictReader
            # fills absent cells with None): leave the row out.
            continue
        if not np.isfinite(v):
            continue
        (ai if is_ai else human).append(v)
    return human, ai


def fig_feature_distributions(feature_cols: list[str], top_features: list[str]) -> None:
    """Plot AI-vs-human histograms for the first (up to) 8 top features.

    Re-reads ``FEATURES_CSV`` and draws one panel per feature on a 2x4 grid,
    overlaying a density histogram per class. Unused panels are hidden. The
    result is saved as ``feature_distribution_ai_vs_human.png``.

    Args:
        feature_cols: Not used by this function; kept so existing callers
            keep working.
        top_features: Feature (column) names, most important first.
    """
    with open(FEATURES_CSV, "r", encoding="utf-8", newline="") as f:
        rows = list(csv.DictReader(f))

    shown = top_features[:8]
    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    axes = axes.flatten()

    for i, col in enumerate(shown):
        hum_vals, ai_vals = _finite_values_by_label(rows, col)
        ax = axes[i]
        # Use one set of bin edges for both classes; binning each class on
        # its own range makes the overlaid bars incomparable.
        edges = np.histogram_bin_edges(hum_vals + ai_vals, bins=30)
        # Density normalisation of an empty group divides by zero, so fall
        # back to plain counts (all zero) when a class has no values.
        ax.hist(hum_vals, bins=edges, alpha=0.55, color=PALETTE["secondary"],
                label=f"İnsan (n={len(hum_vals)})", density=bool(hum_vals))
        ax.hist(ai_vals, bins=edges, alpha=0.55, color=PALETTE["error"],
                label=f"AI (n={len(ai_vals)})", density=bool(ai_vals))
        ax.set_title(col, fontsize=10, fontweight="bold")
        # Only the left-most panel of each row carries the y-axis label.
        ax.set_ylabel("Yoğunluk" if i % 4 == 0 else "")
        ax.legend(fontsize=7, loc="best")
        ax.tick_params(labelsize=8)

    for ax in axes[len(shown):]:
        ax.axis("off")

    fig.suptitle("AI vs İnsan — En Önemli 8 Özelliğin Dağılımı",
                 fontsize=14, fontweight="bold", y=1.02)
    fig.tight_layout()
    fig.savefig(FIGURES_DIR / "feature_distribution_ai_vs_human.png")
    plt.close(fig)
    print(" ✓ feature_distribution_ai_vs_human.png")
def fig_shap_summary(model, scaler, feature_cols, X, max_display: int = 20) -> None:
    """Draw a SHAP summary plot and save it as ``shap_summary.png``.

    The function is skipped (with a printed notice) when the ``shap`` package
    is not installed.

    Args:
        model: Fitted model handed to ``shap.TreeExplainer``.
        scaler: Fitted scaler; ``scaler.transform(X)`` is applied first.
        feature_cols: Feature names shown on the plot.
        X: Unscaled feature matrix.
        max_display: Maximum number of features shown.
    """
    try:
        import shap
    except ImportError:
        print(" ! SHAP not available, skipping")
        return

    X_scaled = scaler.transform(X)
    # Explain at most 1000 rows, drawn with a fixed seed, to keep this fast.
    if len(X_scaled) > 1000:
        idx = np.random.RandomState(42).choice(len(X_scaled), 1000, replace=False)
        X_sub = X_scaled[idx]
    else:
        X_sub = X_scaled

    explainer = shap.TreeExplainer(model)
    sv = explainer.shap_values(X_sub)

    # Reduce per-class output to the positive class (index 1 when present).
    # Older SHAP releases return one array per class in a list ...
    if isinstance(sv, list):
        sv = sv[1] if len(sv) > 1 else sv[0]
    # ... newer releases return a single (samples, features, outputs) array.
    # summary_plot needs a 2-D (samples, features) matrix.
    if np.ndim(sv) == 3:
        sv = sv[:, :, 1] if sv.shape[2] > 1 else sv[:, :, 0]

    # plot_size=None below makes summary_plot draw into this figure instead
    # of resizing it.
    plt.figure(figsize=(10, 8))
    shap.summary_plot(
        sv, X_sub,
        feature_names=feature_cols,
        max_display=max_display,
        show=False,
        plot_size=None,
    )
    plt.title("SHAP Özet Grafiği — Global Özellik Etkisi",
              fontsize=13, fontweight="bold", pad=14)
    plt.savefig(FIGURES_DIR / "shap_summary.png", bbox_inches="tight")
    plt.close()
    print(" ✓ shap_summary.png")
def fig_model_comparison(results: dict) -> None:
    """Draw grouped bars comparing five metrics across all models.

    Every key of ``results`` that does not start with ``"_"`` and maps to a
    dict is treated as a model. Models are ordered by ``roc_auc`` (highest
    first) and the chart is saved as ``model_comparison_bars.png``.

    A metric that is missing from a model's entry is plotted as 0.0, the same
    fallback the sort key and the other figures in this module use, instead
    of aborting the figure with a ``KeyError``.
    """
    items = [(k, v) for k, v in results.items() if not k.startswith("_") and isinstance(v, dict)]
    items.sort(key=lambda x: x[1].get("roc_auc", 0), reverse=True)
    names = [n for n, _ in items]

    # Legend label -> key in each model's result dict (legend order).
    metric_keys = {
        "Accuracy": "accuracy",
        "F1 Score": "f1",
        "ROC-AUC": "roc_auc",
        "Precision": "precision",
        "Recall": "recall",
    }
    metrics = {
        label: [d.get(key, 0.0) for _, d in items]
        for label, key in metric_keys.items()
    }

    x = np.arange(len(names))
    width = 0.16
    fig, ax = plt.subplots(figsize=(12, 6.5))
    colors = [PALETTE["primary"], PALETTE["secondary"], PALETTE["error"],
              PALETTE["accent"], "#7a5c3c"]
    for i, (metric, vals) in enumerate(metrics.items()):
        # Shift by two bar widths so the five bars are centred on the tick.
        ax.bar(x + i * width - 2 * width, vals, width, label=metric,
               color=colors[i], edgecolor=PALETTE["fg"], linewidth=0.3)

    ax.set_ylabel("Skor / Score")
    ax.set_title("Model Performans Karşılaştırması", fontsize=13, fontweight="bold")
    ax.set_xticks(x)
    ax.set_xticklabels(names, rotation=20, ha="right")
    ax.legend(loc="lower right", framealpha=0.85)
    # The axis starts at 0.5, so scores below that are not visible.
    ax.set_ylim([0.5, 1.0])
    ax.grid(True, axis="y", alpha=0.4)
    fig.savefig(FIGURES_DIR / "model_comparison_bars.png")
    plt.close(fig)
    print(" ✓ model_comparison_bars.png")
def main() -> None:
    """Generate every figure into ``FIGURES_DIR``.

    Creates the output directory, loads the artifacts and the dataset,
    computes the cross-validated predictions once, then renders each figure
    in turn. The SHAP summary is produced last.
    """
    FIGURES_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Output directory: {FIGURES_DIR}")

    print("Loading artifacts...")
    results, model, scaler, feature_cols = _load_artifacts()
    # Feature names ordered from most to least important.
    ranked = sorted(
        results.get("_feature_importance", {}).items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_features = [name for name, _ in ranked]

    print("\nLoading dataset...")
    X, y = _load_csv_data(feature_cols)
    X_scaled = scaler.transform(X)

    print("Computing 5-fold cross-validated predictions (this may take ~1-2 min)...")
    cache: dict = {}
    y_true, y_pred, y_prob = _get_cv_predictions(model, X_scaled, y, cache)

    print("\nGenerating figures...")
    # (renderer, positional arguments), executed in this order.
    figure_jobs = (
        (fig_confusion_matrix, (results, y_true, y_pred)),
        (fig_roc_comparison, (results, y_true, y_prob)),
        (fig_pr_curves, (results, y_true, y_prob)),
        (fig_feature_importance, (results,)),
        (fig_calibration, (results, y_true, y_prob)),
        (fig_model_comparison, (results,)),
        (fig_feature_distributions, (feature_cols, top_features)),
    )
    for render, args in figure_jobs:
        render(*args)

    print("\nGenerating SHAP summary...")
    fig_shap_summary(model, scaler, feature_cols, X)

    # Counts every PNG present in the output directory.
    png_total = len(list(FIGURES_DIR.glob("*.png")))
    print(f"\nDone. {png_total} figures in {FIGURES_DIR}")


if __name__ == "__main__":
    main()