"""Generate academic-quality figures from training results. Produces publication-ready figures in docs/academic/figures/: - confusion_matrix.png - roc_curves_comparison.png - precision_recall_curves.png - feature_importance_top20.png - calibration_plot.png - feature_distribution_ai_vs_human.png - shap_summary.png - model_comparison_bars.png Usage: python -m app.training.generate_figures """ from __future__ import annotations import csv import json import pickle from pathlib import Path import numpy as np import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt from matplotlib.colors import LinearSegmentedColormap from sklearn.metrics import ( confusion_matrix, roc_curve, auc, precision_recall_curve, average_precision_score, ) from sklearn.calibration import calibration_curve from sklearn.model_selection import StratifiedKFold, cross_val_predict from sklearn.base import clone # ── Paths ──────────────────────────────────────────────────────────────── BACKEND = Path(__file__).resolve().parents[2] MODELS_DIR = BACKEND / "models" DATASET_DIR = BACKEND.parent / "DataSet" FIGURES_DIR = BACKEND.parent / "docs" / "academic" / "figures" FEATURES_CSV = DATASET_DIR / "features.csv" # ── Theme (AURIS parchment gold palette) ───────────────────────────────── PALETTE = { "bg": "#faf6ed", "fg": "#3d2817", "primary": "#c99347", "secondary": "#7fb069", "error": "#a64b3c", "grid": "#d8c9a8", "accent": "#e7c77a", } plt.rcParams.update({ "figure.facecolor": PALETTE["bg"], "axes.facecolor": PALETTE["bg"], "axes.edgecolor": PALETTE["fg"], "axes.labelcolor": PALETTE["fg"], "xtick.color": PALETTE["fg"], "ytick.color": PALETTE["fg"], "text.color": PALETTE["fg"], "font.family": "DejaVu Sans", "font.size": 11, "axes.grid": True, "grid.color": PALETTE["grid"], "grid.alpha": 0.4, "savefig.dpi": 150, "savefig.bbox": "tight", "figure.dpi": 100, }) def _load_artifacts(): """Load training results, model, features CSV.""" with open(MODELS_DIR / "training_results.json", "r") as f: results = json.load(f) with open(MODELS_DIR / "auris_classifier_v1.pkl", "rb") as f: model = pickle.load(f) with open(MODELS_DIR / "feature_scaler_v1.pkl", "rb") as f: scaler = pickle.load(f) with open(MODELS_DIR / "feature_columns_v1.json", "r") as f: feature_cols = json.load(f) return results, model, scaler, feature_cols def _load_csv_data(feature_cols): with open(FEATURES_CSV, "r", encoding="utf-8") as f: rows = list(csv.DictReader(f)) X = np.array([[float(r[c]) for c in feature_cols] for r in rows]) X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0) y = np.array([int(r["label_int"]) for r in rows]) return X, y def _get_cv_predictions(model, X_scaled, y, cache: dict) -> tuple: """Cross-validated predictions with caching across figures.""" key = id(model) if key in cache: return cache[key] cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42) y_prob = cross_val_predict( clone(model), X_scaled, y, cv=cv, method="predict_proba", n_jobs=-1, )[:, 1] y_pred = (y_prob > 0.5).astype(int) cache[key] = (y, y_pred, y_prob) return y, y_pred, y_prob def fig_confusion_matrix(results: dict, y_true: np.ndarray, y_pred: np.ndarray) -> None: """Confusion matrix for the best model (CV predictions).""" best = results.get("_best_model", "XGBoost") data = results.get(best, {}) cm = confusion_matrix(y_true, y_pred) fig, ax = plt.subplots(figsize=(6.5, 5.5)) cmap = LinearSegmentedColormap.from_list( "auris", [PALETTE["bg"], PALETTE["primary"]], ) im = ax.imshow(cm, cmap=cmap, aspect="auto") acc = data.get("accuracy", (y_true == y_pred).mean()) f1v = data.get("f1", 0.0) aucv = data.get("roc_auc", 0.0) ax.set_title( f"Karışıklık Matrisi — {best}\n" f"Accuracy: {acc:.1%} F1: {f1v:.3f} AUC: {aucv:.3f}", fontsize=13, fontweight="bold", ) classes = ["İnsan / Human", "AI"] ax.set_xticks([0, 1]) ax.set_yticks([0, 1]) ax.set_xticklabels(classes) ax.set_yticklabels(classes) ax.set_xlabel("Tahmin / Predicted") ax.set_ylabel("Gerçek / Actual") # cell annotations with count + percentage total = cm.sum() for i in range(2): for j in range(2): count = cm[i, j] pct = 100 * count / total color = PALETTE["bg"] if count > total * 0.25 else PALETTE["fg"] ax.text( j, i, f"{count}\n({pct:.1f}%)", ha="center", va="center", color=color, fontsize=13, fontweight="bold", ) plt.colorbar(im, ax=ax, shrink=0.7) plt.savefig(FIGURES_DIR / "confusion_matrix.png") plt.close() print(" ✓ confusion_matrix.png") def fig_roc_comparison(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None: """ROC curve for best model + reference diagonal.""" fig, ax = plt.subplots(figsize=(8, 6.5)) best = results.get("_best_model", "XGBoost") fpr, tpr, _ = roc_curve(y_true, y_prob) roc_auc = auc(fpr, tpr) ax.plot(fpr, tpr, color=PALETTE["primary"], lw=3, label=f"{best} (AUC = {roc_auc:.4f})") ax.fill_between(fpr, tpr, alpha=0.15, color=PALETTE["primary"]) ax.plot([0, 1], [0, 1], "k:", alpha=0.5, lw=1.5, label="Rastgele / Random") # Add other models as AUC markers for name, data in results.items(): if name.startswith("_") or not isinstance(data, dict) or name == best: continue auc_v = data.get("roc_auc", 0) ax.annotate( f"{name}: AUC={auc_v:.3f}", xy=(0.45, 0.05 + 0.04 * list(results.keys()).index(name)), fontsize=8, alpha=0.7, ) ax.set_xlabel("Yanlış Pozitif Oranı / False Positive Rate") ax.set_ylabel("Doğru Pozitif Oranı / True Positive Rate") ax.set_title("ROC Eğrisi — En İyi Model (5-fold CV)", fontsize=13, fontweight="bold") ax.legend(loc="lower right", framealpha=0.85) ax.set_xlim([0, 1]) ax.set_ylim([0, 1.02]) plt.savefig(FIGURES_DIR / "roc_curves_comparison.png") plt.close() print(" ✓ roc_curves_comparison.png") def fig_pr_curves(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None: """Precision-Recall curve for best model.""" fig, ax = plt.subplots(figsize=(8, 6.5)) best = results.get("_best_model", "XGBoost") prec, rec, _ = precision_recall_curve(y_true, y_prob) ap = average_precision_score(y_true, y_prob) baseline = y_true.mean() ax.plot(rec, prec, color=PALETTE["primary"], lw=3, label=f"{best} (AP = {ap:.4f})") ax.fill_between(rec, prec, alpha=0.15, color=PALETTE["primary"]) ax.axhline(baseline, color="k", linestyle=":", alpha=0.5, label=f"Baseline = {baseline:.3f}") ax.set_xlabel("Duyarlılık / Recall") ax.set_ylabel("Kesinlik / Precision") ax.set_title("Precision-Recall Eğrisi — En İyi Model", fontsize=13, fontweight="bold") ax.legend(loc="lower left", framealpha=0.85) ax.set_xlim([0, 1]) ax.set_ylim([0, 1.02]) plt.savefig(FIGURES_DIR / "precision_recall_curves.png") plt.close() print(" ✓ precision_recall_curves.png") def fig_feature_importance(results: dict, top_n: int = 20) -> None: """Top N feature importance bar chart.""" imp = results.get("_feature_importance", {}) if not imp: return items = sorted(imp.items(), key=lambda x: x[1], reverse=True)[:top_n] names = [n for n, _ in items] vals = [v for _, v in items] fig, ax = plt.subplots(figsize=(9, 7)) y_pos = np.arange(len(names)) colors_grad = plt.cm.copper(np.linspace(0.3, 0.85, len(names))) ax.barh(y_pos, vals, color=colors_grad, edgecolor=PALETTE["fg"], linewidth=0.5) ax.set_yticks(y_pos) ax.set_yticklabels(names, fontsize=10) ax.invert_yaxis() ax.set_xlabel("Normalize Önem / Normalized Importance") ax.set_title(f"En Önemli {top_n} Özellik — {results.get('_best_model', 'XGBoost')}", fontsize=13, fontweight="bold") for i, v in enumerate(vals): ax.text(v + max(vals) * 0.01, i, f"{v:.4f}", va="center", fontsize=8) plt.savefig(FIGURES_DIR / "feature_importance_top20.png") plt.close() print(" ✓ feature_importance_top20.png") def fig_calibration(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None: """Calibration curve — does predicted probability match reality?""" fig, ax = plt.subplots(figsize=(7, 6.5)) best = results.get("_best_model", "XGBoost") frac_pos, mean_pred = calibration_curve(y_true, y_prob, n_bins=10) ax.plot(mean_pred, frac_pos, "o-", color=PALETTE["primary"], lw=3, markersize=8, label=f"{best}") ax.fill_between(mean_pred, frac_pos, mean_pred, alpha=0.15, color=PALETTE["primary"]) ax.plot([0, 1], [0, 1], "k:", alpha=0.5, label="Mükemmel / Perfect") # Brier score annotation brier = float(np.mean((y_prob - y_true) ** 2)) ax.text( 0.04, 0.94, f"Brier Score = {brier:.4f}\nN = {len(y_true)} (5-fold CV)", transform=ax.transAxes, fontsize=10, va="top", bbox=dict(boxstyle="round,pad=0.5", facecolor=PALETTE["bg"], edgecolor=PALETTE["primary"], alpha=0.85), ) ax.set_xlabel("Ortalama Tahmin Olasılığı / Mean Predicted Probability") ax.set_ylabel("Gerçek Pozitif Oranı / Fraction of Positives") ax.set_title("Kalibrasyon Eğrisi — En İyi Model", fontsize=13, fontweight="bold") ax.legend(loc="lower right", framealpha=0.85, fontsize=10) ax.set_xlim([0, 1]) ax.set_ylim([0, 1]) plt.savefig(FIGURES_DIR / "calibration_plot.png") plt.close() print(" ✓ calibration_plot.png") def fig_feature_distributions(feature_cols: list[str], top_features: list[str]) -> None: """Distribution of top-8 features by AI vs Human.""" with open(FEATURES_CSV, "r", encoding="utf-8") as f: rows = list(csv.DictReader(f)) n = min(8, len(top_features)) fig, axes = plt.subplots(2, 4, figsize=(16, 8)) axes = axes.flatten() for i in range(n): col = top_features[i] ai_vals, hum_vals = [], [] for r in rows: try: v = float(r[col]) if np.isnan(v) or np.isinf(v): continue (ai_vals if r["label_int"] == "1" else hum_vals).append(v) except (ValueError, KeyError): continue ax = axes[i] # histogram overlay bins = 30 ax.hist(hum_vals, bins=bins, alpha=0.55, color=PALETTE["secondary"], label=f"İnsan (n={len(hum_vals)})", density=True) ax.hist(ai_vals, bins=bins, alpha=0.55, color=PALETTE["error"], label=f"AI (n={len(ai_vals)})", density=True) ax.set_title(col, fontsize=10, fontweight="bold") ax.set_ylabel("Yoğunluk" if i % 4 == 0 else "") ax.legend(fontsize=7, loc="best") ax.tick_params(labelsize=8) for i in range(n, len(axes)): axes[i].axis("off") fig.suptitle("AI vs İnsan — En Önemli 8 Özelliğin Dağılımı", fontsize=14, fontweight="bold", y=1.02) plt.tight_layout() plt.savefig(FIGURES_DIR / "feature_distribution_ai_vs_human.png") plt.close() print(" ✓ feature_distribution_ai_vs_human.png") def fig_shap_summary(model, scaler, feature_cols, X, max_display: int = 20) -> None: """SHAP summary — global feature importance with directional info.""" try: import shap except ImportError: print(" ! SHAP not available, skipping") return X_scaled = scaler.transform(X) # Subsample for speed if len(X_scaled) > 1000: idx = np.random.RandomState(42).choice(len(X_scaled), 1000, replace=False) X_sub = X_scaled[idx] else: X_sub = X_scaled explainer = shap.TreeExplainer(model) shap_values = explainer.shap_values(X_sub) if isinstance(shap_values, list): sv = shap_values[1] if len(shap_values) > 1 else shap_values[0] else: sv = shap_values fig = plt.figure(figsize=(10, 8)) shap.summary_plot( sv, X_sub, feature_names=feature_cols, max_display=max_display, show=False, plot_size=None, ) plt.title("SHAP Özet Grafiği — Global Özellik Etkisi", fontsize=13, fontweight="bold", pad=14) plt.savefig(FIGURES_DIR / "shap_summary.png", bbox_inches="tight") plt.close() print(" ✓ shap_summary.png") def fig_model_comparison(results: dict) -> None: """Bar chart comparing accuracy/f1/auc across all models.""" items = [(k, v) for k, v in results.items() if not k.startswith("_") and isinstance(v, dict)] items.sort(key=lambda x: x[1].get("roc_auc", 0), reverse=True) names = [n for n, _ in items] metrics = { "Accuracy": [d["accuracy"] for _, d in items], "F1 Score": [d["f1"] for _, d in items], "ROC-AUC": [d["roc_auc"] for _, d in items], "Precision": [d["precision"] for _, d in items], "Recall": [d["recall"] for _, d in items], } x = np.arange(len(names)) width = 0.16 fig, ax = plt.subplots(figsize=(12, 6.5)) colors = [PALETTE["primary"], PALETTE["secondary"], PALETTE["error"], PALETTE["accent"], "#7a5c3c"] for i, (metric, vals) in enumerate(metrics.items()): ax.bar(x + i * width - 2 * width, vals, width, label=metric, color=colors[i], edgecolor=PALETTE["fg"], linewidth=0.3) ax.set_ylabel("Skor / Score") ax.set_title("Model Performans Karşılaştırması", fontsize=13, fontweight="bold") ax.set_xticks(x) ax.set_xticklabels(names, rotation=20, ha="right") ax.legend(loc="lower right", framealpha=0.85) ax.set_ylim([0.5, 1.0]) ax.grid(True, axis="y", alpha=0.4) plt.savefig(FIGURES_DIR / "model_comparison_bars.png") plt.close() print(" ✓ model_comparison_bars.png") def main() -> None: FIGURES_DIR.mkdir(parents=True, exist_ok=True) print(f"Output directory: {FIGURES_DIR}") print("Loading artifacts...") results, model, scaler, feature_cols = _load_artifacts() importance = results.get("_feature_importance", {}) top_features = [n for n, _ in sorted( importance.items(), key=lambda x: x[1], reverse=True, )] print("\nLoading dataset...") X, y = _load_csv_data(feature_cols) X_scaled = scaler.transform(X) print("Computing 5-fold cross-validated predictions (this may take ~1-2 min)...") cache: dict = {} y_true, y_pred, y_prob = _get_cv_predictions(model, X_scaled, y, cache) print("\nGenerating figures...") fig_confusion_matrix(results, y_true, y_pred) fig_roc_comparison(results, y_true, y_prob) fig_pr_curves(results, y_true, y_prob) fig_feature_importance(results) fig_calibration(results, y_true, y_prob) fig_model_comparison(results) fig_feature_distributions(feature_cols, top_features) print("\nGenerating SHAP summary...") fig_shap_summary(model, scaler, feature_cols, X) print(f"\nDone. {len(list(FIGURES_DIR.glob('*.png')))} figures in {FIGURES_DIR}") if __name__ == "__main__": main()