Spaces:

Rthur2003
/

crowncode-backend

Sleeping

App Files Files Community

Rthur2003 committed on Apr 15

Commit

94e94a1

1 Parent(s): 9e93437

feat: add figure generation script for training results visualization

Browse files

Files changed (1) hide show

app/training/generate_figures.py +408 -0

app/training/generate_figures.py ADDED Viewed

	@@ -0,0 +1,408 @@

+"""Generate academic-quality figures from training results.
+Produces publication-ready figures in DataSet/figures/:
+  - confusion_matrix.png
+  - roc_curves_comparison.png
+  - precision_recall_curves.png
+  - feature_importance_top20.png
+  - calibration_plot.png
+  - feature_distribution_ai_vs_human.png
+  - shap_summary.png
+  - model_comparison_bars.png
+Usage:
+    python -m app.training.generate_figures
+"""
+from __future__ import annotations
+import csv
+import json
+import pickle
+from pathlib import Path
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+from matplotlib.colors import LinearSegmentedColormap
+from sklearn.metrics import (
+    confusion_matrix, roc_curve, auc,
+    precision_recall_curve, average_precision_score,
+)
+from sklearn.calibration import calibration_curve
# ── Paths ────────────────────────────────────────────────────────────────
# Every location is derived from this file's resolved path: BACKEND is the
# third ancestor directory of the file, models live inside it, and the
# dataset lives in a "DataSet" folder next to it.
BACKEND = Path(__file__).resolve().parents[2]
MODELS_DIR = BACKEND.joinpath("models")
DATASET_DIR = BACKEND.parent.joinpath("DataSet")
FIGURES_DIR = DATASET_DIR.joinpath("figures")
FEATURES_CSV = DATASET_DIR.joinpath("features.csv")

# ── Theme (AURIS parchment gold palette) ─────────────────────────────────
# Named colours shared by every figure in this module.
PALETTE = dict(
    bg="#faf6ed",
    fg="#3d2817",
    primary="#c99347",
    secondary="#7fb069",
    error="#a64b3c",
    grid="#d8c9a8",
    accent="#e7c77a",
)

# Global matplotlib defaults, applied once at import time.
plt.rcParams.update(
    {
        # Figure and axes share the same background colour.
        **dict.fromkeys(("figure.facecolor", "axes.facecolor"), PALETTE["bg"]),
        # Spines, labels, ticks and text all use the foreground colour.
        **dict.fromkeys(
            (
                "axes.edgecolor",
                "axes.labelcolor",
                "xtick.color",
                "ytick.color",
                "text.color",
            ),
            PALETTE["fg"],
        ),
        "font.family": "DejaVu Sans",
        "font.size": 11,
        # Grid is on by default, drawn faintly in the grid colour.
        "axes.grid": True,
        "grid.color": PALETTE["grid"],
        "grid.alpha": 0.4,
        # Saved files use a higher DPI than on-screen figures and a tight box.
        "savefig.dpi": 150,
        "savefig.bbox": "tight",
        "figure.dpi": 100,
    }
)
def _load_artifacts():
    """Load the training results, classifier, scaler and feature column list.

    Reads four files from ``MODELS_DIR``:

    * ``training_results.json``
    * ``auris_classifier_v1.pkl``
    * ``feature_scaler_v1.pkl``
    * ``feature_columns_v1.json``

    Returns:
        A tuple ``(results, model, scaler, feature_cols)`` in that order.

    Raises:
        OSError: If any of the files cannot be opened.
        json.JSONDecodeError: If a JSON file is malformed.
    """

    def _read_json(name):
        # JSON is read as UTF-8 explicitly so that parsing does not depend on
        # the platform's default locale encoding.
        with open(MODELS_DIR / name, "r", encoding="utf-8") as handle:
            return json.load(handle)

    def _read_pickle(name):
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only load artifacts that were produced by a trusted training run.
        with open(MODELS_DIR / name, "rb") as handle:
            return pickle.load(handle)

    results = _read_json("training_results.json")
    model = _read_pickle("auris_classifier_v1.pkl")
    scaler = _read_pickle("feature_scaler_v1.pkl")
    feature_cols = _read_json("feature_columns_v1.json")
    return results, model, scaler, feature_cols
def _load_csv_data(feature_cols):
    """Read the features CSV into a feature matrix and a label vector.

    Args:
        feature_cols: Column names to extract, in the order they should
            appear in the returned matrix.

    Returns:
        A tuple ``(X, y)``. ``X`` is a float array with one row per CSV row
        and one column per entry of ``feature_cols``; NaN is replaced by 0.0,
        +inf by 1.0 and -inf by -1.0. ``y`` is an integer array built from the
        ``label_int`` column.

    Raises:
        KeyError: If a requested column or ``label_int`` is missing.
        ValueError: If a cell cannot be parsed as a number.
    """
    # newline="" lets the csv module handle line endings itself, as its
    # documentation requires for files passed to a reader.
    with open(FEATURES_CSV, "r", encoding="utf-8", newline="") as f:
        rows = list(csv.DictReader(f))
    X = np.array(
        [[float(r[c]) for c in feature_cols] for r in rows], dtype=float
    )
    # Force a 2-D shape so a CSV with no data rows still gives (0, n_features).
    X = X.reshape(len(rows), len(feature_cols))
    X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
    y = np.array([int(r["label_int"]) for r in rows], dtype=int)
    return X, y
def fig_confusion_matrix(results: dict) -> None:
    """Draw the confusion matrix of the best model and save it as a PNG.

    The model name is taken from ``results["_best_model"]`` (falling back to
    ``"XGBoost"``); that entry's ``y_true`` / ``y_pred`` lists are compared.
    The predictions are presumably cross-validated ones; verify against the
    training script that writes the results file.

    Args:
        results: Parsed training results. The best model's entry must provide
            ``y_true``, ``y_pred``, ``accuracy``, ``f1`` and ``roc_auc``.

    Side effects:
        Writes ``confusion_matrix.png`` into ``FIGURES_DIR``. Nothing is
        written when the best model has no entry in ``results``.
    """
    best = results.get("_best_model", "XGBoost")
    data = results.get(best)
    if not data:
        print(f"  ! no results for {best}, skipping confusion matrix")
        return

    y_true = np.asarray(data["y_true"])
    y_pred = np.asarray(data["y_pred"])
    # Pin the label set so the matrix is always 2x2, even when a class is
    # missing from the data; otherwise the annotation loop below would index
    # out of bounds. Assumes labels are encoded 0 = human, 1 = AI, matching
    # the tick labels used here - TODO confirm against the training script.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    fig, ax = plt.subplots(figsize=(6.5, 5.5))
    cmap = LinearSegmentedColormap.from_list(
        "auris", [PALETTE["bg"], PALETTE["primary"]],
    )
    im = ax.imshow(cm, cmap=cmap, aspect="auto")
    ax.set_title(
        f"Karışıklık Matrisi — {best}\n"
        f"Accuracy: {data['accuracy']:.1%}  F1: {data['f1']:.3f}  AUC: {data['roc_auc']:.3f}",
        fontsize=13, fontweight="bold",
    )
    classes = ["İnsan / Human", "AI"]
    ax.set_xticks([0, 1])
    ax.set_yticks([0, 1])
    ax.set_xticklabels(classes)
    ax.set_yticklabels(classes)
    ax.set_xlabel("Tahmin / Predicted")
    ax.set_ylabel("Gerçek / Actual")

    # Cell annotations with count + percentage.
    total = cm.sum()
    # imshow scales colours between the smallest and largest cell, so the
    # midpoint of that range is where the cell switches from light to dark.
    contrast_threshold = (cm.max() + cm.min()) / 2
    for i in range(2):
        for j in range(2):
            count = cm[i, j]
            pct = 100 * count / total if total else 0.0
            color = PALETTE["bg"] if count > contrast_threshold else PALETTE["fg"]
            ax.text(
                j, i, f"{count}\n({pct:.1f}%)",
                ha="center", va="center",
                color=color, fontsize=13, fontweight="bold",
            )

    fig.colorbar(im, ax=ax, shrink=0.7)
    fig.savefig(FIGURES_DIR / "confusion_matrix.png")
    plt.close(fig)
    print("  ✓ confusion_matrix.png")
def fig_roc_comparison(results: dict) -> None:
    """Overlay the ROC curves of every model and save the figure as a PNG.

    Model entries are the dict-valued items of ``results`` whose key does not
    start with an underscore. They are drawn in descending order of their
    stored ``roc_auc``; the model named by ``results["_best_model"]`` gets a
    thicker, solid line while the others are dashed.

    Args:
        results: Parsed training results; each model entry must provide
            ``y_true`` and ``y_prob``.

    Side effects:
        Writes ``roc_curves_comparison.png`` into ``FIGURES_DIR``.
    """
    best = results.get("_best_model", "XGBoost")
    items = [(k, v) for k, v in results.items() if not k.startswith("_") and isinstance(v, dict)]
    items.sort(key=lambda x: x[1].get("roc_auc", 0), reverse=True)

    # Keep the ten-step palette for up to ten models, but grow it when there
    # are more so that indexing by model position can never overflow.
    colors = plt.cm.plasma(np.linspace(0.15, 0.85, max(10, len(items))))

    fig, ax = plt.subplots(figsize=(8, 6.5))
    for idx, (name, data) in enumerate(items):
        y_true = np.asarray(data["y_true"])
        y_prob = np.asarray(data["y_prob"])
        fpr, tpr, _ = roc_curve(y_true, y_prob)
        roc_auc = auc(fpr, tpr)
        is_best = name == best
        ax.plot(
            fpr, tpr,
            color=colors[idx],
            lw=3 if is_best else 1.5,
            ls="-" if is_best else "--",
            label=f"{name} (AUC = {roc_auc:.4f})",
        )

    # Diagonal reference line from (0, 0) to (1, 1).
    ax.plot([0, 1], [0, 1], "k:", alpha=0.3, lw=1)
    ax.set_xlabel("Yanlış Pozitif Oranı / False Positive Rate")
    ax.set_ylabel("Doğru Pozitif Oranı / True Positive Rate")
    ax.set_title("ROC Eğrileri — Model Karşılaştırması", fontsize=13, fontweight="bold")
    ax.legend(loc="lower right", framealpha=0.85)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1.02])
    fig.savefig(FIGURES_DIR / "roc_curves_comparison.png")
    plt.close(fig)
    print("  ✓ roc_curves_comparison.png")
def fig_pr_curves(results: dict) -> None:
    """Overlay the precision-recall curves of every model and save a PNG.

    Model entries are the dict-valued items of ``results`` whose key does not
    start with an underscore. They are drawn in descending order of their
    stored ``roc_auc``; the model named by ``results["_best_model"]`` gets a
    thicker, solid line while the others are dashed. Each legend entry shows
    the model's average precision.

    Args:
        results: Parsed training results; each model entry must provide
            ``y_true`` and ``y_prob``.

    Side effects:
        Writes ``precision_recall_curves.png`` into ``FIGURES_DIR``.
    """
    best = results.get("_best_model", "XGBoost")
    items = [(k, v) for k, v in results.items() if not k.startswith("_") and isinstance(v, dict)]
    items.sort(key=lambda x: x[1].get("roc_auc", 0), reverse=True)

    # Keep the ten-step palette for up to ten models, but grow it when there
    # are more so that indexing by model position can never overflow.
    colors = plt.cm.plasma(np.linspace(0.15, 0.85, max(10, len(items))))

    fig, ax = plt.subplots(figsize=(8, 6.5))
    for idx, (name, data) in enumerate(items):
        y_true = np.asarray(data["y_true"])
        y_prob = np.asarray(data["y_prob"])
        prec, rec, _ = precision_recall_curve(y_true, y_prob)
        ap = average_precision_score(y_true, y_prob)
        is_best = name == best
        ax.plot(
            rec, prec,
            color=colors[idx],
            lw=3 if is_best else 1.5,
            ls="-" if is_best else "--",
            label=f"{name} (AP = {ap:.4f})",
        )

    ax.set_xlabel("Duyarlılık / Recall")
    ax.set_ylabel("Kesinlik / Precision")
    ax.set_title("Precision-Recall Eğrileri", fontsize=13, fontweight="bold")
    ax.legend(loc="lower left", framealpha=0.85)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1.02])
    fig.savefig(FIGURES_DIR / "precision_recall_curves.png")
    plt.close(fig)
    print("  ✓ precision_recall_curves.png")
def fig_feature_importance(results: dict, top_n: int = 20) -> None:
    """Draw a horizontal bar chart of the most important features.

    Importances are read from ``results["_feature_importance"]`` (a mapping
    of feature name to score) and the ``top_n`` largest are plotted with the
    largest at the top.

    Args:
        results: Parsed training results.
        top_n: Maximum number of features to show.

    Side effects:
        Writes ``feature_importance_top20.png`` into ``FIGURES_DIR``; the
        file name is fixed regardless of ``top_n``. Nothing is written when
        no importances are available.
    """
    imp = results.get("_feature_importance", {})
    if not imp:
        print("  ! no feature importance available, skipping")
        return

    items = sorted(imp.items(), key=lambda x: x[1], reverse=True)[:top_n]
    names = [n for n, _ in items]
    vals = [v for _, v in items]

    fig, ax = plt.subplots(figsize=(9, 7))
    y_pos = np.arange(len(names))
    colors_grad = plt.cm.copper(np.linspace(0.3, 0.85, len(names)))
    ax.barh(y_pos, vals, color=colors_grad, edgecolor=PALETTE["fg"], linewidth=0.5)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(names, fontsize=10)
    # Bars are sorted descending, so flip the axis to put the largest on top.
    ax.invert_yaxis()
    ax.set_xlabel("Normalize Önem / Normalized Importance")
    # Report the number of bars actually drawn, which can be below top_n.
    ax.set_title(f"En Önemli {len(names)} Özellik — {results.get('_best_model', 'XGBoost')}",
                 fontsize=13, fontweight="bold")

    # Value labels sit slightly to the right of each bar end; the offset is
    # loop-invariant, so it is computed once.
    label_offset = max(vals) * 0.01
    for i, v in enumerate(vals):
        ax.text(v + label_offset, i, f"{v:.4f}", va="center", fontsize=8)

    fig.savefig(FIGURES_DIR / "feature_importance_top20.png")
    plt.close(fig)
    print("  ✓ feature_importance_top20.png")
def fig_calibration(results: dict) -> None:
    """Plot one calibration curve per model and save the figure as a PNG.

    For each model entry (dict-valued items of ``results`` whose key does not
    start with an underscore) the fraction of positives is plotted against
    the mean predicted probability over 10 bins. The model named by
    ``results["_best_model"]`` is emphasised with a thicker line and larger
    markers, and a dotted diagonal marks perfect calibration.

    Args:
        results: Parsed training results; each model entry must provide
            ``y_true`` and ``y_prob``.

    Side effects:
        Writes ``calibration_plot.png`` into ``FIGURES_DIR``.
    """
    fig, ax = plt.subplots(figsize=(7, 6.5))
    best = results.get("_best_model", "XGBoost")

    model_entries = []
    for key, value in results.items():
        if not key.startswith("_") and isinstance(value, dict):
            model_entries.append((key, value))

    # One evenly spaced colour per model.
    shades = plt.cm.plasma(np.linspace(0.2, 0.8, len(model_entries)))

    for shade, (name, data) in zip(shades, model_entries):
        labels = np.array(data["y_true"])
        scores = np.array(data["y_prob"])
        frac_pos, mean_pred = calibration_curve(labels, scores, n_bins=10)
        if name == best:
            line_width, marker_size = 3, 6
        else:
            line_width, marker_size = 1.2, 4
        ax.plot(
            mean_pred,
            frac_pos,
            "o-",
            color=shade,
            lw=line_width,
            label=f"{name}",
            markersize=marker_size,
        )

    # Reference line: predicted probability equals observed frequency.
    ax.plot([0, 1], [0, 1], "k:", alpha=0.5, label="Mükemmel / Perfect")
    ax.set_xlabel("Ortalama Tahmin Olasılığı / Mean Predicted Probability")
    ax.set_ylabel("Gerçek Pozitif Oranı / Fraction of Positives")
    ax.set_title("Kalibrasyon Eğrisi", fontsize=13, fontweight="bold")
    ax.legend(loc="upper left", framealpha=0.85, fontsize=9)

    plt.savefig(FIGURES_DIR / "calibration_plot.png")
    plt.close()
    print("  ✓ calibration_plot.png")
def fig_feature_distributions(feature_cols: list[str], top_features: list[str]) -> None:
    """Plot AI-vs-human histograms for up to eight top-ranked features.

    The features CSV is read once; for each of the first eight entries of
    ``top_features`` two overlaid density histograms are drawn, one for rows
    whose ``label_int`` is ``"1"`` (AI) and one for all other rows (human).
    Cells that are missing, non-numeric, NaN or infinite are ignored.

    Args:
        feature_cols: Not used by this function; kept so existing callers
            keep working.
        top_features: Feature names ordered from most to least important.

    Side effects:
        Writes ``feature_distribution_ai_vs_human.png`` into ``FIGURES_DIR``.
        Nothing is written when ``top_features`` is empty.
    """
    selected = top_features[:8]
    if not selected:
        print("  ! no top features available, skipping feature distributions")
        return

    # newline="" lets the csv module handle line endings itself.
    with open(FEATURES_CSV, "r", encoding="utf-8", newline="") as f:
        rows = list(csv.DictReader(f))

    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    axes = axes.flatten()
    bins = 30

    for i, col in enumerate(selected):
        ai_vals, hum_vals = [], []
        for r in rows:
            try:
                v = float(r[col])
                label = r["label_int"]
            except (KeyError, TypeError, ValueError):
                # TypeError covers short rows, which DictReader pads with None.
                continue
            if not np.isfinite(v):
                continue
            (ai_vals if label == "1" else hum_vals).append(v)

        ax = axes[i]
        # Histogram overlay: human first, AI on top.
        ax.hist(hum_vals, bins=bins, alpha=0.55, color=PALETTE["secondary"],
                label=f"İnsan (n={len(hum_vals)})", density=True)
        ax.hist(ai_vals, bins=bins, alpha=0.55, color=PALETTE["error"],
                label=f"AI (n={len(ai_vals)})", density=True)
        ax.set_title(col, fontsize=10, fontweight="bold")
        # Only the first panel of each row carries the y-axis label.
        ax.set_ylabel("Yoğunluk" if i % 4 == 0 else "")
        ax.legend(fontsize=7, loc="best")
        ax.tick_params(labelsize=8)

    # Hide the panels that received no feature.
    for unused_ax in axes[len(selected):]:
        unused_ax.axis("off")

    fig.suptitle("AI vs İnsan — En Önemli 8 Özelliğin Dağılımı",
                 fontsize=14, fontweight="bold", y=1.02)
    fig.tight_layout()
    fig.savefig(FIGURES_DIR / "feature_distribution_ai_vs_human.png")
    plt.close(fig)
    print("  ✓ feature_distribution_ai_vs_human.png")
def fig_shap_summary(model, scaler, feature_cols, X, max_display: int = 20) -> None:
    """Render a SHAP summary plot for ``model`` and save it as a PNG.

    ``X`` is scaled with ``scaler.transform``, subsampled to at most 1000 rows
    with a fixed seed, and explained with ``shap.TreeExplainer``. If ``shap``
    cannot be imported the function prints a notice and returns.

    Args:
        model: Fitted model handed to ``shap.TreeExplainer``; presumably a
            tree-based classifier - verify against the training script.
        scaler: Object exposing ``transform(X)``.
        feature_cols: Feature names used to label the plot.
        X: Unscaled feature matrix, one row per sample.
        max_display: Maximum number of features shown in the plot.

    Side effects:
        Writes ``shap_summary.png`` into ``FIGURES_DIR``.
    """
    try:
        import shap
    except ImportError:
        print("  ! SHAP not available, skipping")
        return

    X_scaled = scaler.transform(X)
    # Subsample for speed; the fixed seed keeps the figure reproducible.
    if len(X_scaled) > 1000:
        idx = np.random.RandomState(42).choice(len(X_scaled), 1000, replace=False)
        X_sub = X_scaled[idx]
    else:
        X_sub = X_scaled

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_sub)

    # Reduce per-class output to a single 2-D (samples, features) matrix,
    # preferring class index 1 when more than one class is present.
    if isinstance(shap_values, list):
        # List form: one array per class.
        sv = shap_values[1] if len(shap_values) > 1 else shap_values[0]
    else:
        sv = np.asarray(shap_values)
        if sv.ndim == 3:
            # Array form with the outputs stacked on the last axis (returned
            # by newer SHAP releases for multi-output models).
            sv = sv[:, :, 1] if sv.shape[2] > 1 else sv[:, :, 0]

    fig = plt.figure(figsize=(10, 8))
    # plot_size=None stops SHAP from resizing the figure created above.
    shap.summary_plot(
        sv, X_sub,
        feature_names=feature_cols,
        max_display=max_display,
        show=False,
        plot_size=None,
    )
    plt.title("SHAP Özet Grafiği — Global Özellik Etkisi",
              fontsize=13, fontweight="bold", pad=14)
    plt.savefig(FIGURES_DIR / "shap_summary.png", bbox_inches="tight")
    plt.close(fig)
    print("  ✓ shap_summary.png")
def fig_model_comparison(results: dict) -> None:
    """Draw a grouped bar chart of five metrics for every model.

    Model entries are the dict-valued items of ``results`` whose key does not
    start with an underscore, ordered by descending ``roc_auc``. Each model
    gets one group of five bars: accuracy, F1, ROC-AUC, precision and recall.

    Args:
        results: Parsed training results; each model entry must provide
            ``accuracy``, ``f1``, ``roc_auc``, ``precision`` and ``recall``.

    Side effects:
        Writes ``model_comparison_bars.png`` into ``FIGURES_DIR``.
    """
    ranked = sorted(
        ((k, v) for k, v in results.items() if not k.startswith("_") and isinstance(v, dict)),
        key=lambda pair: pair[1].get("roc_auc", 0),
        reverse=True,
    )
    names = [model_name for model_name, _ in ranked]

    # Legend label -> key inside each model entry, in drawing order.
    metric_keys = (
        ("Accuracy", "accuracy"),
        ("F1 Score", "f1"),
        ("ROC-AUC", "roc_auc"),
        ("Precision", "precision"),
        ("Recall", "recall"),
    )
    metrics = {
        label: [entry[key] for _, entry in ranked]
        for label, key in metric_keys
    }

    x = np.arange(len(names))
    width = 0.16
    bar_colors = [
        PALETTE["primary"],
        PALETTE["secondary"],
        PALETTE["error"],
        PALETTE["accent"],
        "#7a5c3c",
    ]

    fig, ax = plt.subplots(figsize=(12, 6.5))
    for i, (metric, vals) in enumerate(metrics.items()):
        # Subtracting two bar widths centres the five-bar group on its tick.
        ax.bar(
            x + i * width - 2 * width,
            vals,
            width,
            label=metric,
            color=bar_colors[i],
            edgecolor=PALETTE["fg"],
            linewidth=0.3,
        )

    ax.set_ylabel("Skor / Score")
    ax.set_title("Model Performans Karşılaştırması", fontsize=13, fontweight="bold")
    ax.set_xticks(x)
    ax.set_xticklabels(names, rotation=20, ha="right")
    ax.legend(loc="lower right", framealpha=0.85)
    # The y axis starts at 0.5, so scores below that are cut off.
    ax.set_ylim([0.5, 1.0])
    ax.grid(True, axis="y", alpha=0.4)

    plt.savefig(FIGURES_DIR / "model_comparison_bars.png")
    plt.close()
    print("  ✓ model_comparison_bars.png")
def main() -> None:
    """Generate every figure and report how many PNGs ended up on disk.

    Creates ``FIGURES_DIR`` if needed, loads the training artifacts, renders
    the figures that only need the results dict, then the feature
    distribution figure, and finally loads the feature matrix for the SHAP
    summary. The closing message counts all ``*.png`` files in
    ``FIGURES_DIR``, not only the ones written by this run.
    """
    FIGURES_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Output directory: {FIGURES_DIR}")

    print("Loading artifacts...")
    results, model, scaler, feature_cols = _load_artifacts()

    # Feature names ranked from most to least important.
    importance = results.get("_feature_importance", {})
    ranked = sorted(importance.items(), key=lambda x: x[1], reverse=True)
    top_features = [feature_name for feature_name, _ in ranked]

    print("\nGenerating figures...")
    results_only_figures = (
        fig_confusion_matrix,
        fig_roc_comparison,
        fig_pr_curves,
        fig_feature_importance,
        fig_calibration,
        fig_model_comparison,
    )
    for make_figure in results_only_figures:
        make_figure(results)
    fig_feature_distributions(feature_cols, top_features)

    print("\nLoading data for SHAP (this may take ~30s)...")
    # Only the feature matrix is needed here; the labels are discarded.
    X, _ = _load_csv_data(feature_cols)
    fig_shap_summary(model, scaler, feature_cols, X)

    png_count = sum(1 for _ in FIGURES_DIR.glob("*.png"))
    print(f"\nDone. {png_count} figures in {FIGURES_DIR}")


if __name__ == "__main__":
    main()