Spaces:

Rthur2003
/

crowncode-backend

Sleeping

File size: 15,689 Bytes

"""Generate academic-quality figures from training results.

Produces publication-ready figures in docs/academic/figures/:
  - confusion_matrix.png
  - roc_curves_comparison.png
  - precision_recall_curves.png
  - feature_importance_top20.png
  - calibration_plot.png
  - feature_distribution_ai_vs_human.png
  - shap_summary.png
  - model_comparison_bars.png

Usage:
    python -m app.training.generate_figures
"""

from __future__ import annotations

import csv
import json
import pickle
from pathlib import Path

import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

from sklearn.metrics import (
    confusion_matrix, roc_curve, auc,
    precision_recall_curve, average_precision_score,
)
from sklearn.calibration import calibration_curve
from sklearn.model_selection import StratifiedKFold, cross_val_predict
from sklearn.base import clone

# ── Paths ────────────────────────────────────────────────────────────────
# Every location is derived from this file's own resolved path, so the script
# does not depend on the current working directory.
# parents[2] is two directories above the one containing this file; with the
# module run as app.training.generate_figures that is presumably the folder
# holding the `app` package — re-check if this file is ever moved.
BACKEND = Path(__file__).resolve().parents[2]
# Serialized classifier, scaler, feature-column list and training results.
MODELS_DIR = BACKEND / "models"
# The dataset and docs folders are siblings of BACKEND, not children of it.
DATASET_DIR = BACKEND.parent / "DataSet"
# Destination for every PNG written by this module (created by main()).
FIGURES_DIR = BACKEND.parent / "docs" / "academic" / "figures"
# Feature table read by the loaders; must contain a `label_int` column.
FEATURES_CSV = DATASET_DIR / "features.csv"

# ── Theme (AURIS parchment gold palette) ─────────────────────────────────
# Hex colours shared by all figures; plotting functions look them up by key.
PALETTE = {
    "bg": "#faf6ed",         # figure / axes background, light text on dark cells
    "fg": "#3d2817",         # axes edges, labels, ticks, text, bar outlines
    "primary": "#c99347",    # main curve colour (ROC, PR, calibration, heatmap)
    "secondary": "#7fb069",  # human-class histograms, second bar series
    "error": "#a64b3c",      # AI-class histograms, third bar series
    "grid": "#d8c9a8",       # grid lines
    "accent": "#e7c77a",     # fourth bar series in the model comparison
}

# Global matplotlib defaults. This runs at import time, so it applies to every
# figure created after this module has been imported.
plt.rcParams.update({
    "figure.facecolor": PALETTE["bg"],
    "axes.facecolor": PALETTE["bg"],
    "axes.edgecolor": PALETTE["fg"],
    "axes.labelcolor": PALETTE["fg"],
    "xtick.color": PALETTE["fg"],
    "ytick.color": PALETTE["fg"],
    "text.color": PALETTE["fg"],
    "font.family": "DejaVu Sans",
    "font.size": 11,
    "axes.grid": True,
    "grid.color": PALETTE["grid"],
    "grid.alpha": 0.4,
    # Saved files use a higher DPI than on-screen figures and are cropped
    # tightly around the drawn content.
    "savefig.dpi": 150,
    "savefig.bbox": "tight",
    "figure.dpi": 100,
})


def _load_artifacts():
    """Load training results, model, features CSV."""
    with open(MODELS_DIR / "training_results.json", "r") as f:
        results = json.load(f)
    with open(MODELS_DIR / "auris_classifier_v1.pkl", "rb") as f:
        model = pickle.load(f)
    with open(MODELS_DIR / "feature_scaler_v1.pkl", "rb") as f:
        scaler = pickle.load(f)
    with open(MODELS_DIR / "feature_columns_v1.json", "r") as f:
        feature_cols = json.load(f)
    return results, model, scaler, feature_cols


def _load_csv_data(feature_cols):
    with open(FEATURES_CSV, "r", encoding="utf-8") as f:
        rows = list(csv.DictReader(f))
    X = np.array([[float(r[c]) for c in feature_cols] for r in rows])
    X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
    y = np.array([int(r["label_int"]) for r in rows])
    return X, y


def _get_cv_predictions(model, X_scaled, y, cache: dict) -> tuple:
    """Return ``(y, y_pred, y_prob)`` from 5-fold CV, memoised per model object.

    The cache is keyed on ``id(model)``; a hit returns the stored tuple
    without touching ``X_scaled`` or ``y``. On a miss, a clone of ``model`` is
    evaluated with a shuffled, seeded (42) stratified 5-fold split, column 1
    of ``predict_proba`` is taken as the positive-class probability, and
    labels are derived with a strict ``> 0.5`` threshold.
    """
    cache_key = id(model)
    try:
        return cache[cache_key]
    except KeyError:
        pass  # first request for this model: compute below

    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    fold_proba = cross_val_predict(
        clone(model), X_scaled, y, cv=folds, method="predict_proba", n_jobs=-1,
    )
    positive_proba = fold_proba[:, 1]
    hard_labels = (positive_proba > 0.5).astype(int)

    outcome = (y, hard_labels, positive_proba)
    cache[cache_key] = outcome
    return outcome


def fig_confusion_matrix(results: dict, y_true: np.ndarray, y_pred: np.ndarray) -> None:
    """Draw the 2x2 confusion matrix of the best model and save it as PNG.

    Args:
        results: Training results. ``results["_best_model"]`` names the model
            (default ``"XGBoost"``) and ``results[<name>]`` may provide the
            ``accuracy``, ``f1`` and ``roc_auc`` values shown in the title.
        y_true: Ground-truth labels (0 = human, 1 = AI).
        y_pred: Predicted labels aligned with ``y_true``.

    The figure is written to ``FIGURES_DIR / "confusion_matrix.png"``.
    """
    best = results.get("_best_model", "XGBoost")
    data = results.get(best, {})
    # Pin both classes so the matrix is always 2x2. Without `labels`, input
    # containing a single class yields a 1x1 matrix and the annotation loop
    # below would raise IndexError.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    fig, ax = plt.subplots(figsize=(6.5, 5.5))
    cmap = LinearSegmentedColormap.from_list(
        "auris", [PALETTE["bg"], PALETTE["primary"]],
    )
    im = ax.imshow(cm, cmap=cmap, aspect="auto")

    # Title metrics come from the stored training results; accuracy falls back
    # to the value computed from the supplied predictions, the others to 0.0.
    acc = data.get("accuracy", (y_true == y_pred).mean())
    f1v = data.get("f1", 0.0)
    aucv = data.get("roc_auc", 0.0)
    ax.set_title(
        f"Karışıklık Matrisi — {best}\n"
        f"Accuracy: {acc:.1%}  F1: {f1v:.3f}  AUC: {aucv:.3f}",
        fontsize=13, fontweight="bold",
    )
    classes = ["İnsan / Human", "AI"]
    ax.set_xticks([0, 1])
    ax.set_yticks([0, 1])
    ax.set_xticklabels(classes)
    ax.set_yticklabels(classes)
    ax.set_xlabel("Tahmin / Predicted")
    ax.set_ylabel("Gerçek / Actual")

    # Annotate each cell with its count and its share of all samples.
    total = cm.sum()
    for (i, j), count in np.ndenumerate(cm):
        # Avoid a 0/0 NaN (and runtime warning) when there are no samples.
        pct = 100 * count / total if total else 0.0
        # Cells holding more than a quarter of the samples are dark enough
        # to need light text.
        color = PALETTE["bg"] if count > total * 0.25 else PALETTE["fg"]
        ax.text(
            j, i, f"{count}\n({pct:.1f}%)",
            ha="center", va="center",
            color=color, fontsize=13, fontweight="bold",
        )

    fig.colorbar(im, ax=ax, shrink=0.7)
    fig.savefig(FIGURES_DIR / "confusion_matrix.png")
    plt.close(fig)
    print("  ✓ confusion_matrix.png")


def fig_roc_comparison(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None:
    """Plot the best model's ROC curve with a chance diagonal and save it.

    The curve and its AUC are computed from ``y_true`` / ``y_prob``. Every
    other model entry in ``results`` is listed as a small text note showing
    its stored ``roc_auc``. Output: ``FIGURES_DIR / "roc_curves_comparison.png"``.
    """
    figure, axis = plt.subplots(figsize=(8, 6.5))
    best_name = results.get("_best_model", "XGBoost")
    false_pos, true_pos, _thresholds = roc_curve(y_true, y_prob)
    area = auc(false_pos, true_pos)
    curve_color = PALETTE["primary"]

    axis.plot(
        false_pos, true_pos,
        color=curve_color, lw=3,
        label=f"{best_name} (AUC = {area:.4f})",
    )
    axis.fill_between(false_pos, true_pos, alpha=0.15, color=curve_color)
    axis.plot([0, 1], [0, 1], "k:", alpha=0.5, lw=1.5, label="Rastgele / Random")

    # Text notes for the remaining models. The vertical slot is the entry's
    # position among *all* keys of `results`, including skipped ones.
    for slot, (model_name, entry) in enumerate(results.items()):
        is_model_entry = isinstance(entry, dict) and not model_name.startswith("_")
        if not is_model_entry or model_name == best_name:
            continue
        stored_auc = entry.get("roc_auc", 0)
        axis.annotate(
            f"{model_name}: AUC={stored_auc:.3f}",
            xy=(0.45, 0.05 + 0.04 * slot),
            fontsize=8, alpha=0.7,
        )

    axis.set_xlabel("Yanlış Pozitif Oranı / False Positive Rate")
    axis.set_ylabel("Doğru Pozitif Oranı / True Positive Rate")
    axis.set_title("ROC Eğrisi — En İyi Model (5-fold CV)", fontsize=13, fontweight="bold")
    axis.legend(loc="lower right", framealpha=0.85)
    axis.set_xlim([0, 1])
    axis.set_ylim([0, 1.02])

    figure.savefig(FIGURES_DIR / "roc_curves_comparison.png")
    plt.close(figure)
    print("  ✓ roc_curves_comparison.png")


def fig_pr_curves(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None:
    """Plot the best model's precision-recall curve and save it.

    The legend shows the average precision; a dotted horizontal line marks
    the positive-class share of ``y_true`` as the baseline.
    Output: ``FIGURES_DIR / "precision_recall_curves.png"``.
    """
    figure, axis = plt.subplots(figsize=(8, 6.5))
    best_name = results.get("_best_model", "XGBoost")
    precision, recall, _thresholds = precision_recall_curve(y_true, y_prob)
    avg_precision = average_precision_score(y_true, y_prob)
    positive_share = y_true.mean()
    curve_color = PALETTE["primary"]

    axis.plot(
        recall, precision,
        color=curve_color, lw=3,
        label=f"{best_name} (AP = {avg_precision:.4f})",
    )
    axis.fill_between(recall, precision, alpha=0.15, color=curve_color)
    axis.axhline(
        positive_share, color="k", linestyle=":", alpha=0.5,
        label=f"Baseline = {positive_share:.3f}",
    )

    axis.set_xlabel("Duyarlılık / Recall")
    axis.set_ylabel("Kesinlik / Precision")
    axis.set_title("Precision-Recall Eğrisi — En İyi Model", fontsize=13, fontweight="bold")
    axis.legend(loc="lower left", framealpha=0.85)
    axis.set_xlim([0, 1])
    axis.set_ylim([0, 1.02])

    figure.savefig(FIGURES_DIR / "precision_recall_curves.png")
    plt.close(figure)
    print("  ✓ precision_recall_curves.png")


def fig_feature_importance(results: dict, top_n: int = 20) -> None:
    """Draw a horizontal bar chart of the highest-ranked feature importances.

    Args:
        results: Training results; ``results["_feature_importance"]`` maps
            feature name to importance. If it is missing or empty, nothing is
            drawn and no file is written.
        top_n: Maximum number of features to show.

    The figure is written to ``FIGURES_DIR / "feature_importance_top20.png"``
    (the file name is fixed regardless of ``top_n``).
    """
    imp = results.get("_feature_importance", {})
    if not imp:
        return
    ranked = sorted(imp.items(), key=lambda x: x[1], reverse=True)[:top_n]
    names = [n for n, _ in ranked]
    vals = [v for _, v in ranked]
    # Horizontal gap between a bar's end and its value label; computed once
    # rather than re-evaluating max(vals) for every bar.
    label_pad = max(vals) * 0.01 if vals else 0.0

    fig, ax = plt.subplots(figsize=(9, 7))
    y_pos = np.arange(len(names))
    colors_grad = plt.cm.copper(np.linspace(0.3, 0.85, len(names)))
    ax.barh(y_pos, vals, color=colors_grad, edgecolor=PALETTE["fg"], linewidth=0.5)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(names, fontsize=10)
    ax.invert_yaxis()  # largest importance at the top
    ax.set_xlabel("Normalize Önem / Normalized Importance")
    # Report the number of bars actually drawn; fewer than top_n features may
    # be available, in which case quoting top_n would be wrong.
    ax.set_title(
        f"En Önemli {len(names)} Özellik — {results.get('_best_model', 'XGBoost')}",
        fontsize=13, fontweight="bold",
    )
    for i, v in enumerate(vals):
        ax.text(v + label_pad, i, f"{v:.4f}", va="center", fontsize=8)

    fig.savefig(FIGURES_DIR / "feature_importance_top20.png")
    plt.close(fig)
    print("  ✓ feature_importance_top20.png")


def fig_calibration(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None:
    """Plot a 10-bin reliability curve for the best model and save it.

    The observed fraction of positives is drawn against the mean predicted
    probability, with the gap to the diagonal shaded. A text box reports the
    Brier score (mean squared difference between ``y_prob`` and ``y_true``)
    and the sample count. Output: ``FIGURES_DIR / "calibration_plot.png"``.
    """
    figure, axis = plt.subplots(figsize=(7, 6.5))
    best_name = results.get("_best_model", "XGBoost")
    observed, predicted = calibration_curve(y_true, y_prob, n_bins=10)
    curve_color = PALETTE["primary"]

    axis.plot(
        predicted, observed, "o-",
        color=curve_color, lw=3, markersize=8, label=f"{best_name}",
    )
    # Shade the distance between the curve and perfect calibration.
    axis.fill_between(predicted, observed, predicted, alpha=0.15, color=curve_color)
    axis.plot([0, 1], [0, 1], "k:", alpha=0.5, label="Mükemmel / Perfect")

    squared_error = (y_prob - y_true) ** 2
    brier = float(np.mean(squared_error))
    info_box = dict(
        boxstyle="round,pad=0.5",
        facecolor=PALETTE["bg"],
        edgecolor=curve_color,
        alpha=0.85,
    )
    axis.text(
        0.04, 0.94,
        f"Brier Score = {brier:.4f}\nN = {len(y_true)} (5-fold CV)",
        transform=axis.transAxes,
        fontsize=10, va="top",
        bbox=info_box,
    )

    axis.set_xlabel("Ortalama Tahmin Olasılığı / Mean Predicted Probability")
    axis.set_ylabel("Gerçek Pozitif Oranı / Fraction of Positives")
    axis.set_title("Kalibrasyon Eğrisi — En İyi Model", fontsize=13, fontweight="bold")
    axis.legend(loc="lower right", framealpha=0.85, fontsize=10)
    axis.set_xlim([0, 1])
    axis.set_ylim([0, 1])

    figure.savefig(FIGURES_DIR / "calibration_plot.png")
    plt.close(figure)
    print("  ✓ calibration_plot.png")


def fig_feature_distributions(feature_cols: list[str], top_features: list[str]) -> None:
    """Overlay AI vs human histograms for up to eight top-ranked features.

    Args:
        feature_cols: Currently unused; kept so existing callers keep working.
        top_features: Feature names ordered by importance; the first eight
            (or fewer) each get one panel of a 2x4 grid.

    Rows are read from ``FEATURES_CSV``. A row is skipped for a given feature
    when the feature value or ``label_int`` is missing or not numeric, or the
    value is NaN/inf. The figure is written to
    ``FIGURES_DIR / "feature_distribution_ai_vs_human.png"``.
    """
    # The csv module asks for newline="" so it can manage line endings itself.
    with open(FEATURES_CSV, "r", encoding="utf-8", newline="") as f:
        rows = list(csv.DictReader(f))

    n = min(8, len(top_features))
    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    axes = axes.flatten()

    for i, col in enumerate(top_features[:n]):
        ai_vals, hum_vals = [], []
        for r in rows:
            try:
                v = float(r[col])
                # Parse the label numerically, as _load_csv_data does, rather
                # than comparing against the literal string "1".
                is_ai = int(r["label_int"]) == 1
            except (ValueError, KeyError, TypeError):
                # TypeError: DictReader fills short rows with None.
                continue
            if np.isnan(v) or np.isinf(v):
                continue
            (ai_vals if is_ai else hum_vals).append(v)

        # Use one set of bin edges for both classes. Separate `bins=30` calls
        # would bin each class over its own range, giving bars of different
        # widths that cannot be compared in the overlay.
        combined = hum_vals + ai_vals
        bins = np.histogram_bin_edges(combined, bins=30) if combined else 30

        ax = axes[i]
        ax.hist(hum_vals, bins=bins, alpha=0.55, color=PALETTE["secondary"],
                label=f"İnsan (n={len(hum_vals)})", density=True)
        ax.hist(ai_vals, bins=bins, alpha=0.55, color=PALETTE["error"],
                label=f"AI (n={len(ai_vals)})", density=True)
        ax.set_title(col, fontsize=10, fontweight="bold")
        # Only the left-most panel of each row carries the y-axis label.
        ax.set_ylabel("Yoğunluk" if i % 4 == 0 else "")
        ax.legend(fontsize=7, loc="best")
        ax.tick_params(labelsize=8)

    # Hide any panels left over when fewer than eight features are available.
    for unused_ax in axes[n:]:
        unused_ax.axis("off")

    fig.suptitle("AI vs İnsan — En Önemli 8 Özelliğin Dağılımı",
                 fontsize=14, fontweight="bold", y=1.02)
    fig.tight_layout()
    fig.savefig(FIGURES_DIR / "feature_distribution_ai_vs_human.png")
    plt.close(fig)
    print("  ✓ feature_distribution_ai_vs_human.png")


def fig_shap_summary(model, scaler, feature_cols, X, max_display: int = 20) -> None:
    """Draw a SHAP summary plot for the model and save it as PNG.

    Args:
        model: Fitted model passed to ``shap.TreeExplainer``.
        scaler: Fitted transformer; ``scaler.transform(X)`` is applied here.
        feature_cols: Feature names used as plot labels.
        X: Unscaled feature matrix.
        max_display: Maximum number of features shown in the plot.

    If the ``shap`` package cannot be imported, a notice is printed and the
    function returns without writing a file. Otherwise the figure is written
    to ``FIGURES_DIR / "shap_summary.png"``.
    """
    try:
        import shap
    except ImportError:
        print("  ! SHAP not available, skipping")
        return

    X_scaled = scaler.transform(X)
    # Explain at most 1000 rows, sampled with a fixed seed, to bound runtime.
    if len(X_scaled) > 1000:
        idx = np.random.RandomState(42).choice(len(X_scaled), 1000, replace=False)
        X_sub = X_scaled[idx]
    else:
        X_sub = X_scaled

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_sub)

    # Reduce the explainer output to one 2-D (samples, features) matrix for
    # the positive class (index 1), falling back to index 0 when only one
    # output exists.
    if isinstance(shap_values, list):
        # List form: one matrix per class.
        sv = shap_values[1] if len(shap_values) > 1 else shap_values[0]
    elif getattr(shap_values, "ndim", None) == 3:
        # Stacked form (samples, features, outputs), as returned by newer
        # SHAP releases for multi-output models; summary_plot cannot take it
        # unsliced.
        sv = shap_values[:, :, 1] if shap_values.shape[2] > 1 else shap_values[:, :, 0]
    else:
        sv = shap_values

    fig = plt.figure(figsize=(10, 8))
    # plot_size=None keeps the size of the figure created above.
    shap.summary_plot(
        sv, X_sub,
        feature_names=feature_cols,
        max_display=max_display,
        show=False,
        plot_size=None,
    )
    plt.title("SHAP Özet Grafiği — Global Özellik Etkisi",
              fontsize=13, fontweight="bold", pad=14)
    plt.savefig(FIGURES_DIR / "shap_summary.png", bbox_inches="tight")
    plt.close(fig)
    print("  ✓ shap_summary.png")

def fig_model_comparison(results: dict) -> None:
    """Plot grouped bars of five metrics for every model and save the figure.

    Model entries are the dict-valued items of ``results`` whose key does not
    start with ``"_"``, ordered by descending ``roc_auc``. Each entry must
    provide ``accuracy``, ``f1``, ``roc_auc``, ``precision`` and ``recall``.
    Output: ``FIGURES_DIR / "model_comparison_bars.png"``.
    """
    model_entries = sorted(
        (
            (name, entry)
            for name, entry in results.items()
            if not name.startswith("_") and isinstance(entry, dict)
        ),
        key=lambda pair: pair[1].get("roc_auc", 0),
        reverse=True,
    )
    model_names = [name for name, _ in model_entries]

    # (legend label, key inside each model entry), in drawing order.
    metric_keys = (
        ("Accuracy", "accuracy"),
        ("F1 Score", "f1"),
        ("ROC-AUC", "roc_auc"),
        ("Precision", "precision"),
        ("Recall", "recall"),
    )
    series = {
        label: [entry[key] for _, entry in model_entries]
        for label, key in metric_keys
    }

    positions = np.arange(len(model_names))
    bar_width = 0.16
    figure, axis = plt.subplots(figsize=(12, 6.5))
    bar_colors = [
        PALETTE["primary"],
        PALETTE["secondary"],
        PALETTE["error"],
        PALETTE["accent"],
        "#7a5c3c",
    ]

    # Shifting by two bar widths centres the five-bar group on its tick.
    for slot, (label, values) in enumerate(series.items()):
        axis.bar(
            positions + slot * bar_width - 2 * bar_width, values, bar_width,
            label=label, color=bar_colors[slot],
            edgecolor=PALETTE["fg"], linewidth=0.3,
        )

    axis.set_ylabel("Skor / Score")
    axis.set_title("Model Performans Karşılaştırması", fontsize=13, fontweight="bold")
    axis.set_xticks(positions)
    axis.set_xticklabels(model_names, rotation=20, ha="right")
    axis.legend(loc="lower right", framealpha=0.85)
    axis.set_ylim([0.5, 1.0])
    axis.grid(True, axis="y", alpha=0.4)

    figure.savefig(FIGURES_DIR / "model_comparison_bars.png")
    plt.close(figure)
    print("  ✓ model_comparison_bars.png")


def main() -> None:
    """Regenerate every figure from the stored artifacts and the feature CSV.

    Steps: create the output directory, load the artifacts and dataset, scale
    the features, compute cross-validated predictions once, draw each figure,
    then report how many PNG files the output directory contains.
    """
    FIGURES_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Output directory: {FIGURES_DIR}")

    print("Loading artifacts...")
    results, model, scaler, feature_cols = _load_artifacts()

    # Feature names ordered from most to least important.
    ranked = sorted(
        results.get("_feature_importance", {}).items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_features = [name for name, _score in ranked]

    print("\nLoading dataset...")
    X, y = _load_csv_data(feature_cols)
    X_scaled = scaler.transform(X)

    print("Computing 5-fold cross-validated predictions (this may take ~1-2 min)...")
    cv_cache: dict = {}
    y_true, y_pred, y_prob = _get_cv_predictions(model, X_scaled, y, cv_cache)

    print("\nGenerating figures...")
    fig_confusion_matrix(results, y_true, y_pred)
    fig_roc_comparison(results, y_true, y_prob)
    fig_pr_curves(results, y_true, y_prob)
    fig_feature_importance(results)
    fig_calibration(results, y_true, y_prob)
    fig_model_comparison(results)
    fig_feature_distributions(feature_cols, top_features)

    print("\nGenerating SHAP summary...")
    # The SHAP figure receives the unscaled matrix and scales it itself.
    fig_shap_summary(model, scaler, feature_cols, X)

    png_count = sum(1 for _ in FIGURES_DIR.glob("*.png"))
    print(f"\nDone. {png_count} figures in {FIGURES_DIR}")


if __name__ == "__main__":
    main()