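# NOTE(review): "Spaces: Sleeping" — apparently page-status text captured together
# with this source from the hosting site; it is not part of the module.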
"""Generate academic-quality figures from training results.

Produces publication-ready figures in docs/academic/figures/:
    - confusion_matrix.png
    - roc_curves_comparison.png
    - precision_recall_curves.png
    - feature_importance_top20.png
    - calibration_plot.png
    - feature_distribution_ai_vs_human.png
    - shap_summary.png
    - model_comparison_bars.png

Usage:
    python -m app.training.generate_figures
"""
from __future__ import annotations

import csv
import json
import pickle
from pathlib import Path

import matplotlib

matplotlib.use("Agg")  # must run before pyplot is imported so the backend is set first
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
from sklearn.base import clone
from sklearn.calibration import calibration_curve
from sklearn.metrics import (
    auc,
    average_precision_score,
    confusion_matrix,
    f1_score,
    precision_recall_curve,
    roc_curve,
)
from sklearn.model_selection import StratifiedKFold, cross_val_predict


# ── Paths ────────────────────────────────────────────────────────────────
# Every location is derived from this file's own path: the directory two
# levels above this file's folder is used as the backend root, and the
# dataset / docs trees are looked up next to that root.
BACKEND = Path(__file__).resolve().parents[2]
MODELS_DIR = BACKEND / "models"
DATASET_DIR = BACKEND.parent / "DataSet"
FIGURES_DIR = BACKEND.parent / "docs" / "academic" / "figures"
FEATURES_CSV = DATASET_DIR / "features.csv"

# ── Theme (AURIS parchment gold palette) ─────────────────────────────────
# Named colours shared by every figure in this module.
PALETTE = {
    "bg": "#faf6ed",
    "fg": "#3d2817",
    "primary": "#c99347",
    "secondary": "#7fb069",
    "error": "#a64b3c",
    "grid": "#d8c9a8",
    "accent": "#e7c77a",
}

# Applied once at import time so that every figure created afterwards picks
# up the same colours, font, grid and save settings.
plt.rcParams.update({
    "figure.facecolor": PALETTE["bg"],
    "axes.facecolor": PALETTE["bg"],
    "axes.edgecolor": PALETTE["fg"],
    "axes.labelcolor": PALETTE["fg"],
    "xtick.color": PALETTE["fg"],
    "ytick.color": PALETTE["fg"],
    "text.color": PALETTE["fg"],
    "font.family": "DejaVu Sans",
    "font.size": 11,
    "axes.grid": True,
    "grid.color": PALETTE["grid"],
    "grid.alpha": 0.4,
    "savefig.dpi": 150,
    "savefig.bbox": "tight",
    "figure.dpi": 100,
})


def _load_artifacts():
    """Load the training results, classifier, scaler and feature-column list.

    All four files are read from ``MODELS_DIR``.

    Returns:
        A ``(results, model, scaler, feature_cols)`` tuple, where ``results``
        and ``feature_cols`` come from JSON and ``model`` / ``scaler`` are
        unpickled objects.

    Raises:
        OSError: If any of the artifact files cannot be opened.
        json.JSONDecodeError: If a JSON artifact is malformed.
    """
    # JSON is read as UTF-8 explicitly rather than with the platform default
    # encoding, so non-ASCII content decodes the same way on every OS.
    with open(MODELS_DIR / "training_results.json", "r", encoding="utf-8") as f:
        results = json.load(f)
    # SECURITY: pickle.load executes arbitrary code embedded in the file.
    # Only load artifacts produced by this project's own training run.
    with open(MODELS_DIR / "auris_classifier_v1.pkl", "rb") as f:
        model = pickle.load(f)
    with open(MODELS_DIR / "feature_scaler_v1.pkl", "rb") as f:
        scaler = pickle.load(f)
    with open(MODELS_DIR / "feature_columns_v1.json", "r", encoding="utf-8") as f:
        feature_cols = json.load(f)
    return results, model, scaler, feature_cols


def _load_csv_data(feature_cols, csv_path=None):
    """Read the feature matrix and integer labels from the features CSV.

    Args:
        feature_cols: Column names to extract, in the order they should
            appear in the returned matrix.
        csv_path: Optional path of the CSV to read. Defaults to
            ``FEATURES_CSV`` when omitted.

    Returns:
        ``(X, y)`` where ``X`` is a 2-D float array with one row per CSV row
        and one column per entry of ``feature_cols`` (NaN replaced by 0.0,
        +inf by 1.0, -inf by -1.0) and ``y`` is the ``label_int`` column as
        integers.

    Raises:
        ValueError: If the CSV has no data rows or a cell is not numeric.
        KeyError: If a requested column is missing from the CSV.
    """
    path = FEATURES_CSV if csv_path is None else csv_path
    # newline="" is what the csv module expects for files it parses itself.
    with open(path, "r", encoding="utf-8", newline="") as f:
        rows = list(csv.DictReader(f))
    if not rows:
        # Fail here with a clear message instead of returning empty 1-D
        # arrays that break later in the scaler / cross-validation.
        raise ValueError(f"No data rows found in {path}")
    X = np.array([[float(r[c]) for c in feature_cols] for r in rows], dtype=float)
    X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
    # Parse via float first so labels written as "1.0" are accepted as well.
    y = np.array([int(float(r["label_int"])) for r in rows])
    return X, y


def _get_cv_predictions(model, X_scaled, y, cache: dict) -> tuple:
    """Return 5-fold cross-validated predictions, memoised in ``cache``.

    Args:
        model: Estimator to evaluate; an unfitted clone of it is trained on
            each fold, so the object passed in is not modified.
        X_scaled: Feature matrix handed to cross-validation as-is.
        y: Labels; the 0.5 threshold below treats column 1 of
            ``predict_proba`` as the positive class.
        cache: Dict used as the memo store. It is keyed on ``id(model)``
            only, so it must not be reused with the same model object and
            different data.

    Returns:
        ``(y, y_pred, y_prob)``: the labels as passed in, the hard
        predictions (probability strictly above 0.5) and the positive-class
        probabilities.
    """
    key = id(model)
    if key in cache:
        return cache[key]
    # Fixed seed so every run produces the same folds and the same figures.
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    y_prob = cross_val_predict(
        clone(model), X_scaled, y, cv=cv, method="predict_proba", n_jobs=-1,
    )[:, 1]
    y_pred = (y_prob > 0.5).astype(int)
    cache[key] = (y, y_pred, y_prob)
    return y, y_pred, y_prob


def fig_confusion_matrix(results: dict, y_true: np.ndarray, y_pred: np.ndarray) -> None:
    """Draw the confusion matrix for the best model and save it as a PNG.

    The matrix itself is computed from ``y_true`` / ``y_pred``. The title
    metrics are taken from the best model's entry in ``results`` when
    present; accuracy and F1 fall back to values computed from the supplied
    predictions, and AUC is shown as "n/a" because no probabilities are
    available here.

    Args:
        results: Training results; ``_best_model`` names the entry to use.
        y_true: Ground-truth labels (0 or 1).
        y_pred: Predicted labels (0 or 1).
    """
    best = results.get("_best_model", "XGBoost")
    data = results.get(best, {})
    # Pin the label order so the matrix is always 2x2, even when one class
    # is missing from the inputs; the annotation loop below indexes [0..1].
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    fig, ax = plt.subplots(figsize=(6.5, 5.5))
    cmap = LinearSegmentedColormap.from_list(
        "auris", [PALETTE["bg"], PALETTE["primary"]],
    )
    im = ax.imshow(cm, cmap=cmap, aspect="auto")

    acc = data.get("accuracy", (y_true == y_pred).mean())
    f1v = data.get("f1")
    if f1v is None:
        # Derive the score from the same predictions the matrix shows
        # rather than printing a misleading 0.000.
        f1v = f1_score(y_true, y_pred, zero_division=0)
    aucv = data.get("roc_auc")
    auc_text = "n/a" if aucv is None else f"{aucv:.3f}"
    ax.set_title(
        f"Karışıklık Matrisi — {best}\n"
        f"Accuracy: {acc:.1%} F1: {f1v:.3f} AUC: {auc_text}",
        fontsize=13, fontweight="bold",
    )

    classes = ["İnsan / Human", "AI"]
    ax.set_xticks([0, 1])
    ax.set_yticks([0, 1])
    ax.set_xticklabels(classes)
    ax.set_yticklabels(classes)
    ax.set_xlabel("Tahmin / Predicted")
    ax.set_ylabel("Gerçek / Actual")

    # Cell annotations with count + percentage of all samples.
    total = cm.sum()
    for i in range(2):
        for j in range(2):
            count = cm[i, j]
            pct = 100 * count / total if total else 0.0
            # Light text on well-filled (dark) cells, dark text otherwise.
            color = PALETTE["bg"] if count > total * 0.25 else PALETTE["fg"]
            ax.text(
                j, i, f"{count}\n({pct:.1f}%)",
                ha="center", va="center",
                color=color, fontsize=13, fontweight="bold",
            )

    fig.colorbar(im, ax=ax, shrink=0.7)
    fig.savefig(FIGURES_DIR / "confusion_matrix.png")
    plt.close(fig)
    print(" ✓ confusion_matrix.png")


def fig_roc_comparison(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None:
    """Plot the ROC curve of the best model plus a random-guess diagonal.

    The other models in ``results`` are not re-evaluated; their stored
    ``roc_auc`` values are listed as small text annotations.

    Args:
        results: Training results; ``_best_model`` names the plotted model.
        y_true: Ground-truth labels.
        y_prob: Predicted positive-class probabilities.
    """
    fig, ax = plt.subplots(figsize=(8, 6.5))
    best = results.get("_best_model", "XGBoost")

    fpr, tpr, _ = roc_curve(y_true, y_prob)
    roc_auc = auc(fpr, tpr)
    ax.plot(fpr, tpr, color=PALETTE["primary"], lw=3,
            label=f"{best} (AUC = {roc_auc:.4f})")
    ax.fill_between(fpr, tpr, alpha=0.15, color=PALETTE["primary"])
    ax.plot([0, 1], [0, 1], "k:", alpha=0.5, lw=1.5, label="Rastgele / Random")

    # List the other models as AUC text rows. Rows are numbered over the
    # filtered list so they stack without gaps; numbering by position among
    # all result keys would count "_" entries and the best model too.
    others = [
        (name, data) for name, data in results.items()
        if not name.startswith("_") and isinstance(data, dict) and name != best
    ]
    for row, (name, data) in enumerate(others):
        auc_v = data.get("roc_auc", 0)
        ax.annotate(
            f"{name}: AUC={auc_v:.3f}",
            xy=(0.45, 0.05 + 0.04 * row),
            fontsize=8, alpha=0.7,
        )

    ax.set_xlabel("Yanlış Pozitif Oranı / False Positive Rate")
    ax.set_ylabel("Doğru Pozitif Oranı / True Positive Rate")
    ax.set_title("ROC Eğrisi — En İyi Model (5-fold CV)", fontsize=13, fontweight="bold")
    ax.legend(loc="lower right", framealpha=0.85)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1.02])
    fig.savefig(FIGURES_DIR / "roc_curves_comparison.png")
    plt.close(fig)
    print(" ✓ roc_curves_comparison.png")


def fig_pr_curves(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None:
    """Plot the precision-recall curve of the best model and save it.

    A dotted horizontal line marks the mean of ``y_true`` as the baseline.

    Args:
        results: Training results; only ``_best_model`` is read (for the label).
        y_true: Ground-truth labels.
        y_prob: Predicted positive-class probabilities.
    """
    fig, ax = plt.subplots(figsize=(8, 6.5))
    best = results.get("_best_model", "XGBoost")

    prec, rec, _ = precision_recall_curve(y_true, y_prob)
    ap = average_precision_score(y_true, y_prob)
    baseline = y_true.mean()

    ax.plot(rec, prec, color=PALETTE["primary"], lw=3,
            label=f"{best} (AP = {ap:.4f})")
    ax.fill_between(rec, prec, alpha=0.15, color=PALETTE["primary"])
    ax.axhline(baseline, color="k", linestyle=":", alpha=0.5,
               label=f"Baseline = {baseline:.3f}")

    ax.set_xlabel("Duyarlılık / Recall")
    ax.set_ylabel("Kesinlik / Precision")
    ax.set_title("Precision-Recall Eğrisi — En İyi Model", fontsize=13, fontweight="bold")
    ax.legend(loc="lower left", framealpha=0.85)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1.02])
    fig.savefig(FIGURES_DIR / "precision_recall_curves.png")
    plt.close(fig)
    print(" ✓ precision_recall_curves.png")


def fig_feature_importance(results: dict, top_n: int = 20) -> None:
    """Draw a horizontal bar chart of the highest-ranked feature importances.

    Reads the ``_feature_importance`` mapping (name -> value) from
    ``results``. When that mapping is missing or empty nothing is drawn.

    Args:
        results: Training results dictionary.
        top_n: Maximum number of features to show.
    """
    imp = results.get("_feature_importance", {})
    if not imp:
        print(" ! no feature importances in results, skipping feature_importance_top20.png")
        return

    items = sorted(imp.items(), key=lambda kv: kv[1], reverse=True)[:top_n]
    names = [n for n, _ in items]
    vals = [v for _, v in items]

    fig, ax = plt.subplots(figsize=(9, 7))
    y_pos = np.arange(len(names))
    colors_grad = plt.cm.copper(np.linspace(0.3, 0.85, len(names)))
    ax.barh(y_pos, vals, color=colors_grad, edgecolor=PALETTE["fg"], linewidth=0.5)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(names, fontsize=10)
    ax.invert_yaxis()  # largest importance at the top
    ax.set_xlabel("Normalize Önem / Normalized Importance")
    # Report the number of bars actually drawn; fewer than top_n features
    # may be available.
    ax.set_title(f"En Önemli {len(names)} Özellik — {results.get('_best_model', 'XGBoost')}",
                 fontsize=13, fontweight="bold")

    # Value labels sit just right of each bar; the gap is 1% of the largest
    # bar and is computed once rather than per bar.
    label_gap = max(vals) * 0.01
    for i, v in enumerate(vals):
        ax.text(v + label_gap, i, f"{v:.4f}", va="center", fontsize=8)

    fig.savefig(FIGURES_DIR / "feature_importance_top20.png")
    plt.close(fig)
    print(" ✓ feature_importance_top20.png")


def fig_calibration(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None:
    """Plot a 10-bin calibration curve with a Brier-score annotation.

    Shows how closely the predicted probabilities track the observed
    fraction of positives; the dotted diagonal is perfect calibration.

    Args:
        results: Training results; only ``_best_model`` is read (for the label).
        y_true: Ground-truth labels (0 or 1).
        y_prob: Predicted positive-class probabilities.
    """
    fig, ax = plt.subplots(figsize=(7, 6.5))
    best = results.get("_best_model", "XGBoost")

    frac_pos, mean_pred = calibration_curve(y_true, y_prob, n_bins=10)
    ax.plot(mean_pred, frac_pos, "o-", color=PALETTE["primary"], lw=3,
            markersize=8, label=f"{best}")
    # Shade the gap between the curve and the diagonal (the calibration error).
    ax.fill_between(mean_pred, frac_pos, mean_pred, alpha=0.15,
                    color=PALETTE["primary"])
    ax.plot([0, 1], [0, 1], "k:", alpha=0.5, label="Mükemmel / Perfect")

    # Brier score: mean squared difference between probability and label.
    brier = float(np.mean((y_prob - y_true) ** 2))
    ax.text(
        0.04, 0.94,
        f"Brier Score = {brier:.4f}\nN = {len(y_true)} (5-fold CV)",
        transform=ax.transAxes,
        fontsize=10, va="top",
        bbox=dict(boxstyle="round,pad=0.5", facecolor=PALETTE["bg"],
                  edgecolor=PALETTE["primary"], alpha=0.85),
    )

    ax.set_xlabel("Ortalama Tahmin Olasılığı / Mean Predicted Probability")
    ax.set_ylabel("Gerçek Pozitif Oranı / Fraction of Positives")
    ax.set_title("Kalibrasyon Eğrisi — En İyi Model", fontsize=13, fontweight="bold")
    ax.legend(loc="lower right", framealpha=0.85, fontsize=10)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
    fig.savefig(FIGURES_DIR / "calibration_plot.png")
    plt.close(fig)
    print(" ✓ calibration_plot.png")


def fig_feature_distributions(feature_cols: list[str], top_features: list[str]) -> None:
    """Plot AI-vs-human histograms for up to eight top-ranked features.

    Values are re-read from ``FEATURES_CSV``. Rows whose value or label
    cannot be parsed, or whose value is NaN / infinite, are left out of
    that feature's histogram.

    Args:
        feature_cols: Unused; kept so existing callers keep working.
        top_features: Feature names ordered by importance; the first eight
            (or fewer) are plotted on a 2x4 grid.
    """
    n = min(8, len(top_features))
    if n == 0:
        # Nothing to plot; do not write an all-blank figure.
        print(" ! no ranked features available, skipping feature_distribution_ai_vs_human.png")
        return

    with open(FEATURES_CSV, "r", encoding="utf-8", newline="") as f:
        rows = list(csv.DictReader(f))

    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    axes = axes.flatten()
    for i, col in enumerate(top_features[:n]):
        ai_vals, hum_vals = [], []
        for r in rows:
            try:
                v = float(r[col])
                # Parse the label numerically so "1", "1.0" and " 1" all
                # count as the AI class, matching the integer parsing used
                # when the training matrix is loaded.
                is_ai = int(float(r["label_int"])) == 1
            except (ValueError, KeyError, TypeError):
                # TypeError covers short rows, where DictReader yields None.
                continue
            if not np.isfinite(v):
                continue
            (ai_vals if is_ai else hum_vals).append(v)

        ax = axes[i]
        # Histogram overlay. Both classes share one set of bin edges so the
        # two densities are directly comparable bar-for-bar.
        edges = np.histogram_bin_edges(hum_vals + ai_vals, bins=30)
        for vals, color, label in (
            (hum_vals, PALETTE["secondary"], f"İnsan (n={len(hum_vals)})"),
            (ai_vals, PALETTE["error"], f"AI (n={len(ai_vals)})"),
        ):
            # Density normalisation divides by the sample count, so it is
            # only requested when the class actually has samples.
            ax.hist(vals, bins=edges, alpha=0.55, color=color,
                    label=label, density=bool(vals))
        ax.set_title(col, fontsize=10, fontweight="bold")
        ax.set_ylabel("Yoğunluk" if i % 4 == 0 else "")
        ax.legend(fontsize=7, loc="best")
        ax.tick_params(labelsize=8)

    # Hide the grid cells that have no feature assigned.
    for unused in axes[n:]:
        unused.axis("off")

    fig.suptitle("AI vs İnsan — En Önemli 8 Özelliğin Dağılımı",
                 fontsize=14, fontweight="bold", y=1.02)
    fig.tight_layout()
    fig.savefig(FIGURES_DIR / "feature_distribution_ai_vs_human.png")
    plt.close(fig)
    print(" ✓ feature_distribution_ai_vs_human.png")


def fig_shap_summary(model, scaler, feature_cols, X, max_display: int = 20) -> None:
    """Save a SHAP summary plot (global feature impact with direction).

    This figure is best-effort: it is skipped with a message when the
    ``shap`` package is not installed or when ``shap.TreeExplainer`` cannot
    be built for ``model``.

    Args:
        model: Fitted classifier to explain.
        scaler: Fitted scaler; ``X`` is passed through ``scaler.transform``.
        feature_cols: Feature names, one per column of ``X``.
        X: Unscaled feature matrix.
        max_display: Maximum number of features shown in the plot.
    """
    try:
        import shap
    except ImportError:
        print(" ! SHAP not available, skipping")
        return

    X_scaled = scaler.transform(X)
    # Subsample for speed; the fixed seed keeps the figure reproducible.
    if len(X_scaled) > 1000:
        idx = np.random.RandomState(42).choice(len(X_scaled), 1000, replace=False)
        X_sub = X_scaled[idx]
    else:
        X_sub = X_scaled

    try:
        explainer = shap.TreeExplainer(model)
    except Exception as exc:  # shap has no single stable error type for unsupported models
        print(f" ! SHAP TreeExplainer could not be built ({exc}), skipping")
        return
    shap_values = explainer.shap_values(X_sub)

    # Reduce to one 2-D (samples x features) array for the positive class.
    # Depending on the SHAP version and model, classifiers yield either a
    # list with one array per class or a single 3-D array whose last axis
    # is the class.
    if isinstance(shap_values, list):
        sv = shap_values[1] if len(shap_values) > 1 else shap_values[0]
    else:
        sv = np.asarray(shap_values)
        if sv.ndim == 3:
            sv = sv[:, :, 1] if sv.shape[2] > 1 else sv[:, :, 0]

    fig = plt.figure(figsize=(10, 8))
    shap.summary_plot(
        sv, X_sub,
        feature_names=feature_cols,
        max_display=max_display,
        show=False,
        plot_size=None,  # keep the figure size chosen above
    )
    plt.title("SHAP Özet Grafiği — Global Özellik Etkisi",
              fontsize=13, fontweight="bold", pad=14)
    plt.savefig(FIGURES_DIR / "shap_summary.png", bbox_inches="tight")
    plt.close(fig)
    print(" ✓ shap_summary.png")


def fig_model_comparison(results: dict) -> None:
    """Draw grouped bars comparing five metrics across all models.

    Every entry of ``results`` whose key does not start with "_" and whose
    value is a dict is treated as one model. Models are ordered by
    ``roc_auc``, highest first. A metric missing from a model's entry is
    left without a bar instead of raising.

    Args:
        results: Training results dictionary.
    """
    items = [(k, v) for k, v in results.items()
             if not k.startswith("_") and isinstance(v, dict)]
    if not items:
        print(" ! no per-model metrics in results, skipping model_comparison_bars.png")
        return
    items.sort(key=lambda kv: kv[1].get("roc_auc", 0), reverse=True)
    names = [n for n, _ in items]

    # Legend label -> key in each model's result dict.
    metric_keys = {
        "Accuracy": "accuracy",
        "F1 Score": "f1",
        "ROC-AUC": "roc_auc",
        "Precision": "precision",
        "Recall": "recall",
    }
    # NaN marks a missing metric: no bar is drawn for it.
    metrics = {
        label: np.array([d.get(key, np.nan) for _, d in items], dtype=float)
        for label, key in metric_keys.items()
    }

    x = np.arange(len(names))
    width = 0.16
    fig, ax = plt.subplots(figsize=(12, 6.5))
    colors = [PALETTE["primary"], PALETTE["secondary"], PALETTE["error"],
              PALETTE["accent"], "#7a5c3c"]
    # Centre each group of bars on its tick.
    centre = (len(metrics) - 1) / 2
    for i, (metric, vals) in enumerate(metrics.items()):
        ax.bar(x + (i - centre) * width, vals, width, label=metric,
               color=colors[i], edgecolor=PALETTE["fg"], linewidth=0.3)

    ax.set_ylabel("Skor / Score")
    ax.set_title("Model Performans Karşılaştırması", fontsize=13, fontweight="bold")
    ax.set_xticks(x)
    ax.set_xticklabels(names, rotation=20, ha="right")
    ax.legend(loc="lower right", framealpha=0.85)

    # Keep the usual 0.5 floor, but lower it (rounded down to a tenth) when
    # a score falls below 0.5 so that bar is not cut off entirely.
    stacked = np.concatenate(list(metrics.values()))
    finite = stacked[np.isfinite(stacked)]
    lower = 0.5
    if finite.size:
        lower = max(0.0, min(0.5, float(np.floor(finite.min() * 10) / 10)))
    ax.set_ylim([lower, 1.0])
    ax.grid(True, axis="y", alpha=0.4)

    fig.savefig(FIGURES_DIR / "model_comparison_bars.png")
    plt.close(fig)
    print(" ✓ model_comparison_bars.png")


def main() -> None:
    """Load the artifacts and dataset, then generate every figure.

    Creates ``FIGURES_DIR`` if needed, computes the cross-validated
    predictions once, and passes them to each figure function. The SHAP
    summary is generated last.
    """
    FIGURES_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Output directory: {FIGURES_DIR}")

    print("Loading artifacts...")
    results, model, scaler, feature_cols = _load_artifacts()
    importance = results.get("_feature_importance", {})
    # Feature names ordered from most to least important.
    top_features = [n for n, _ in sorted(
        importance.items(), key=lambda kv: kv[1], reverse=True,
    )]

    print("\nLoading dataset...")
    X, y = _load_csv_data(feature_cols)
    # NOTE(review): the loaded scaler is applied to the whole dataset before
    # cross-validation; presumably it was fitted on training data that
    # overlaps the CV folds — confirm this is acceptable for reported scores.
    X_scaled = scaler.transform(X)

    print("Computing 5-fold cross-validated predictions (this may take ~1-2 min)...")
    cache: dict = {}
    y_true, y_pred, y_prob = _get_cv_predictions(model, X_scaled, y, cache)

    print("\nGenerating figures...")
    fig_confusion_matrix(results, y_true, y_pred)
    fig_roc_comparison(results, y_true, y_prob)
    fig_pr_curves(results, y_true, y_prob)
    fig_feature_importance(results)
    fig_calibration(results, y_true, y_prob)
    fig_model_comparison(results)
    fig_feature_distributions(feature_cols, top_features)

    print("\nGenerating SHAP summary...")
    fig_shap_summary(model, scaler, feature_cols, X)

    # Counts every PNG in the directory, including ones from earlier runs.
    png_count = sum(1 for _ in FIGURES_DIR.glob("*.png"))
    print(f"\nDone. {png_count} figures in {FIGURES_DIR}")


if __name__ == "__main__":
    main()