Spaces:

Rthur2003
/

crowncode-backend

Sleeping

App Files Files Community

crowncode-backend / app /training /generate_figures.py

Rthur2003

feat: update output directory for generated figures to docs/academic/figures

983f3ef about 1 month ago

raw

history blame contribute delete

15.7 kB

	"""Generate academic-quality figures from training results.

	Produces publication-ready figures in docs/academic/figures/:
	- confusion_matrix.png
	- roc_curves_comparison.png
	- precision_recall_curves.png
	- feature_importance_top20.png
	- calibration_plot.png
	- feature_distribution_ai_vs_human.png
	- shap_summary.png
	- model_comparison_bars.png

	Usage:
	python -m app.training.generate_figures
	"""

	from __future__ import annotations

	import csv
	import json
	import pickle
	from pathlib import Path

	import numpy as np
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	from matplotlib.colors import LinearSegmentedColormap

	from sklearn.metrics import (
	confusion_matrix, roc_curve, auc,
	precision_recall_curve, average_precision_score,
	)
	from sklearn.calibration import calibration_curve
	from sklearn.model_selection import StratifiedKFold, cross_val_predict
	from sklearn.base import clone

	# ── Paths ────────────────────────────────────────────────────────────────
	# The backend root is the third ancestor of this file; the dataset and the
	# figure output directory are resolved relative to the backend root's parent.
	BACKEND = Path(__file__).resolve().parents[2]
	MODELS_DIR = BACKEND.joinpath("models")
	DATASET_DIR = BACKEND.parent.joinpath("DataSet")
	FIGURES_DIR = BACKEND.parent.joinpath("docs", "academic", "figures")
	FEATURES_CSV = DATASET_DIR.joinpath("features.csv")

	# ── Theme (AURIS parchment gold palette) ─────────────────────────────────
	# Named hex colours shared by every figure in this module.
	PALETTE = dict(
	    bg="#faf6ed",
	    fg="#3d2817",
	    primary="#c99347",
	    secondary="#7fb069",
	    error="#a64b3c",
	    grid="#d8c9a8",
	    accent="#e7c77a",
	)

	# Global matplotlib defaults: palette colours first, then typography, grid
	# and output resolution.
	plt.rcParams.update({
	    # Figure and axes share the background colour.
	    **{key: PALETTE["bg"] for key in ("figure.facecolor", "axes.facecolor")},
	    # Spines, labels, ticks and text share the foreground colour.
	    **{
	        key: PALETTE["fg"]
	        for key in (
	            "axes.edgecolor",
	            "axes.labelcolor",
	            "xtick.color",
	            "ytick.color",
	            "text.color",
	        )
	    },
	    "font.family": "DejaVu Sans",
	    "font.size": 11,
	    "axes.grid": True,
	    "grid.color": PALETTE["grid"],
	    "grid.alpha": 0.4,
	    "savefig.dpi": 150,
	    "savefig.bbox": "tight",
	    "figure.dpi": 100,
	})


	def _load_artifacts():
	    """Load the training results, fitted model, scaler and feature column list.

	    All four artifacts are read from ``MODELS_DIR``.

	    Returns:
	        A ``(results, model, scaler, feature_cols)`` tuple holding, in order,
	        the parsed ``training_results.json``, the unpickled
	        ``auris_classifier_v1.pkl``, the unpickled ``feature_scaler_v1.pkl``
	        and the parsed ``feature_columns_v1.json``.

	    Raises:
	        FileNotFoundError: If any of the four artifact files is missing.
	    """
	    # JSON is decoded explicitly as UTF-8 so the result does not depend on the
	    # platform's locale encoding (the CSV reader in this module does the same).
	    with open(MODELS_DIR / "training_results.json", "r", encoding="utf-8") as f:
	        results = json.load(f)
	    # SECURITY: pickle.load executes code embedded in the file. Only point this
	    # at model artifacts produced by a trusted training run.
	    with open(MODELS_DIR / "auris_classifier_v1.pkl", "rb") as f:
	        model = pickle.load(f)
	    with open(MODELS_DIR / "feature_scaler_v1.pkl", "rb") as f:
	        scaler = pickle.load(f)
	    with open(MODELS_DIR / "feature_columns_v1.json", "r", encoding="utf-8") as f:
	        feature_cols = json.load(f)
	    return results, model, scaler, feature_cols


	def _load_csv_data(feature_cols):
	    """Read the feature matrix and integer labels from ``FEATURES_CSV``.

	    Args:
	        feature_cols: Column names to extract, in the order they should appear
	            as columns of the returned matrix.

	    Returns:
	        ``(X, y)``. ``X`` is a 2-D float array with one row per CSV record and
	        one column per entry of ``feature_cols``; NaN is replaced by 0.0,
	        +inf by 1.0 and -inf by -1.0. ``y`` is the ``label_int`` column parsed
	        as integers.

	    Raises:
	        KeyError: If a requested column or ``label_int`` is not in the CSV.
	        ValueError: If a cell cannot be parsed as a number.
	    """
	    # The csv module requires newline="" so that newlines embedded in quoted
	    # fields are handed to the reader untouched.
	    with open(FEATURES_CSV, "r", encoding="utf-8", newline="") as f:
	        rows = list(csv.DictReader(f))
	    X = np.array([[float(r[c]) for c in feature_cols] for r in rows], dtype=float)
	    # Pin the shape so that a CSV without data rows still yields a 2-D
	    # (0, n_features) matrix instead of a 1-D empty array.
	    X = X.reshape(len(rows), len(feature_cols))
	    X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
	    # Explicit dtype keeps the labels integral even when there are no rows.
	    y = np.array([int(r["label_int"]) for r in rows], dtype=int)
	    return X, y


	def _get_cv_predictions(model, X_scaled, y, cache: dict) -> tuple:
	    """Return cross-validated predictions for ``model``, memoised in ``cache``.

	    The cache is keyed on ``id(model)``. On a hit the stored
	    ``(y, y_pred, y_prob)`` tuple is returned without refitting. On a miss a
	    clone of the model is evaluated with a shuffled, seeded 5-fold stratified
	    split; the second ``predict_proba`` column is kept as ``y_prob`` and
	    values strictly above 0.5 become label 1 in ``y_pred``.
	    """
	    cache_key = id(model)
	    if cache_key not in cache:
	        folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
	        fold_proba = cross_val_predict(
	            clone(model), X_scaled, y, cv=folds, method="predict_proba", n_jobs=-1,
	        )
	        class1_prob = fold_proba[:, 1]
	        hard_labels = (class1_prob > 0.5).astype(int)
	        cache[cache_key] = (y, hard_labels, class1_prob)
	    return cache[cache_key]


	def fig_confusion_matrix(results: dict, y_true: np.ndarray, y_pred: np.ndarray) -> None:
	    """Save ``confusion_matrix.png`` for the best model.

	    The matrix is computed from ``y_true`` / ``y_pred``. The F1 and AUC values
	    in the title are read from ``results`` (0.0 when absent); the accuracy
	    falls back to the agreement rate of the two arrays when ``results`` has
	    no ``accuracy`` entry for the best model.

	    Args:
	        results: Parsed training results. ``results["_best_model"]`` names the
	            model (default ``"XGBoost"``); ``results[<that name>]`` supplies
	            the title metrics.
	        y_true: Ground-truth labels, assumed to be 0 (human) / 1 (AI).
	        y_pred: Predicted labels aligned with ``y_true``.
	    """
	    best = results.get("_best_model", "XGBoost")
	    data = results.get(best, {})
	    # Fix the label set so the matrix is always 2x2. Without it, data in which
	    # only one class occurs yields a 1x1 matrix and the four-cell annotation
	    # loop below raises IndexError.
	    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

	    fig, ax = plt.subplots(figsize=(6.5, 5.5))
	    cmap = LinearSegmentedColormap.from_list(
	        "auris", [PALETTE["bg"], PALETTE["primary"]],
	    )
	    im = ax.imshow(cm, cmap=cmap, aspect="auto")
	    acc = data.get("accuracy", (y_true == y_pred).mean())
	    f1v = data.get("f1", 0.0)
	    aucv = data.get("roc_auc", 0.0)
	    ax.set_title(
	        f"Karışıklık Matrisi — {best}\n"
	        f"Accuracy: {acc:.1%} F1: {f1v:.3f} AUC: {aucv:.3f}",
	        fontsize=13, fontweight="bold",
	    )
	    classes = ["İnsan / Human", "AI"]
	    ax.set_xticks([0, 1])
	    ax.set_yticks([0, 1])
	    ax.set_xticklabels(classes)
	    ax.set_yticklabels(classes)
	    ax.set_xlabel("Tahmin / Predicted")
	    ax.set_ylabel("Gerçek / Actual")

	    # Annotate each cell with its count and share of all samples. Cells
	    # holding more than a quarter of the samples get light text, the rest dark.
	    total = cm.sum()
	    for (i, j), count in np.ndenumerate(cm):
	        pct = 100 * count / total
	        color = PALETTE["bg"] if count > total * 0.25 else PALETTE["fg"]
	        ax.text(
	            j, i, f"{count}\n({pct:.1f}%)",
	            ha="center", va="center",
	            color=color, fontsize=13, fontweight="bold",
	        )

	    fig.colorbar(im, ax=ax, shrink=0.7)
	    fig.savefig(FIGURES_DIR / "confusion_matrix.png")
	    plt.close(fig)
	    print(" ✓ confusion_matrix.png")


	def fig_roc_comparison(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None:
	    """Save ``roc_curves_comparison.png``.

	    Only the best model's ROC curve is drawn (from ``y_true`` / ``y_prob``),
	    together with the chance diagonal. Every other model in ``results`` is
	    listed as a small text annotation showing its stored ``roc_auc``.
	    """
	    best = results.get("_best_model", "XGBoost")
	    false_pos, true_pos, _ = roc_curve(y_true, y_prob)
	    area = auc(false_pos, true_pos)

	    fig, ax = plt.subplots(figsize=(8, 6.5))
	    ax.plot(
	        false_pos, true_pos,
	        color=PALETTE["primary"], lw=3,
	        label=f"{best} (AUC = {area:.4f})",
	    )
	    ax.fill_between(false_pos, true_pos, alpha=0.15, color=PALETTE["primary"])
	    ax.plot([0, 1], [0, 1], "k:", alpha=0.5, lw=1.5, label="Rastgele / Random")

	    # Other models appear as text only. The vertical slot is the entry's
	    # position in ``results``, counting skipped entries as well.
	    for slot, (name, data) in enumerate(results.items()):
	        is_other_model = (
	            isinstance(data, dict)
	            and not name.startswith("_")
	            and name != best
	        )
	        if not is_other_model:
	            continue
	        auc_v = data.get("roc_auc", 0)
	        ax.annotate(
	            f"{name}: AUC={auc_v:.3f}",
	            xy=(0.45, 0.05 + 0.04 * slot),
	            fontsize=8, alpha=0.7,
	        )

	    ax.set_xlabel("Yanlış Pozitif Oranı / False Positive Rate")
	    ax.set_ylabel("Doğru Pozitif Oranı / True Positive Rate")
	    ax.set_title("ROC Eğrisi — En İyi Model (5-fold CV)", fontsize=13, fontweight="bold")
	    ax.legend(loc="lower right", framealpha=0.85)
	    ax.set_xlim([0, 1])
	    ax.set_ylim([0, 1.02])
	    fig.savefig(FIGURES_DIR / "roc_curves_comparison.png")
	    plt.close(fig)
	    print(" ✓ roc_curves_comparison.png")


	def fig_pr_curves(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None:
	    """Save ``precision_recall_curves.png`` for the best model.

	    Plots precision against recall computed from ``y_true`` / ``y_prob``,
	    labels the curve with its average precision, and draws a horizontal
	    baseline at the mean of ``y_true``.
	    """
	    best = results.get("_best_model", "XGBoost")
	    precision, recall, _ = precision_recall_curve(y_true, y_prob)
	    avg_precision = average_precision_score(y_true, y_prob)
	    baseline = y_true.mean()

	    fig, ax = plt.subplots(figsize=(8, 6.5))
	    ax.plot(
	        recall, precision,
	        color=PALETTE["primary"], lw=3,
	        label=f"{best} (AP = {avg_precision:.4f})",
	    )
	    ax.fill_between(recall, precision, alpha=0.15, color=PALETTE["primary"])
	    ax.axhline(
	        baseline, color="k", linestyle=":", alpha=0.5,
	        label=f"Baseline = {baseline:.3f}",
	    )

	    ax.set_xlabel("Duyarlılık / Recall")
	    ax.set_ylabel("Kesinlik / Precision")
	    ax.set_title("Precision-Recall Eğrisi — En İyi Model", fontsize=13, fontweight="bold")
	    ax.legend(loc="lower left", framealpha=0.85)
	    ax.set_xlim([0, 1])
	    ax.set_ylim([0, 1.02])
	    fig.savefig(FIGURES_DIR / "precision_recall_curves.png")
	    plt.close(fig)
	    print(" ✓ precision_recall_curves.png")


	def fig_feature_importance(results: dict, top_n: int = 20) -> None:
	    """Save ``feature_importance_top20.png``: a horizontal bar chart.

	    Args:
	        results: Parsed training results; ``results["_feature_importance"]``
	            maps feature name to importance. When it is missing or empty the
	            function returns without writing a file.
	        top_n: Maximum number of features to show, highest importance first.
	            The output file name does not change with this value.
	    """
	    imp = results.get("_feature_importance", {})
	    if not imp:
	        return
	    items = sorted(imp.items(), key=lambda x: x[1], reverse=True)[:top_n]
	    names = [n for n, _ in items]
	    vals = [v for _, v in items]

	    fig, ax = plt.subplots(figsize=(9, 7))
	    y_pos = np.arange(len(names))
	    colors_grad = plt.cm.copper(np.linspace(0.3, 0.85, len(names)))
	    ax.barh(y_pos, vals, color=colors_grad, edgecolor=PALETTE["fg"], linewidth=0.5)
	    ax.set_yticks(y_pos)
	    ax.set_yticklabels(names, fontsize=10)
	    ax.invert_yaxis()  # most important feature at the top
	    ax.set_xlabel("Normalize Önem / Normalized Importance")
	    # Report the number of bars actually drawn; fewer than top_n features may
	    # be available.
	    ax.set_title(
	        f"En Önemli {len(names)} Özellik — {results.get('_best_model', 'XGBoost')}",
	        fontsize=13, fontweight="bold",
	    )
	    # Value labels sit just past the end of each bar; the gap is 1% of the
	    # longest bar and is computed once.
	    label_gap = max(vals) * 0.01
	    for i, v in enumerate(vals):
	        ax.text(v + label_gap, i, f"{v:.4f}", va="center", fontsize=8)
	    fig.savefig(FIGURES_DIR / "feature_importance_top20.png")
	    plt.close(fig)
	    print(" ✓ feature_importance_top20.png")


	def fig_calibration(results: dict, y_true: np.ndarray, y_prob: np.ndarray) -> None:
	    """Save ``calibration_plot.png`` for the best model.

	    Draws a 10-bin calibration curve of ``y_prob`` against ``y_true``, shades
	    the gap to the perfect-calibration diagonal, and annotates the plot with
	    the Brier score (mean squared difference between ``y_prob`` and
	    ``y_true``) and the sample count.
	    """
	    best = results.get("_best_model", "XGBoost")
	    fig, ax = plt.subplots(figsize=(7, 6.5))
	    observed_rate, predicted_mean = calibration_curve(y_true, y_prob, n_bins=10)

	    ax.plot(
	        predicted_mean, observed_rate, "o-",
	        color=PALETTE["primary"], lw=3, markersize=8, label=f"{best}",
	    )
	    ax.fill_between(
	        predicted_mean, observed_rate, predicted_mean,
	        alpha=0.15, color=PALETTE["primary"],
	    )
	    ax.plot([0, 1], [0, 1], "k:", alpha=0.5, label="Mükemmel / Perfect")

	    # Summary box in the upper-left corner (axes coordinates).
	    brier = float(np.mean(np.square(y_prob - y_true)))
	    box_style = dict(
	        boxstyle="round,pad=0.5",
	        facecolor=PALETTE["bg"],
	        edgecolor=PALETTE["primary"],
	        alpha=0.85,
	    )
	    ax.text(
	        0.04, 0.94,
	        f"Brier Score = {brier:.4f}\nN = {len(y_true)} (5-fold CV)",
	        transform=ax.transAxes,
	        fontsize=10, va="top",
	        bbox=box_style,
	    )

	    ax.set_xlabel("Ortalama Tahmin Olasılığı / Mean Predicted Probability")
	    ax.set_ylabel("Gerçek Pozitif Oranı / Fraction of Positives")
	    ax.set_title("Kalibrasyon Eğrisi — En İyi Model", fontsize=13, fontweight="bold")
	    ax.legend(loc="lower right", framealpha=0.85, fontsize=10)
	    ax.set_xlim([0, 1])
	    ax.set_ylim([0, 1])
	    fig.savefig(FIGURES_DIR / "calibration_plot.png")
	    plt.close(fig)
	    print(" ✓ calibration_plot.png")


	def fig_feature_distributions(feature_cols: list[str], top_features: list[str]) -> None:
	    """Save ``feature_distribution_ai_vs_human.png``.

	    Re-reads ``FEATURES_CSV`` and, for each of the first (at most) eight names
	    in ``top_features``, overlays density histograms of the rows labelled AI
	    (``label_int`` equal to 1) and of all other labelled rows.

	    Args:
	        feature_cols: Unused; kept so existing callers keep working.
	        top_features: Feature names ordered by decreasing importance. When
	            empty, nothing is written.
	    """
	    if not top_features:
	        # No ranking means nothing to plot; avoid writing a blank figure.
	        return

	    # The csv module requires newline="" for correct handling of quoted fields.
	    with open(FEATURES_CSV, "r", encoding="utf-8", newline="") as f:
	        rows = list(csv.DictReader(f))

	    n = min(8, len(top_features))
	    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
	    axes = axes.flatten()

	    for i, col in enumerate(top_features[:n]):
	        ai_vals, hum_vals = [], []
	        for r in rows:
	            try:
	                v = float(r[col])
	                # Parse the label numerically, as the matrix loader does.
	                # Rows with a blank or malformed label are skipped instead
	                # of being counted as human.
	                is_ai = int(r["label_int"]) == 1
	            except (ValueError, KeyError, TypeError):
	                # Unparseable cell, missing column, or a short row (None).
	                continue
	            if not np.isfinite(v):
	                continue
	            (ai_vals if is_ai else hum_vals).append(v)

	        ax = axes[i]
	        bins = 30
	        ax.hist(hum_vals, bins=bins, alpha=0.55, color=PALETTE["secondary"],
	                label=f"İnsan (n={len(hum_vals)})", density=True)
	        ax.hist(ai_vals, bins=bins, alpha=0.55, color=PALETTE["error"],
	                label=f"AI (n={len(ai_vals)})", density=True)
	        ax.set_title(col, fontsize=10, fontweight="bold")
	        # Only the first panel of each row of four carries the y-axis label.
	        ax.set_ylabel("Yoğunluk" if i % 4 == 0 else "")
	        ax.legend(fontsize=7, loc="best")
	        ax.tick_params(labelsize=8)

	    # Hide the panels that received no feature.
	    for unused_ax in axes[n:]:
	        unused_ax.axis("off")

	    # The title reports the number of panels actually drawn.
	    fig.suptitle(f"AI vs İnsan — En Önemli {n} Özelliğin Dağılımı",
	                 fontsize=14, fontweight="bold", y=1.02)
	    fig.tight_layout()
	    fig.savefig(FIGURES_DIR / "feature_distribution_ai_vs_human.png")
	    plt.close(fig)
	    print(" ✓ feature_distribution_ai_vs_human.png")


	def fig_shap_summary(model, scaler, feature_cols, X, max_display: int = 20) -> None:
	    """Save ``shap_summary.png``: a SHAP summary plot for ``model``.

	    Args:
	        model: Fitted model handed to ``shap.TreeExplainer``.
	        scaler: Fitted scaler; ``scaler.transform(X)`` is applied first.
	        feature_cols: Feature names shown on the plot.
	        X: Unscaled feature matrix.
	        max_display: Maximum number of features listed in the plot.

	    The function prints a notice and returns when ``shap`` is not installed.
	    """
	    try:
	        import shap
	    except ImportError:
	        print(" ! SHAP not available, skipping")
	        return

	    X_scaled = scaler.transform(X)
	    # Explain at most 1000 rows, chosen with a fixed seed so reruns match.
	    if len(X_scaled) > 1000:
	        idx = np.random.RandomState(42).choice(len(X_scaled), 1000, replace=False)
	        X_sub = X_scaled[idx]
	    else:
	        X_sub = X_scaled

	    explainer = shap.TreeExplainer(model)
	    shap_values = explainer.shap_values(X_sub)

	    # Reduce per-class output to one (samples, features) matrix, preferring
	    # index 1 when more than one output is present.
	    if isinstance(shap_values, list):
	        # Older SHAP: one array per class.
	        sv = shap_values[1] if len(shap_values) > 1 else shap_values[0]
	    elif np.ndim(shap_values) == 3:
	        # Newer SHAP: a single (samples, features, outputs) array. Without
	        # this slice the 3-D array would be passed to summary_plot as is.
	        sv = shap_values[:, :, 1] if shap_values.shape[2] > 1 else shap_values[:, :, 0]
	    else:
	        sv = shap_values

	    # plot_size=None leaves the figure size alone, so the figure created
	    # here sets the size.
	    plt.figure(figsize=(10, 8))
	    shap.summary_plot(
	        sv, X_sub,
	        feature_names=feature_cols,
	        max_display=max_display,
	        show=False,
	        plot_size=None,
	    )
	    plt.title("SHAP Özet Grafiği — Global Özellik Etkisi",
	              fontsize=13, fontweight="bold", pad=14)
	    plt.savefig(FIGURES_DIR / "shap_summary.png", bbox_inches="tight")
	    plt.close()
	    print(" ✓ shap_summary.png")

	def fig_model_comparison(results: dict) -> None:
	    """Save ``model_comparison_bars.png``: grouped bars of five metrics per model.

	    Args:
	        results: Parsed training results. Every entry whose key does not start
	            with ``_`` and whose value is a dict is treated as one model.
	            Models are ordered by decreasing ``roc_auc``. A metric missing
	            from a model's dict is plotted as 0.0. When there are no model
	            entries, nothing is written.
	    """
	    items = [(k, v) for k, v in results.items() if not k.startswith("_") and isinstance(v, dict)]
	    if not items:
	        return
	    items.sort(key=lambda x: x[1].get("roc_auc", 0), reverse=True)

	    names = [n for n, _ in items]
	    # Legend label -> key in each model's result dict. Missing keys fall back
	    # to 0.0, as the sort above does, instead of raising KeyError.
	    metric_keys = {
	        "Accuracy": "accuracy",
	        "F1 Score": "f1",
	        "ROC-AUC": "roc_auc",
	        "Precision": "precision",
	        "Recall": "recall",
	    }
	    metrics = {
	        label: [d.get(key, 0.0) for _, d in items]
	        for label, key in metric_keys.items()
	    }

	    x = np.arange(len(names))
	    width = 0.16
	    fig, ax = plt.subplots(figsize=(12, 6.5))
	    colors = [PALETTE["primary"], PALETTE["secondary"], PALETTE["error"],
	              PALETTE["accent"], "#7a5c3c"]

	    # Centre each group of bars on its tick, whatever the number of metrics.
	    centre = (len(metrics) - 1) / 2
	    for i, (metric, vals) in enumerate(metrics.items()):
	        ax.bar(x + (i - centre) * width, vals, width, label=metric,
	               color=colors[i], edgecolor=PALETTE["fg"], linewidth=0.3)

	    ax.set_ylabel("Skor / Score")
	    ax.set_title("Model Performans Karşılaştırması", fontsize=13, fontweight="bold")
	    ax.set_xticks(x)
	    ax.set_xticklabels(names, rotation=20, ha="right")
	    ax.legend(loc="lower right", framealpha=0.85)
	    # The y-axis starts at 0.5, so scores below that are not visible.
	    ax.set_ylim([0.5, 1.0])
	    ax.grid(True, axis="y", alpha=0.4)

	    fig.savefig(FIGURES_DIR / "model_comparison_bars.png")
	    plt.close(fig)
	    print(" ✓ model_comparison_bars.png")


	def main() -> None:
	    """Regenerate every figure from the stored training artifacts.

	    Creates ``FIGURES_DIR`` if needed, loads the artifacts and the feature
	    CSV, computes one set of cross-validated predictions, writes the seven
	    standard figures followed by the SHAP summary, and finally reports how
	    many PNG files are present in the output directory.
	    """
	    FIGURES_DIR.mkdir(parents=True, exist_ok=True)
	    print(f"Output directory: {FIGURES_DIR}")
	    print("Loading artifacts...")
	    results, model, scaler, feature_cols = _load_artifacts()

	    # Feature names ordered from most to least important.
	    importance = results.get("_feature_importance", {})
	    top_features = sorted(importance, key=importance.get, reverse=True)

	    print("\nLoading dataset...")
	    X, y = _load_csv_data(feature_cols)
	    X_scaled = scaler.transform(X)

	    print("Computing 5-fold cross-validated predictions (this may take ~1-2 min)...")
	    cv_cache: dict = {}
	    y_true, y_pred, y_prob = _get_cv_predictions(model, X_scaled, y, cv_cache)

	    print("\nGenerating figures...")
	    fig_confusion_matrix(results, y_true, y_pred)
	    fig_roc_comparison(results, y_true, y_prob)
	    fig_pr_curves(results, y_true, y_prob)
	    fig_feature_importance(results)
	    fig_calibration(results, y_true, y_prob)
	    fig_model_comparison(results)
	    fig_feature_distributions(feature_cols, top_features)

	    print("\nGenerating SHAP summary...")
	    fig_shap_summary(model, scaler, feature_cols, X)

	    png_count = sum(1 for _ in FIGURES_DIR.glob("*.png"))
	    print(f"\nDone. {png_count} figures in {FIGURES_DIR}")


	if __name__ == "__main__":
	    main()