Spaces:

Rthur2003
/

crowncode-backend

Sleeping

App Files Files Community

Rthur2003 committed on Apr 16

Commit

20fe6c3

1 Parent(s): 94ed8e9

Add Gradio dependency to requirements.txt with version constraints

Browse files

Files changed (4) hide show

app/training/evaluate.py +2 -2
app/training/train_classifier.py +421 -154
local_demo.py +901 -503
requirements.txt +2 -1

app/training/evaluate.py CHANGED Viewed

@@ -111,7 +111,7 @@ def evaluate_predictions(
     # Print report
     print(f"\n{'=' * 50}")
-    print(f"  {title} — Evaluation Report")
     print(f"{'=' * 50}")
     print(f"  Accuracy:  {acc:.4f} ({acc:.1%})")
     print(f"  Precision: {prec:.4f}")
@@ -176,7 +176,7 @@ def evaluate_heuristic_baseline(features_csv: str | Path) -> dict:
     y_pred_combined = (combined_scores > 0.5).astype(int)
     print("\n" + "=" * 60)
-    print("  BASELINE EVALUATION — Current Heuristic System")
     print("=" * 60)
     print("\n--- Heuristic Only (spectral + temporal + harmonic) ---")

     # Print report
     print(f"\n{'=' * 50}")
+    print(f"  {title} - Evaluation Report")
     print(f"{'=' * 50}")
     print(f"  Accuracy:  {acc:.4f} ({acc:.1%})")
     print(f"  Precision: {prec:.4f}")
     y_pred_combined = (combined_scores > 0.5).astype(int)
     print("\n" + "=" * 60)
+    print("  BASELINE EVALUATION - Current Heuristic System")
     print("=" * 60)
     print("\n--- Heuristic Only (spectral + temporal + harmonic) ---")

app/training/train_classifier.py CHANGED Viewed

@@ -2,25 +2,26 @@
 Comprehensive multi-model training pipeline for AURIS.
 Trains and evaluates multiple classifier families on extracted
-audio features using stratified k-fold cross-validation, then
-selects the best model and exports it for production use.
 Models compared:
   - Random Forest
   - Gradient Boosting
-  - XGBoost
-  - LightGBM
   - Support Vector Machine (RBF)
-  - Multi-Layer Perceptron (Neural Network)
 Usage:
     python -m app.training.train_classifier data/training/features.csv
 Outputs:
-    models/auris_classifier_v1.pkl   — best trained model
-    models/feature_scaler_v1.pkl     — fitted StandardScaler
-    models/feature_columns_v1.json   — ordered feature column names
-    models/training_results.json     — all model metrics + CV folds
 """
 from __future__ import annotations
@@ -30,23 +31,15 @@ import json
 import pickle
 import sys
 import time
 from pathlib import Path
 from typing import Any
 import numpy as np
-from sklearn.ensemble import (
-    GradientBoostingClassifier,
-    RandomForestClassifier,
-)
 from sklearn.linear_model import LogisticRegression
-from sklearn.neural_network import MLPClassifier
-from sklearn.svm import SVC
-from sklearn.model_selection import (
-    StratifiedKFold,
-    cross_val_predict,
-)
-from sklearn.preprocessing import StandardScaler
 from sklearn.metrics import (
     accuracy_score,
     f1_score,
@@ -54,10 +47,16 @@ from sklearn.metrics import (
     recall_score,
     roc_auc_score,
 )
 # Optional: XGBoost
 try:
     import xgboost as xgb
     HAS_XGB = True
 except ImportError:
     HAS_XGB = False
@@ -65,15 +64,63 @@ except ImportError:
 # Optional: LightGBM
 try:
     import lightgbm as lgb
     HAS_LGBM = True
 except ImportError:
     HAS_LGBM = False
 sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
-from app.training.evaluate import (
-    load_features_csv,
-    evaluate_predictions,
-)
 def train(
@@ -87,53 +134,42 @@ def train(
     Returns:
         Dict with per-model metrics, best model info, and saved paths.
     """
     models_dir = Path(models_dir)
     models_dir.mkdir(parents=True, exist_ok=True)
-    # ── Load data ──────────────────────────────────
     X, y = load_features_csv(features_csv)
-    with open(features_csv, "r", encoding="utf-8") as f:
-        reader = csv.DictReader(f)
-        # duration_sec and sample_rate are metadata, not audio features —
-        # including them causes data leakage (duration correlates with source, not content)
-        _EXCLUDE = {"file_path", "label_int", "duration_sec", "sample_rate"}
-        feature_cols = [
-            c for c in reader.fieldnames
-            if c not in _EXCLUDE
-        ]
-    # ── Handle NaN/Inf ─────────────────────────────
     X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
-    # ── Scale features ─────────────────────────────
     scaler = StandardScaler()
     X_scaled = scaler.fit_transform(X)
-    # ── Train multiple models ──────────────────────
-    candidates = _build_candidates()
-    cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
-    best_model = None
     best_name = ""
-    best_auc = 0.0
     all_results: dict[str, dict[str, Any]] = {}
-    for name, model in candidates:
-        print(f"\n{'─' * 50}")
-        print(f"  Training: {name}")
-        print(f"{'─' * 50}")
         t0 = time.time()
-        # Cross-validated probability predictions
-        y_prob = cross_val_predict(
-            model, X_scaled, y,
-            cv=cv, method="predict_proba",
-        )[:, 1]
-        y_pred = (y_prob > 0.5).astype(int)
-        train_time = time.time() - t0
         acc = accuracy_score(y, y_pred)
         prec = precision_score(y, y_pred, zero_division=0)
@@ -141,12 +177,14 @@ def train(
         f1 = f1_score(y, y_pred, zero_division=0)
         auc = roc_auc_score(y, y_prob)
-        print(f"  Accuracy:   {acc:.4f}")
-        print(f"  Precision:  {prec:.4f}")
-        print(f"  Recall:     {rec:.4f}")
-        print(f"  F1 Score:   {f1:.4f}")
-        print(f"  ROC-AUC:    {auc:.4f}")
-        print(f"  Train time: {train_time:.1f}s")
         all_results[name] = {
             "accuracy": round(acc, 4),
@@ -154,7 +192,10 @@ def train(
             "recall": round(rec, 4),
             "f1": round(f1, 4),
             "roc_auc": round(auc, 4),
-            "train_time_sec": round(train_time, 2),
             "y_true": y.tolist(),
             "y_pred": y_pred.tolist(),
             "y_prob": y_prob.tolist(),
@@ -163,48 +204,38 @@ def train(
         if auc > best_auc:
             best_auc = auc
             best_name = name
-            best_model = model
-    # ── Final evaluation of best model ─────────────
-    print(f"\n{'═' * 60}")
-    print(f"  BEST MODEL: {best_name}  (ROC-AUC = {best_auc:.4f})")
-    print(f"{'═' * 60}")
     y_prob_best = np.array(all_results[best_name]["y_prob"])
     y_pred_best = np.array(all_results[best_name]["y_pred"])
-    evaluate_predictions(
-        y, y_pred_best, y_prob_best,
-        title=f"Best: {best_name}",
-    )
-    # ── Train ALL models on full data ────────────────
     all_model_paths: dict[str, str] = {}
-    for name, model in candidates:
-        print(f"\nTraining final {name} on all {len(y)} samples...")
-        model.fit(X_scaled, y)
-        safe_name = name.lower().replace(" ", "_").replace("(", "").replace(")", "")
-        model_pkl = models_dir / f"model_{safe_name}.pkl"
         with open(model_pkl, "wb") as f:
-            pickle.dump(model, f)
         all_model_paths[name] = str(model_pkl)
-        print(f"  → Saved: {model_pkl}")
-    best_model = None
-    for name, model in candidates:
-        if name == best_name:
-            best_model = model
-            break
-    # ── Feature importance ─────────────────────────
     importance_data = _extract_importance(best_model, feature_cols)
     if importance_data:
         print("\nTop 15 features:")
         for fname, imp in importance_data[:15]:
-            bar = "█" * int(imp * 100)
-            print(f"  {fname:<35} {imp:.4f} {bar}")
-    # ── Save artifacts ─────────────────────────────
     model_path = models_dir / "auris_classifier_v1.pkl"
     scaler_path = models_dir / "feature_scaler_v1.pkl"
     columns_path = models_dir / "feature_columns_v1.json"
@@ -214,31 +245,38 @@ def train(
         pickle.dump(best_model, f)
     with open(scaler_path, "wb") as f:
         pickle.dump(scaler, f)
-    with open(columns_path, "w") as f:
         json.dump(feature_cols, f, indent=2)
-    # Save full results (without numpy arrays for JSON)
-    json_results = {}
     for name, data in all_results.items():
         json_results[name] = {
-            k: v for k, v in data.items()
-            if k not in ("y_true", "y_pred", "y_prob")
         }
     json_results["_best_model"] = best_name
     json_results["_n_samples"] = len(y)
     json_results["_n_features"] = X.shape[1]
     json_results["_n_folds"] = n_folds
-    json_results["_data_leakage_fix"] = "duration_sec and sample_rate removed from features (v2)"
     json_results["_model_paths"] = all_model_paths
     if importance_data:
         json_results["_feature_importance"] = {
-            name: round(imp, 6) for name, imp in importance_data
         }
-    with open(results_path, "w") as f:
         json.dump(json_results, f, indent=2)
-    print(f"\nSaved:")
     print(f"  Model:   {model_path}")
     print(f"  Scaler:  {scaler_path}")
     print(f"  Columns: {columns_path}")
@@ -253,116 +291,346 @@ def train(
     }
-def _build_candidates() -> list[tuple[str, Any]]:
-    """Build list of classifier candidates to evaluate."""
-    candidates: list[tuple[str, Any]] = [
-        (
-            "Logistic Regression",
             LogisticRegression(
-                C=1.0,
-                max_iter=1000,
                 class_weight="balanced",
                 random_state=42,
             ),
-        ),
-        (
-            "Random Forest",
             RandomForestClassifier(
-                n_estimators=200,
-                max_depth=10,
-                min_samples_leaf=10,
-                min_samples_split=15,
-                class_weight="balanced",
                 random_state=42,
                 n_jobs=-1,
             ),
-        ),
-        (
-            "Gradient Boosting",
             GradientBoostingClassifier(
                 n_estimators=200,
                 max_depth=4,
-                learning_rate=0.08,
                 subsample=0.75,
-                min_samples_leaf=12,
-                min_samples_split=20,
                 random_state=42,
             ),
-        ),
-        (
-            "SVM (RBF)",
             SVC(
                 kernel="rbf",
-                C=10.0,
                 gamma="scale",
                 class_weight="balanced",
                 probability=True,
                 random_state=42,
             ),
-        ),
-        (
-            "MLP Neural Network",
             MLPClassifier(
-                hidden_layer_sizes=(128, 64, 32),
                 activation="relu",
                 solver="adam",
-                alpha=0.001,
                 learning_rate="adaptive",
                 max_iter=500,
                 early_stopping=True,
                 validation_fraction=0.15,
                 random_state=42,
             ),
-        ),
-    ]
     if HAS_XGB:
-        candidates.append((
-            "XGBoost",
             xgb.XGBClassifier(
-                n_estimators=200,
                 max_depth=4,
-                learning_rate=0.08,
                 subsample=0.75,
                 colsample_bytree=0.75,
-                min_child_weight=8,
-                reg_alpha=0.3,
                 reg_lambda=1.5,
                 gamma=0.2,
-                scale_pos_weight=1.0,
                 eval_metric="logloss",
                 random_state=42,
                 verbosity=0,
             ),
-        ))
     if HAS_LGBM:
-        candidates.append((
-            "LightGBM",
             lgb.LGBMClassifier(
-                n_estimators=200,
-                max_depth=4,
-                learning_rate=0.08,
-                num_leaves=12,
                 subsample=0.75,
                 colsample_bytree=0.75,
-                min_child_weight=8,
                 reg_alpha=0.3,
                 reg_lambda=1.5,
                 class_weight="balanced",
                 random_state=42,
                 verbose=-1,
             ),
-        ))
-    return candidates
 def _extract_importance(
     model: Any,
     feature_cols: list[str],
 ) -> list[tuple[str, float]]:
-    """Extract feature importance from the trained model."""
     importances = None
     if hasattr(model, "feature_importances_"):
@@ -373,14 +641,13 @@ def _extract_importance(
     if importances is None:
         return []
-    # Normalize to sum to 1
     total = np.sum(importances)
     if total > 0:
         importances = importances / total
     return sorted(
         zip(feature_cols, importances.tolist()),
-        key=lambda x: x[1],
         reverse=True,
     )

 Comprehensive multi-model training pipeline for AURIS.
 Trains and evaluates multiple classifier families on extracted
+audio features using stratified cross-validation, then selects
+the best model and exports it for production use.
 Models compared:
+  - Logistic Regression
   - Random Forest
   - Gradient Boosting
   - Support Vector Machine (RBF)
+  - Multi-Layer Perceptron
+  - XGBoost (optional)
+  - LightGBM (optional)
 Usage:
     python -m app.training.train_classifier data/training/features.csv
 Outputs:
+    models/auris_classifier_v1.pkl   - best trained model
+    models/feature_scaler_v1.pkl     - fitted StandardScaler
+    models/feature_columns_v1.json   - ordered feature column names
+    models/training_results.json     - model metrics and metadata
 """
 from __future__ import annotations
 import pickle
 import sys
 import time
+import warnings
 from pathlib import Path
 from typing import Any
 import numpy as np
+from sklearn.base import clone
+from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import (
     accuracy_score,
     f1_score,
     recall_score,
     roc_auc_score,
 )
+from sklearn.model_selection import StratifiedKFold, cross_val_predict, train_test_split
+from sklearn.neural_network import MLPClassifier
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC
 # Optional: XGBoost
 try:
     import xgboost as xgb
     HAS_XGB = True
 except ImportError:
     HAS_XGB = False
 # Optional: LightGBM
 try:
     import lightgbm as lgb
     HAS_LGBM = True
 except ImportError:
     HAS_LGBM = False
 sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+from app.training.evaluate import evaluate_predictions, load_features_csv
+_EXCLUDED_COLUMNS = {"file_path", "label_int", "duration_sec", "sample_rate"}
+_TUNED_PARAM_KEYS: dict[str, tuple[str, ...]] = {
+    "Logistic Regression": ("C", "class_weight", "max_iter"),
+    "Random Forest": (
+        "n_estimators",
+        "max_depth",
+        "min_samples_leaf",
+        "min_samples_split",
+        "class_weight",
+        "max_features",
+    ),
+    "Gradient Boosting": (
+        "n_estimators",
+        "max_depth",
+        "learning_rate",
+        "subsample",
+        "min_samples_leaf",
+        "min_samples_split",
+    ),
+    "SVM (RBF)": ("C", "gamma", "class_weight"),
+    "MLP Neural Network": (
+        "hidden_layer_sizes",
+        "alpha",
+        "max_iter",
+        "validation_fraction",
+    ),
+    "XGBoost": (
+        "n_estimators",
+        "max_depth",
+        "learning_rate",
+        "subsample",
+        "colsample_bytree",
+        "min_child_weight",
+        "reg_alpha",
+        "reg_lambda",
+        "gamma",
+    ),
+    "LightGBM": (
+        "n_estimators",
+        "max_depth",
+        "learning_rate",
+        "num_leaves",
+        "subsample",
+        "colsample_bytree",
+        "min_child_samples",
+        "reg_alpha",
+        "reg_lambda",
+    ),
+}
 def train(
     Returns:
         Dict with per-model metrics, best model info, and saved paths.
     """
+    features_csv = Path(features_csv)
     models_dir = Path(models_dir)
     models_dir.mkdir(parents=True, exist_ok=True)
     X, y = load_features_csv(features_csv)
+    feature_cols = _load_feature_columns(features_csv)
     X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
+    selected_candidates, tuning_results = _select_best_candidates(X, y)
+    cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
     scaler = StandardScaler()
     X_scaled = scaler.fit_transform(X)
     best_name = ""
+    best_auc = -1.0
     all_results: dict[str, dict[str, Any]] = {}
+    for name, model in selected_candidates:
+        print("\n" + "-" * 56)
+        print(f"Training: {name}")
+        print("-" * 56)
         t0 = time.time()
+        pipeline = _build_eval_pipeline(model)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", category=ConvergenceWarning)
+            y_prob = cross_val_predict(
+                pipeline,
+                X,
+                y,
+                cv=cv,
+                method="predict_proba",
+            )[:, 1]
+        y_pred = (y_prob >= 0.5).astype(int)
+        cv_time = time.time() - t0
         acc = accuracy_score(y, y_pred)
         prec = precision_score(y, y_pred, zero_division=0)
         f1 = f1_score(y, y_pred, zero_division=0)
         auc = roc_auc_score(y, y_prob)
+        tuning_meta = tuning_results.get(name, {})
+        print(f"  Validation AUC: {tuning_meta.get('validation_auc', 0.0):.4f}")
+        print(f"  CV Accuracy:    {acc:.4f}")
+        print(f"  CV Precision:   {prec:.4f}")
+        print(f"  CV Recall:      {rec:.4f}")
+        print(f"  CV F1 Score:    {f1:.4f}")
+        print(f"  CV ROC-AUC:     {auc:.4f}")
+        print(f"  CV Time:        {cv_time:.1f}s")
         all_results[name] = {
             "accuracy": round(acc, 4),
             "recall": round(rec, 4),
             "f1": round(f1, 4),
             "roc_auc": round(auc, 4),
+            "validation_auc": round(tuning_meta.get("validation_auc", 0.0), 4),
+            "selection_time_sec": round(tuning_meta.get("selection_time_sec", 0.0), 2),
+            "train_time_sec": round(cv_time, 2),
+            "selected_params": tuning_meta.get("selected_params", {}),
             "y_true": y.tolist(),
             "y_pred": y_pred.tolist(),
             "y_prob": y_prob.tolist(),
         if auc > best_auc:
             best_auc = auc
             best_name = name
+    print("\n" + "=" * 64)
+    print(f"BEST MODEL: {best_name} (ROC-AUC = {best_auc:.4f})")
+    print("=" * 64)
     y_prob_best = np.array(all_results[best_name]["y_prob"])
     y_pred_best = np.array(all_results[best_name]["y_pred"])
+    evaluate_predictions(y, y_pred_best, y_prob_best, title=f"Best: {best_name}")
+    fitted_models: dict[str, Any] = {}
     all_model_paths: dict[str, str] = {}
+    for name, model in selected_candidates:
+        print(f"\nFitting final {name} on all {len(y)} samples...")
+        final_model = clone(model)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", category=ConvergenceWarning)
+            final_model.fit(X_scaled, y)
+        fitted_models[name] = final_model
+        model_pkl = models_dir / f"model_{_safe_model_name(name)}.pkl"
         with open(model_pkl, "wb") as f:
+            pickle.dump(final_model, f)
         all_model_paths[name] = str(model_pkl)
+        print(f"  Saved: {model_pkl}")
+    best_model = fitted_models[best_name]
     importance_data = _extract_importance(best_model, feature_cols)
     if importance_data:
         print("\nTop 15 features:")
         for fname, imp in importance_data[:15]:
+            print(f"  {fname:<35} {imp:.4f}")
     model_path = models_dir / "auris_classifier_v1.pkl"
     scaler_path = models_dir / "feature_scaler_v1.pkl"
     columns_path = models_dir / "feature_columns_v1.json"
         pickle.dump(best_model, f)
     with open(scaler_path, "wb") as f:
         pickle.dump(scaler, f)
+    with open(columns_path, "w", encoding="utf-8") as f:
         json.dump(feature_cols, f, indent=2)
+    json_results: dict[str, Any] = {}
     for name, data in all_results.items():
         json_results[name] = {
+            key: value
+            for key, value in data.items()
+            if key not in ("y_true", "y_pred", "y_prob")
         }
     json_results["_best_model"] = best_name
     json_results["_n_samples"] = len(y)
     json_results["_n_features"] = X.shape[1]
     json_results["_n_folds"] = n_folds
+    json_results["_dataset_path"] = str(features_csv)
+    json_results["_class_balance"] = {
+        "ai": int(np.sum(y == 1)),
+        "human": int(np.sum(y == 0)),
+    }
+    json_results["_data_leakage_fix"] = (
+        "duration_sec and sample_rate removed from features; scaler fitted per fold during CV"
+    )
     json_results["_model_paths"] = all_model_paths
     if importance_data:
         json_results["_feature_importance"] = {
+            feature_name: round(imp, 6) for feature_name, imp in importance_data
         }
+    with open(results_path, "w", encoding="utf-8") as f:
         json.dump(json_results, f, indent=2)
+    print("\nSaved artifacts:")
     print(f"  Model:   {model_path}")
     print(f"  Scaler:  {scaler_path}")
     print(f"  Columns: {columns_path}")
     }
+def _load_feature_columns(features_csv: Path) -> list[str]:
+    with open(features_csv, "r", encoding="utf-8") as f:
+        reader = csv.DictReader(f)
+        return [
+            column
+            for column in (reader.fieldnames or [])
+            if column not in _EXCLUDED_COLUMNS
+        ]
+def _select_best_candidates(
+    X: np.ndarray,
+    y: np.ndarray,
+) -> tuple[list[tuple[str, Any]], dict[str, dict[str, Any]]]:
+    """
+    Pick one tuned configuration per model family using a stratified holdout.
+    """
+    X_train, X_val, y_train, y_val = train_test_split(
+        X,
+        y,
+        test_size=0.2,
+        stratify=y,
+        random_state=42,
+    )
+    selected: list[tuple[str, Any]] = []
+    tuning_results: dict[str, dict[str, Any]] = {}
+    for name, variants in _build_candidate_families().items():
+        print("\n" + "." * 56)
+        print(f"Selecting hyperparameters for: {name}")
+        print("." * 56)
+        best_model = None
+        best_auc = -1.0
+        best_params: dict[str, Any] = {}
+        selection_start = time.time()
+        for idx, model in enumerate(variants, start=1):
+            pipeline = _build_eval_pipeline(model)
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", category=ConvergenceWarning)
+                pipeline.fit(X_train, y_train)
+            y_prob = pipeline.predict_proba(X_val)[:, 1]
+            auc = roc_auc_score(y_val, y_prob)
+            params = _summarize_selected_params(name, model)
+            print(f"  Candidate {idx}: holdout AUC={auc:.4f} | params={params}")
+            if auc > best_auc:
+                best_auc = auc
+                best_model = model
+                best_params = params
+        if best_model is None:
+            raise RuntimeError(f"No valid candidate selected for {name}")
+        tuning_results[name] = {
+            "validation_auc": float(best_auc),
+            "selected_params": best_params,
+            "selection_time_sec": time.time() - selection_start,
+        }
+        selected.append((name, best_model))
+        print(f"  Selected {name}: AUC={best_auc:.4f}")
+    return selected, tuning_results
+def _build_candidate_families() -> dict[str, list[Any]]:
+    families: dict[str, list[Any]] = {
+        "Logistic Regression": [
             LogisticRegression(
+                C=value,
+                max_iter=2500,
                 class_weight="balanced",
                 random_state=42,
+            )
+            for value in (0.25, 0.5, 1.0, 2.0)
+        ],
+        "Random Forest": [
+            RandomForestClassifier(
+                n_estimators=300,
+                max_depth=12,
+                min_samples_leaf=4,
+                min_samples_split=8,
+                max_features="sqrt",
+                class_weight="balanced_subsample",
+                random_state=42,
+                n_jobs=-1,
+            ),
+            RandomForestClassifier(
+                n_estimators=450,
+                max_depth=18,
+                min_samples_leaf=2,
+                min_samples_split=4,
+                max_features="sqrt",
+                class_weight="balanced_subsample",
+                random_state=42,
+                n_jobs=-1,
             ),
             RandomForestClassifier(
+                n_estimators=500,
+                max_depth=None,
+                min_samples_leaf=1,
+                min_samples_split=2,
+                max_features="log2",
+                class_weight="balanced_subsample",
                 random_state=42,
                 n_jobs=-1,
             ),
+        ],
+        "Gradient Boosting": [
             GradientBoostingClassifier(
                 n_estimators=200,
+                max_depth=3,
+                learning_rate=0.05,
+                subsample=0.8,
+                min_samples_leaf=10,
+                min_samples_split=20,
+                random_state=42,
+            ),
+            GradientBoostingClassifier(
+                n_estimators=260,
+                max_depth=2,
+                learning_rate=0.04,
+                subsample=0.85,
+                min_samples_leaf=12,
+                min_samples_split=24,
+                random_state=42,
+            ),
+            GradientBoostingClassifier(
+                n_estimators=180,
                 max_depth=4,
+                learning_rate=0.07,
                 subsample=0.75,
+                min_samples_leaf=8,
+                min_samples_split=16,
                 random_state=42,
             ),
+        ],
+        "SVM (RBF)": [
             SVC(
                 kernel="rbf",
+                C=1.0,
+                gamma="scale",
+                class_weight="balanced",
+                probability=True,
+                random_state=42,
+            ),
+            SVC(
+                kernel="rbf",
+                C=3.0,
                 gamma="scale",
                 class_weight="balanced",
                 probability=True,
                 random_state=42,
             ),
+            SVC(
+                kernel="rbf",
+                C=6.0,
+                gamma=0.02,
+                class_weight="balanced",
+                probability=True,
+                random_state=42,
+            ),
+            SVC(
+                kernel="rbf",
+                C=10.0,
+                gamma=0.05,
+                class_weight="balanced",
+                probability=True,
+                random_state=42,
+            ),
+        ],
+        "MLP Neural Network": [
             MLPClassifier(
+                hidden_layer_sizes=(128, 64),
                 activation="relu",
                 solver="adam",
+                alpha=0.0005,
                 learning_rate="adaptive",
                 max_iter=500,
                 early_stopping=True,
                 validation_fraction=0.15,
                 random_state=42,
             ),
+            MLPClassifier(
+                hidden_layer_sizes=(192, 96, 32),
+                activation="relu",
+                solver="adam",
+                alpha=0.001,
+                learning_rate="adaptive",
+                max_iter=600,
+                early_stopping=True,
+                validation_fraction=0.15,
+                random_state=42,
+            ),
+            MLPClassifier(
+                hidden_layer_sizes=(256, 128),
+                activation="relu",
+                solver="adam",
+                alpha=0.002,
+                learning_rate="adaptive",
+                max_iter=700,
+                early_stopping=True,
+                validation_fraction=0.15,
+                random_state=42,
+            ),
+        ],
+    }
     if HAS_XGB:
+        families["XGBoost"] = [
             xgb.XGBClassifier(
+                n_estimators=300,
                 max_depth=4,
+                learning_rate=0.05,
+                subsample=0.8,
+                colsample_bytree=0.8,
+                min_child_weight=4,
+                reg_alpha=0.2,
+                reg_lambda=1.2,
+                gamma=0.1,
+                eval_metric="logloss",
+                tree_method="hist",
+                random_state=42,
+                n_jobs=-1,
+                verbosity=0,
+            ),
+            xgb.XGBClassifier(
+                n_estimators=500,
+                max_depth=3,
+                learning_rate=0.03,
+                subsample=0.9,
+                colsample_bytree=0.8,
+                min_child_weight=2,
+                reg_alpha=0.1,
+                reg_lambda=1.0,
+                gamma=0.0,
+                eval_metric="logloss",
+                tree_method="hist",
+                random_state=42,
+                n_jobs=-1,
+                verbosity=0,
+            ),
+            xgb.XGBClassifier(
+                n_estimators=240,
+                max_depth=5,
+                learning_rate=0.06,
                 subsample=0.75,
                 colsample_bytree=0.75,
+                min_child_weight=6,
+                reg_alpha=0.4,
                 reg_lambda=1.5,
                 gamma=0.2,
                 eval_metric="logloss",
+                tree_method="hist",
                 random_state=42,
+                n_jobs=-1,
                 verbosity=0,
             ),
+        ]
     if HAS_LGBM:
+        families["LightGBM"] = [
             lgb.LGBMClassifier(
+                n_estimators=300,
+                max_depth=-1,
+                learning_rate=0.05,
+                num_leaves=31,
+                subsample=0.8,
+                colsample_bytree=0.8,
+                min_child_samples=20,
+                reg_alpha=0.1,
+                reg_lambda=1.0,
+                class_weight="balanced",
+                random_state=42,
+                verbose=-1,
+            ),
+            lgb.LGBMClassifier(
+                n_estimators=500,
+                max_depth=8,
+                learning_rate=0.03,
+                num_leaves=24,
+                subsample=0.9,
+                colsample_bytree=0.8,
+                min_child_samples=30,
+                reg_alpha=0.2,
+                reg_lambda=1.2,
+                class_weight="balanced",
+                random_state=42,
+                verbose=-1,
+            ),
+            lgb.LGBMClassifier(
+                n_estimators=220,
+                max_depth=6,
+                learning_rate=0.07,
+                num_leaves=18,
                 subsample=0.75,
                 colsample_bytree=0.75,
+                min_child_samples=24,
                 reg_alpha=0.3,
                 reg_lambda=1.5,
                 class_weight="balanced",
                 random_state=42,
                 verbose=-1,
             ),
+        ]
+    return families
+def _build_eval_pipeline(model: Any) -> Pipeline:
+    return Pipeline(
+        [
+            ("scaler", StandardScaler()),
+            ("model", clone(model)),
+        ]
+    )
+def _safe_model_name(name: str) -> str:
+    return (
+        name.lower()
+        .replace(" ", "_")
+        .replace("(", "")
+        .replace(")", "")
+        .replace("/", "_")
+    )
+def _summarize_selected_params(name: str, model: Any) -> dict[str, Any]:
+    tuned_keys = _TUNED_PARAM_KEYS.get(name, ())
+    params = model.get_params()
+    return {key: params[key] for key in tuned_keys if key in params}
 def _extract_importance(
     model: Any,
     feature_cols: list[str],
 ) -> list[tuple[str, float]]:
     importances = None
     if hasattr(model, "feature_importances_"):
     if importances is None:
         return []
     total = np.sum(importances)
     if total > 0:
         importances = importances / total
     return sorted(
         zip(feature_cols, importances.tolist()),
+        key=lambda item: item[1],
         reverse=True,
     )

local_demo.py CHANGED Viewed

@@ -1,574 +1,972 @@
 """
-AURIS Local Demo — AI Music Detection
-Tüm eğitilmiş modelleri test edebileceğin local Gradio arayüzü.
-Çalıştır:
     python local_demo.py
 """
-import io
 import json
 import pickle
 import time
 from pathlib import Path
 import gradio as gr
 import numpy as np
-# ── Paths ───────────────────────────────────────────────────────
-MODELS_DIR = Path(__file__).parent / "models"
-FIGURES_DIR = Path(__file__).parent.parent / "docs" / "academic" / "figures"
-# ── Load artifacts ──────────────────────────────────────────────
-with open(MODELS_DIR / "auris_classifier_v1.pkl", "rb") as f:
-    main_model = pickle.load(f)
-with open(MODELS_DIR / "feature_scaler_v1.pkl", "rb") as f:
-    scaler = pickle.load(f)
-with open(MODELS_DIR / "feature_columns_v1.json", "r") as f:
-    feature_cols = json.load(f)
-with open(MODELS_DIR / "training_results.json", "r") as f:
-    training_results = json.load(f)
-best_model_name = training_results.get("_best_model", "Gradient Boosting")
-n_features = training_results.get("_n_features", 47)
-importance = training_results.get("_feature_importance", {})
-top_features = sorted(importance.items(), key=lambda x: x[1], reverse=True)[:15]
-# All model results sorted by AUC
-all_models = sorted(
-    ((k, v) for k, v in training_results.items()
-     if not k.startswith("_") and isinstance(v, dict)),
-    key=lambda x: -x[1].get("roc_auc", 0),
-)
-print(f"AURIS Local Demo")
-print(f"Model: {best_model_name} | Features: {n_features}")
-print(f"Total models: {len(all_models)}")
-# ── Feature extraction ──────────────────────────────────────────
-def extract_features_from_audio(audio_path: str) -> tuple[dict, float]:
-    """Extract 47 features from audio file using librosa."""
-    import librosa
-    from scipy import stats as sp_stats
-    y, sr = librosa.load(audio_path, sr=22050, mono=True, duration=60.0)
-    duration_sec = len(y) / sr
-    # RMS energy
-    rms = librosa.feature.rms(y=y, hop_length=512)[0]
-    rms_mean = float(np.mean(rms))
-    rms_std = float(np.std(rms))
-    rms_dynamic_range = float(np.max(rms) - np.min(rms))
-    # Spectral features
-    cent = librosa.feature.spectral_centroid(y=y, sr=sr, hop_length=512)[0]
-    flat = librosa.feature.spectral_flatness(y=y, hop_length=512)[0]
-    bw = librosa.feature.spectral_bandwidth(y=y, sr=sr, hop_length=512)[0]
-    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, hop_length=512)[0]
-    contrast = librosa.feature.spectral_contrast(y=y, sr=sr, hop_length=512)
-    # MFCCs
-    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=512)
-    mfcc_delta = librosa.feature.delta(mfcc)
-    mfcc_delta2 = librosa.feature.delta(mfcc, order=2)
-    # Zero crossing
-    zcr = librosa.feature.zero_crossing_rate(y, hop_length=512)[0]
-    # Tempo
-    tempo, beats = librosa.beat.beat_track(y=y, sr=sr, hop_length=512)
-    tempo_val = float(np.atleast_1d(tempo)[0])
-    beat_times = librosa.frames_to_time(beats, sr=sr, hop_length=512)
-    if len(beat_times) > 1:
-        ibi = np.diff(beat_times)
-        tempo_stability = float(np.std(ibi))
-        tempo_cv = float(np.std(ibi) / np.mean(ibi)) if np.mean(ibi) > 0 else 0.0
-    else:
-        tempo_stability = 0.0
-        tempo_cv = 0.0
-    # Chroma
-    chroma = librosa.feature.chroma_stft(y=y, sr=sr, hop_length=512)
-    chroma_std = float(np.mean(np.std(chroma, axis=1)))
-    chroma_entropy = float(-np.sum(
-        np.mean(chroma, axis=1) * np.log2(np.mean(chroma, axis=1) + 1e-10)
-    ))
-    chroma_diff = np.diff(chroma, axis=1)
-    chroma_transition_rate = float(np.mean(np.abs(chroma_diff)))
-    # Tonnetz
-    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
-    tonnetz_std = float(np.mean(np.std(tonnetz, axis=1)))
-    # Harmonic ratio
-    y_harm, y_perc = librosa.effects.hpss(y)
-    harm_energy = float(np.sum(y_harm ** 2))
-    perc_energy = float(np.sum(y_perc ** 2))
-    total_energy = harm_energy + perc_energy + 1e-10
-    harmonic_ratio = harm_energy / total_energy
-    # Mel
-    mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=512)
-    mel_flatness = float(np.mean(librosa.feature.spectral_flatness(S=mel)))
-    # Onset
-    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
-    # Pitch
-    pitches, magnitudes = librosa.piptrack(y=y, sr=sr, hop_length=512)
-    pitch_vals = []
-    for t in range(pitches.shape[1]):
-        idx = magnitudes[:, t].argmax()
-        p = pitches[idx, t]
-        if p > 50:
-            pitch_vals.append(p)
-    pitch_mean_hz = float(np.mean(pitch_vals)) if pitch_vals else 0.0
-    if len(pitch_vals) > 1 and pitch_mean_hz > 0:
-        cents = 1200 * np.log2(np.array(pitch_vals) / pitch_mean_hz + 1e-10)
-        pitch_std_cents = float(np.std(cents))
-    else:
-        pitch_std_cents = 0.0
-    def _sigmoid(x, center=0.5, steepness=6.0):
-        return 1.0 / (1.0 + np.exp(-steepness * (x - center)))
-    spectral_regularity = float(_sigmoid(1.0 - float(np.std(flat)), 0.5, 4))
-    temporal_patterns = float(_sigmoid(1.0 - tempo_cv, 0.6, 5) if tempo_cv > 0 else 0.5)
-    harmonic_structure = float(_sigmoid(harmonic_ratio, 0.5, 4))
-    feats = {
-        "rms_energy": rms_mean,
-        "rms_std": rms_std,
-        "spectral_centroid_mean": float(np.mean(cent)),
-        "spectral_centroid_std": float(np.std(cent)),
-        "spectral_flatness_mean": float(np.mean(flat)),
-        "spectral_flatness_std": float(np.std(flat)),
-        "spectral_bandwidth_mean": float(np.mean(bw)),
-        "spectral_bandwidth_std": float(np.std(bw)),
-        "spectral_rolloff_mean": float(np.mean(rolloff)),
-        "spectral_rolloff_std": float(np.std(rolloff)),
-        "spectral_contrast_mean": float(np.mean(contrast)),
-        "spectral_contrast_std": float(np.std(contrast)),
-        "mfcc_variance": float(np.mean(np.var(mfcc, axis=1))),
-        "mfcc_delta_var": float(np.mean(np.var(mfcc_delta, axis=1))),
-        "mfcc_delta2_var": float(np.mean(np.var(mfcc_delta2, axis=1))),
-        "zero_crossing_rate": float(np.mean(zcr)),
-        "zero_crossing_std": float(np.std(zcr)),
-        "tempo_bpm": tempo_val,
-        "tempo_stability": tempo_stability,
-        "tempo_cv": tempo_cv,
-        "beat_count": float(len(beats)),
-        "rms_dynamic_range": rms_dynamic_range,
-        "chroma_std": chroma_std,
-        "chroma_entropy": chroma_entropy,
-        "chroma_transition_rate": chroma_transition_rate,
-        "tonnetz_std": tonnetz_std,
-        "harmonic_ratio": harmonic_ratio,
-        "mel_flatness": mel_flatness,
-        "onset_strength_mean": float(np.mean(onset_env)),
-        "onset_strength_std": float(np.std(onset_env)),
-        "pitch_mean_hz": pitch_mean_hz,
-        "pitch_std_cents": pitch_std_cents,
-        "spectral_regularity": spectral_regularity,
-        "temporal_patterns": temporal_patterns,
-        "harmonic_structure": harmonic_structure,
-        "vocal_confidence": 0.0,
-        "vocal_ai_score": 0.0,
-        "vocal_energy_ratio": 0.0,
-        "vocal_harmonic_ratio": 0.0,
-        "vocal_texture_score": 0.0,
-        "has_vocals": 0.0,
-        "pitch_stability_score": float(_sigmoid(1.0 - min(pitch_std_cents / 200, 1.0), 0.5, 4)),
-        "vibrato_rate_hz": 0.0,
-        "vibrato_extent_cents": 0.0,
-        "vibrato_regularity_score": 0.0,
-        "formant_consistency_score": 0.0,
-        "breath_pattern_score": float(_sigmoid(rms_dynamic_range, 0.3, 5)),
     }
-    return feats, duration_sec
-# ── Prediction ──────────────────────────────────────────────────
-def predict(audio_file):
-    """Run AURIS model on uploaded audio and return rich results."""
-    if audio_file is None:
-        return "Dosya yükleyin", None, None, ""
-    import matplotlib
-    matplotlib.use("Agg")
-    import matplotlib.pyplot as plt
-    t0 = time.time()
-    if isinstance(audio_file, tuple):
-        audio_path = audio_file[0] if isinstance(audio_file[0], str) else None
-        if audio_path is None:
-            return "Gecersiz dosya", None, None, ""
-    else:
-        audio_path = audio_file
-    try:
-        feats, duration = extract_features_from_audio(audio_path)
-    except Exception as e:
-        return f"Hata: {e}", None, None, ""
-    X = np.array([[feats.get(col, 0.0) for col in feature_cols]], dtype=np.float32)
-    X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
-    X_scaled = scaler.transform(X)
-    elapsed = time.time() - t0
-    prob = main_model.predict_proba(X_scaled)[0]
-    ai_prob = float(prob[1])
-    human_prob = float(prob[0])
-    if ai_prob > 0.8:
-        verdict = f"AI Uretimi Muzik Tespit Edildi  |  %{ai_prob*100:.1f} guven"
-    elif ai_prob > 0.5:
-        verdict = f"Muhtemelen AI Uretimi  |  %{ai_prob*100:.1f} guven"
-    elif ai_prob > 0.3:
-        verdict = f"Muhtemelen Insan Yapimi  |  %{human_prob*100:.1f} guven"
-    else:
-        verdict = f"Insan Yapimi Muzik  |  %{human_prob*100:.1f} guven"
-    sr_pct = feats["spectral_regularity"] * 100
-    tp_pct = feats["temporal_patterns"] * 100
-    hs_pct = feats["harmonic_structure"] * 100
-    # ── Main dashboard plot (16:9) ──
-    fig = plt.figure(figsize=(16, 9), facecolor="#1a1207")
-    # ── Left: Gauge + verdict ──
-    ax_gauge = fig.add_axes([0.02, 0.45, 0.28, 0.50], projection="polar")
-    ax_gauge.set_facecolor("#1a1207")
-    theta = np.linspace(np.pi, 0, 100)
-    r = np.ones(100)
-    ax_gauge.plot(theta, r, color="#3d2817", linewidth=24, alpha=0.4)
-    score_end = max(1, int(ai_prob * 100))
-    c = "#7fb069" if ai_prob < 0.4 else "#c99347" if ai_prob < 0.7 else "#a64b3c"
-    ax_gauge.plot(theta[:score_end], r[:score_end], color=c, linewidth=24)
-    needle = np.pi - ai_prob * np.pi
-    ax_gauge.plot([needle, needle], [0, 0.82], color="#faf6ed", linewidth=2.5)
-    ax_gauge.scatter([needle], [0.82], color="#faf6ed", s=40, zorder=5)
-    ax_gauge.set_ylim(0, 1.2)
-    ax_gauge.set_yticklabels([])
-    ax_gauge.set_xticklabels([])
-    ax_gauge.spines["polar"].set_visible(False)
-    ax_gauge.grid(False)
-    fig.text(0.16, 0.42, f"%{ai_prob*100:.0f}", ha="center", va="center",
-             fontsize=42, fontweight="bold", color="#faf6ed")
-    fig.text(0.16, 0.36, "AI Olasiligi", ha="center", va="center",
-             fontsize=12, color="#c99347")
-    label = "AI URETIMI" if ai_prob > 0.5 else "INSAN YAPIMI"
-    label_color = "#a64b3c" if ai_prob > 0.5 else "#7fb069"
-    fig.text(0.16, 0.30, label, ha="center", va="center",
-             fontsize=16, fontweight="bold", color=label_color,
-             bbox=dict(boxstyle="round,pad=0.4", facecolor="#2a1f10",
-                       edgecolor=label_color, linewidth=2))
-    # ── Middle: Feature bars ──
-    ax_bars = fig.add_axes([0.35, 0.55, 0.28, 0.35])
-    ax_bars.set_facecolor("#2a1f10")
-    bars = [
-        ("Spektral Duzenlilik", sr_pct),
-        ("Zamansal Oruntuler", tp_pct),
-        ("Harmonik Yapi", hs_pct),
-    ]
-    y_pos = np.arange(len(bars))
-    vals = [v for _, v in bars]
-    colors_b = ["#c99347" if v > 60 else "#7fb069" if v < 40 else "#e7c77a" for v in vals]
-    ax_bars.barh(y_pos, vals, color=colors_b, edgecolor="#3d2817", height=0.55)
-    ax_bars.set_yticks(y_pos)
-    ax_bars.set_yticklabels([n for n, _ in bars], color="#faf6ed", fontsize=11)
-    ax_bars.set_xlim(0, 100)
-    ax_bars.set_xlabel("Skor (%)", color="#c99347", fontsize=10)
-    ax_bars.tick_params(colors="#c99347", labelsize=9)
-    for spine in ax_bars.spines.values():
-        spine.set_color("#3d2817")
-    for i, v in enumerate(vals):
-        ax_bars.text(v + 1.5, i, f"%{v:.0f}", va="center", color="#faf6ed",
-                     fontsize=11, fontweight="bold")
-    ax_bars.set_title("Ses Ozellik Analizi", color="#c99347", fontsize=13,
-                      fontweight="bold", pad=8)
-    # ── Right: Top feature importance ──
-    ax_imp = fig.add_axes([0.70, 0.35, 0.27, 0.58])
-    ax_imp.set_facecolor("#2a1f10")
-    top10 = top_features[:10]
-    imp_names = [n for n, _ in top10]
-    imp_vals = [v * 100 for _, v in top10]
-    imp_colors = plt.cm.copper(np.linspace(0.3, 0.85, len(imp_names)))
-    ax_imp.barh(np.arange(len(imp_names)), imp_vals, color=imp_colors,
-                edgecolor="#3d2817", height=0.6)
-    ax_imp.set_yticks(np.arange(len(imp_names)))
-    ax_imp.set_yticklabels(imp_names, color="#faf6ed", fontsize=8)
-    ax_imp.invert_yaxis()
-    ax_imp.set_xlabel("Onem (%)", color="#c99347", fontsize=9)
-    ax_imp.tick_params(colors="#c99347", labelsize=8)
-    for spine in ax_imp.spines.values():
-        spine.set_color("#3d2817")
-    ax_imp.set_title("En Onemli Ozellikler", color="#c99347", fontsize=12,
-                      fontweight="bold", pad=8)
-    # ── Bottom: Info strip ──
-    info_text = (
-        f"Model: {best_model_name}  |  Sure: {duration:.1f}s  |  "
-        f"Islem: {elapsed:.2f}s  |  Ozellik: {n_features}  |  "
-        f"Tempo: {feats['tempo_bpm']:.0f} BPM  |  "
-        f"RMS: {feats['rms_energy']:.4f}"
     )
-    fig.text(0.50, 0.06, info_text, ha="center", va="center",
-             fontsize=10, color="#c99347",
-             bbox=dict(boxstyle="round,pad=0.6", facecolor="#2a1f10",
-                       edgecolor="#3d2817", linewidth=1))
-    # ── Title ──
-    fig.text(0.50, 0.97, "AURIS — AI Music Detection System",
-             ha="center", va="top", fontsize=20, fontweight="bold",
-             color="#c99347")
-    fig.text(0.50, 0.93,
-             f"{best_model_name} | AUC={training_results.get(best_model_name, {}).get('roc_auc', 0):.4f}",
-             ha="center", va="top", fontsize=11, color="#faf6ed", alpha=0.7)
-    # ── Bottom left: Mini model comparison ──
-    ax_models = fig.add_axes([0.04, 0.12, 0.58, 0.22])
-    ax_models.set_facecolor("#2a1f10")
-    model_names = [n for n, _ in all_models]
-    model_aucs = [d.get("roc_auc", 0) for _, d in all_models]
-    model_types = []
-    for _, d in all_models:
-        if d.get("type") == "deep_learning":
-            model_types.append("#a64b3c")
-        else:
-            model_types.append("#c99347")
-    x_pos = np.arange(len(model_names))
-    ax_models.bar(x_pos, model_aucs, color=model_types, edgecolor="#3d2817",
-                  width=0.7)
-    ax_models.set_xticks(x_pos)
-    ax_models.set_xticklabels(model_names, rotation=30, ha="right",
-                               color="#faf6ed", fontsize=7)
-    ax_models.set_ylabel("ROC-AUC", color="#c99347", fontsize=9)
-    ax_models.set_ylim(0.80, 0.97)
-    ax_models.tick_params(colors="#c99347", labelsize=8)
-    for spine in ax_models.spines.values():
-        spine.set_color("#3d2817")
-    ax_models.set_title("Tum Modeller  (sari=ML, kirmizi=DL)", color="#c99347",
-                         fontsize=10, fontweight="bold", pad=6)
-    for i, v in enumerate(model_aucs):
-        ax_models.text(i, v + 0.002, f"{v:.3f}", ha="center", va="bottom",
-                       color="#faf6ed", fontsize=6)
-    dashboard_path = str(Path(__file__).parent / "_dashboard_temp.png")
-    plt.savefig(dashboard_path, dpi=120, bbox_inches="tight",
-                facecolor="#1a1207", edgecolor="none")
-    plt.close()
-    # ── Details markdown ──
-    details_md = f"""
-## Detayli Sonuclar
-| Metrik | Deger |
-|--------|-------|
-| AI Olasiligi | %{ai_prob*100:.2f} |
-| Insan Olasiligi | %{human_prob*100:.2f} |
-| Model | {best_model_name} |
-| Audio Suresi | {duration:.1f}s |
-| Islem Suresi | {elapsed:.2f}s |
-| Tempo | {feats['tempo_bpm']:.1f} BPM |
-| RMS Energy | {feats['rms_energy']:.6f} |
-| Spectral Centroid | {feats['spectral_centroid_mean']:.1f} Hz |
-| Spectral Flatness | {feats['spectral_flatness_mean']:.6f} |
-| Harmonic Ratio | {feats['harmonic_ratio']:.4f} |
-| Zero Crossing Rate | {feats['zero_crossing_rate']:.6f} |
-| Beat Count | {feats['beat_count']:.0f} |
-## Tum {n_features} Ozellik Degerleri
-| Ozellik | Deger | Global Onem |
-|---------|-------|-------------|
-"""
-    for col in feature_cols:
-        val = feats.get(col, 0.0)
-        imp_val = importance.get(col, 0.0)
-        bar = "█" * int(imp_val * 200)
-        details_md += f"| {col} | {val:.6f} | {imp_val:.4f} {bar} |\n"
-    return verdict, dashboard_path, None, details_md
-# ── Figures gallery ─────────────────────────────────────────────
-def get_figure_paths():
-    if FIGURES_DIR.exists():
-        return sorted(str(p) for p in FIGURES_DIR.glob("*.png"))
-    return []
-# ── Model comparison table ──────────────────────────────────────
-ALL_MODELS_MD = """## Tum Egitilmis Model Sonuclari
-> **En iyi model**: {best} | **Veri**: {n_samples} ornek | **Ozellik**: {n_feat} | **CV**: 5-fold stratified
-### Makine Ogrenmesi (ML) Modelleri
-| Model | Accuracy | Precision | Recall | F1 Score | ROC-AUC | Egitim Suresi |
-|-------|----------|-----------|--------|----------|---------|---------------|
-""".format(
-    best=best_model_name,
-    n_samples=training_results.get("_n_samples", "?"),
-    n_feat=n_features,
-)
-DL_MODELS_MD = """
-### Derin Ogrenme (DL) Modelleri
-| Model | Accuracy | Precision | Recall | F1 Score | ROC-AUC | Egitim Suresi |
-|-------|----------|-----------|--------|----------|---------|---------------|
-"""
-for name, data in all_models:
-    row = (
-        f"| {'**' + name + '**' if name == best_model_name else name} | "
-        f"{data.get('accuracy', 0):.4f} | "
-        f"{data.get('precision', 0):.4f} | "
-        f"{data.get('recall', 0):.4f} | "
-        f"{data.get('f1', 0):.4f} | "
-        f"{data.get('roc_auc', 0):.4f} | "
-        f"{data.get('train_time_sec', 0):.1f}s |\n"
     )
-    if data.get("type") == "deep_learning":
-        DL_MODELS_MD += row
-    else:
-        ALL_MODELS_MD += row
-ALL_MODELS_MD += DL_MODELS_MD
-ALL_MODELS_MD += f"""
-### Ozellik Onemi (En Iyi Model: {best_model_name})
-| Sira | Ozellik | Onem | |
-|------|---------|------|-|
-"""
-for i, (fname, imp) in enumerate(top_features, 1):
-    bar = "█" * int(imp * 300)
-    ALL_MODELS_MD += f"| {i} | {fname} | {imp:.4f} | {bar} |\n"
-ALL_MODELS_MD += f"""
-### Notlar
-- **Veri sizintisi duzeltildi**: `duration_sec` ve `sample_rate` ozelliklerden cikarildi (v2)
-- **Regularizasyon**: max_depth=4, subsample=0.75, min_samples_leaf=12
-- Tree ensemble modellerinde train AUC=1.0 yapisal bir ozelliktir (overfitting degil)
-- DL modelleri (Deep MLP, Residual MLP) regularize edilmis ML'yi geciyor
-"""
-# ── Gradio UI ───────────────────────────────────────────────────
 AURIS_CSS = """
 .gradio-container {
-    background: linear-gradient(135deg, #1a1207 0%, #2a1f10 50%, #1a1207 100%) !important;
-    max-width: 1400px !important;
-}
-h1, h2, h3 { color: #c99347 !important; }
-p, span, label, td, th { color: #faf6ed !important; }
-table { border-color: #3d2817 !important; }
-th { background: #2a1f10 !important; color: #c99347 !important; }
-footer { display: none !important; }
-"""
-HEADER_HTML = f"""
-<div style="text-align:center; padding: 16px 0;">
-    <h1 style="color:#c99347; margin:0; font-size:2.2em;">AURIS</h1>
-    <p style="color:#faf6ed; opacity:0.8; margin:4px 0;">
-        AI Music Detection System — Yapay Zeka Muzik Tespit Platformu
-    </p>
-    <p style="color:#c99347; font-size:0.9em;">
-        {best_model_name} | {n_features} ozellik |
-        {training_results.get('_n_samples', '?')} ornek |
-        AUC: {training_results.get(best_model_name, {}).get('roc_auc', '?')}
-    </p>
-</div>
 """
-with gr.Blocks(title="AURIS — AI Music Detection") as demo:
-    gr.HTML(HEADER_HTML)
     with gr.Tabs():
         with gr.Tab("Analiz"):
-            with gr.Row():
-                with gr.Column(scale=1):
                     audio_input = gr.Audio(
-                        label="Audio Dosyasi Yukle (.mp3, .wav, .flac)",
                         type="filepath",
                     )
-                    analyze_btn = gr.Button(
-                        "Analiz Et",
-                        variant="primary",
-                        size="lg",
                     )
-                    verdict_text = gr.Textbox(
-                        label="Sonuc",
-                        interactive=False,
-                        lines=2,
-                    )
-                with gr.Column(scale=2):
-                    dashboard_img = gr.Image(
-                        label="AURIS Dashboard",
-                        type="filepath",
-                        height=500,
-                    )
-            details_output = gr.Markdown()
-            analyze_btn.click(
-                fn=predict,
-                inputs=[audio_input],
-                outputs=[verdict_text, dashboard_img, gr.State(), details_output],
             )
-        with gr.Tab("Model Karsilastirmasi"):
-            gr.Markdown(ALL_MODELS_MD)
-        with gr.Tab("Akademik Gorseller"):
-            gr.Markdown("## Egitim ve Degerlendirme Gorselleri")
-            figure_paths = get_figure_paths()
             if figure_paths:
                 gr.Gallery(
                     value=figure_paths,
-                    label="Figures",
                     columns=3,
                     height="auto",
                     object_fit="contain",
                 )
-    gr.HTML("""
-    <div style="text-align:center; padding:12px; opacity:0.6;">
-        <p style="color:#c99347; font-size:0.85em;">
-            AURIS v1 — Duzce Universitesi Bilgisayar Muhendisligi Bitirme Projesi<br>
-            Hasan Arthur Altuntas — 2026
-        </p>
-    </div>
-    """)
 if __name__ == "__main__":
     demo.launch(
-        server_name="0.0.0.0",
-        server_port=7862,
         share=False,
-        inbrowser=True,
         css=AURIS_CSS,
     )

 """
+AURIS Local Demo - AI Music Detection
+Calistir:
     python local_demo.py
 """
+from __future__ import annotations
+import argparse
+import csv
 import json
 import pickle
+import socket
 import time
+import warnings
+from collections import Counter
+from dataclasses import dataclass
 from pathlib import Path
+from typing import Any
 import gradio as gr
 import numpy as np
+from app.training.extract_features_batch import extract_sample_features
+BASE_DIR = Path(__file__).resolve().parent
+PROJECT_ROOT = BASE_DIR.parent
+MODELS_DIR = BASE_DIR / "models"
+FIGURES_DIR = PROJECT_ROOT / "docs" / "academic" / "figures"
+DATASET_DIR = PROJECT_ROOT / "DataSet"
+TEST_AUDIO_DIR = BASE_DIR / "test_audio"
+@dataclass(frozen=True)
+class DemoArtifacts:
+    """Immutable bundle of everything the demo loads from disk at start-up."""
+    # Feature column names read from feature_columns_v1.json.
+    feature_cols: list[str]
+    # Parsed contents of training_results.json.
+    training_results: dict[str, Any]
+    # Object unpickled from feature_scaler_v1.pkl.
+    scaler: Any
+    # Model display name -> unpickled model.
+    loaded_models: dict[str, Any]
+    # Value of the "_best_model" entry in the training results.
+    best_model_name: str
+    # UI label that corresponds to the best model.
+    best_model_label: str
+    # UI labels for the loaded models, ordered by ROC-AUC (highest first).
+    model_labels: list[str]
+    # UI label -> model display name.
+    label_to_name: dict[str, str]
+    # Value of the "_feature_importance" entry in the training results.
+    feature_importance: dict[str, float]
+    # Parsed feature_stats_v1.json, or {} when that file is absent.
+    feature_stats: dict[str, Any]
+    # Summary of DataSet/manifest.csv, or {} when that file is absent.
+    dataset_summary: dict[str, Any]
+def _safe_model_name(name: str) -> str:
+    return (
+        name.lower()
+        .replace(" ", "_")
+        .replace("(", "")
+        .replace(")", "")
+        .replace("/", "_")
+    )
+def _load_pickle(path: Path) -> Any:
+    with open(path, "rb") as f:
+        return pickle.load(f)
+def _load_json(path: Path) -> dict[str, Any]:
+    with open(path, "r", encoding="utf-8") as f:
+        return json.load(f)
+def _require_file(path: Path) -> None:
+    if not path.exists():
+        raise FileNotFoundError(f"Missing required artifact: {path}")
+def _load_feature_stats() -> dict[str, Any]:
+    stats_path = MODELS_DIR / "feature_stats_v1.json"
+    if not stats_path.exists():
+        return {}
+    return _load_json(stats_path)
+def _load_dataset_summary() -> dict[str, Any]:
+    manifest_path = DATASET_DIR / "manifest.csv"
+    if not manifest_path.exists():
+        return {}
+    label_counts: Counter[str] = Counter()
+    generator_counts: Counter[str] = Counter()
+    total = 0
+    with open(manifest_path, "r", encoding="utf-8") as f:
+        reader = csv.DictReader(f)
+        for row in reader:
+            total += 1
+            label = row.get("label", "").strip() or str(row.get("label_int", ""))
+            generator = row.get("generator", "").strip() or "unknown"
+            label_counts[label] += 1
+            generator_counts[generator] += 1
+    return {
+        "manifest_path": str(manifest_path),
+        "total": total,
+        "ai": label_counts.get("ai", 0) + label_counts.get("1", 0),
+        "human": label_counts.get("human", 0) + label_counts.get("0", 0),
+        "generators": generator_counts.most_common(8),
     }
+def _find_matching_name(raw_name: str, training_results: dict[str, Any]) -> str:
+    for name in training_results:
+        if name.startswith("_"):
+            continue
+        if _safe_model_name(name) == raw_name:
+            return name
+    return raw_name.replace("_", " ").title()
+def _is_model_compatible(model: Any, n_features: int) -> bool:
+    expected = getattr(model, "n_features_in_", None)
+    return expected in (None, n_features)
+def _load_artifacts() -> DemoArtifacts:
+    scaler_path = MODELS_DIR / "feature_scaler_v1.pkl"
+    columns_path = MODELS_DIR / "feature_columns_v1.json"
+    results_path = MODELS_DIR / "training_results.json"
+    best_model_path = MODELS_DIR / "auris_classifier_v1.pkl"
+    for required in (scaler_path, columns_path, results_path, best_model_path):
+        _require_file(required)
+    scaler = _load_pickle(scaler_path)
+    feature_cols = _load_json(columns_path)
+    training_results = _load_json(results_path)
+    feature_importance = training_results.get("_feature_importance", {})
+    best_model_name = training_results.get("_best_model", "Gradient Boosting")
+    loaded_models: dict[str, Any] = {}
+    for model_path in sorted(MODELS_DIR.glob("model_*.pkl")):
+        try:
+            model = _load_pickle(model_path)
+        except Exception as exc:  # noqa: BLE001
+            print(f"Skipping model file {model_path.name}: {exc}")
+            continue
+        raw_name = model_path.stem.replace("model_", "")
+        model_name = _find_matching_name(raw_name, training_results)
+        if not _is_model_compatible(model, len(feature_cols)):
+            print(
+                f"Skipping incompatible model {model_path.name}: "
+                f"expected {len(feature_cols)} features"
+            )
+            continue
+        loaded_models[model_name] = model
+    if best_model_name not in loaded_models:
+        best_model = _load_pickle(best_model_path)
+        if _is_model_compatible(best_model, len(feature_cols)):
+            loaded_models[best_model_name] = best_model
+    if not loaded_models:
+        raise RuntimeError("No compatible models were found in the models directory.")
+    sorted_names = sorted(
+        loaded_models,
+        key=lambda name: training_results.get(name, {}).get("roc_auc", 0.0),
+        reverse=True,
+    )
+    label_to_name: dict[str, str] = {}
+    model_labels: list[str] = []
+    for name in sorted_names:
+        result = training_results.get(name, {})
+        auc = result.get("roc_auc", 0.0)
+        acc = result.get("accuracy", 0.0)
+        badge = " [EN IYI]" if name == best_model_name else ""
+        label = f"{name}{badge} | AUC {auc:.3f} | Acc {acc:.1%}"
+        label_to_name[label] = name
+        model_labels.append(label)
+    best_model_label = next(
+        label for label, name in label_to_name.items() if name == best_model_name
+    )
+    return DemoArtifacts(
+        feature_cols=feature_cols,
+        training_results=training_results,
+        scaler=scaler,
+        loaded_models=loaded_models,
+        best_model_name=best_model_name,
+        best_model_label=best_model_label,
+        model_labels=model_labels,
+        label_to_name=label_to_name,
+        feature_importance=feature_importance,
+        feature_stats=_load_feature_stats(),
+        dataset_summary=_load_dataset_summary(),
     )
+# Built once when the module is imported; the helpers below read from it.
+ARTIFACTS = _load_artifacts()
+def _example_audio_paths(limit: int = 6) -> list[list[str]]:
+    if not TEST_AUDIO_DIR.exists():
+        return []
+    candidates = sorted(
+        path
+        for path in TEST_AUDIO_DIR.iterdir()
+        if path.is_file() and path.suffix.lower() in {".wav", ".mp3", ".flac"}
+    )
+    return [[str(path)] for path in candidates[:limit]]
+def _normalize_score(value: float, cap: float = 1.0) -> float:
+    return max(0.0, min(float(value), cap))
+def _extract_demo_features(audio_path: str) -> tuple[dict[str, float], float]:
+    row = extract_sample_features(audio_path)
+    if row is None:
+        raise RuntimeError("Ozellik cikarimi basarisiz oldu.")
+    features = {
+        column: float(row.get(column, 0.0))
+        for column in ARTIFACTS.feature_cols
+    }
+    duration_sec = float(row.get("duration_sec", 0.0))
+    return features, duration_sec
+def _build_feature_vector(features: dict[str, float]) -> np.ndarray:
+    vector = np.array(
+        [[features.get(column, 0.0) for column in ARTIFACTS.feature_cols]],
+        dtype=np.float32,
+    )
+    return np.nan_to_num(vector, nan=0.0, posinf=1.0, neginf=-1.0)
+def _format_verdict(ai_prob: float) -> tuple[str, str, str]:
+    if ai_prob >= 0.75:
+        return "ai-high", "Yuksek AI ihtimali", "AI kaynakli izler baskin"
+    if ai_prob >= 0.55:
+        return "ai-mid", "AI olasiligi yuksek", "Model sentetik duzene yakin buldu"
+    if ai_prob >= 0.40:
+        return "human-mid", "Sinirda sonuc", "Insan ve AI sinyalleri birbirine yakin"
+    return "human-high", "Insan yapimiya yakin", "Dogal varyasyon daha guclu"
+def _build_result_html(
+    ai_prob: float,
+    duration: float,
+    elapsed: float,
+    selected_model_name: str,
+) -> str:
+    verdict_class, verdict_title, verdict_subtitle = _format_verdict(ai_prob)
+    confidence_pct = ai_prob * 100
+    human_pct = (1.0 - ai_prob) * 100
+    return f"""
+    <section class="hero-card {verdict_class}">
+      <div class="hero-card__eyebrow">Canli analiz sonucu</div>
+      <div class="hero-card__score">%{confidence_pct:.1f}</div>
+      <div class="hero-card__title">{verdict_title}</div>
+      <div class="hero-card__subtitle">{verdict_subtitle}</div>
+      <div class="hero-card__meta">
+        <span>Model: {selected_model_name}</span>
+        <span>Sure: {duration:.1f}s</span>
+        <span>Islem: {elapsed:.2f}s</span>
+        <span>Insan olasiligi: %{human_pct:.1f}</span>
+      </div>
+    </section>
+    """
+def _build_signal_html(features: dict[str, float]) -> str:
+    rows = [
+        ("Spektral duzen", _normalize_score(features.get("spectral_regularity", 0.0))),
+        ("Zamansal kalip", _normalize_score(features.get("temporal_patterns", 0.0))),
+        ("Harmonik yapi", _normalize_score(features.get("harmonic_structure", 0.0))),
+        ("Vokal AI izi", _normalize_score(features.get("vocal_ai_score", 0.0))),
+        ("Vokal guveni", _normalize_score(features.get("vocal_confidence", 0.0))),
+        ("Pitch stabilitesi", _normalize_score(features.get("pitch_stability_score", 0.0))),
+    ]
+    parts = ['<section class="panel-card"><div class="panel-card__title">Sinyal panosu</div>']
+    for label, raw_value in rows:
+        pct = raw_value * 100
+        bar_class = "bar-warm" if pct >= 60 else "bar-cool" if pct <= 35 else "bar-mid"
+        parts.append(
+            f"""
+            <div class="meter-row">
+              <div class="meter-row__label">{label}</div>
+              <div class="meter-row__track">
+                <div class="meter-row__fill {bar_class}" style="width:{pct:.0f}%"></div>
+              </div>
+              <div class="meter-row__value">%{pct:.0f}</div>
+            </div>
+            """
+        )
+    parts.append("</section>")
+    return "".join(parts)
def _build_model_table_html(
    selected_model_name: str,
    feature_vector: np.ndarray,
) -> str:
    """Render the "Model karsilastirmasi" table as an HTML fragment.

    The feature vector is scaled with ``ARTIFACTS.scaler`` and scored by every
    model in ``ARTIFACTS.loaded_models``. Models whose ``predict_proba`` call
    raises are left out of the table. Rows are ordered by the live probability,
    highest first, and are complemented with the stored ``roc_auc`` and
    ``accuracy`` values from ``ARTIFACTS.training_results``.

    Args:
        selected_model_name: Model whose row gets the ``is-selected`` class.
        feature_vector: Unscaled feature vector, as accepted by the scaler.

    Returns:
        The complete ``<section class="panel-card">`` markup.
    """
    scaled_vector = ARTIFACTS.scaler.transform(feature_vector)

    live_scores: list[tuple[str, float]] = []
    for model_name, estimator in ARTIFACTS.loaded_models.items():
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=UserWarning)
                ai_probability = float(estimator.predict_proba(scaled_vector)[0][1])
        except Exception:  # noqa: BLE001
            # Best effort: a model that cannot score this input is skipped.
            continue
        live_scores.append((model_name, ai_probability))
    ranked = sorted(live_scores, key=lambda pair: pair[1], reverse=True)

    html_rows = [
        '<section class="panel-card"><div class="panel-card__title">Model karsilastirmasi</div>',
        "<table class='model-table'><thead><tr><th>Model</th><th>Canli AI %</th><th>CV AUC</th><th>Acc</th></tr></thead><tbody>",
    ]
    for model_name, ai_probability in ranked:
        stored = ARTIFACTS.training_results.get(model_name, {})
        css_class = ""
        if model_name == selected_model_name:
            css_class = "is-selected"
        badge = ""
        if model_name == ARTIFACTS.best_model_name:
            badge = " <span class='badge'>en iyi</span>"
        cv_auc = stored.get("roc_auc", 0.0)
        cv_accuracy = stored.get("accuracy", 0.0)
        html_rows.append(
            f"""
            <tr class="{css_class}">
              <td>{model_name}{badge}</td>
              <td>%{ai_probability * 100:.1f}</td>
              <td>{cv_auc:.4f}</td>
              <td>{cv_accuracy:.4f}</td>
            </tr>
            """
        )
    html_rows.append("</tbody></table></section>")
    return "".join(html_rows)
def _build_feature_details_md(features: dict[str, float], duration: float) -> str:
    """Build the Markdown detail report shown under the analysis result.

    The report has up to three parts: a short audio summary table, the
    deviation table produced by ``_build_feature_insights_md`` (omitted when
    that helper returns an empty string), and a table listing every column in
    ``ARTIFACTS.feature_cols`` with its value and its entry in
    ``ARTIFACTS.feature_importance``. Missing values are shown as ``0.0``.

    Args:
        features: Extracted feature values keyed by feature name.
        duration: Audio duration, printed with an ``s`` suffix.

    Returns:
        The Markdown document as a single newline-joined string.
    """
    weights = ARTIFACTS.feature_importance

    def metric(key: str) -> float:
        return features.get(key, 0.0)

    md = ["## Ses ozet", "", "| Metrik | Deger |", "|--------|-------|"]
    md.append(f"| Sure | {duration:.1f}s |")
    md.append(f"| Tempo | {metric('tempo_bpm'):.1f} BPM |")
    md.append(f"| RMS energy | {metric('rms_energy'):.6f} |")
    md.append(f"| Harmonic ratio | {metric('harmonic_ratio'):.4f} |")
    md.append(f"| Spectral centroid | {metric('spectral_centroid_mean'):.1f} Hz |")
    md.append(f"| Vocal confidence | {metric('vocal_confidence'):.3f} |")
    md.append("")

    deviations = _build_feature_insights_md(features)
    if deviations:
        md += [deviations, ""]

    md += [
        "## Tum ozellikler",
        "",
        "| Ozellik | Deger | Global onem |",
        "|---------|-------|-------------|",
    ]
    md.extend(
        f"| {name} | {metric(name):.6f} | {weights.get(name, 0.0):.4f} |"
        for name in ARTIFACTS.feature_cols
    )
    return "\n".join(md)
def _build_feature_insights_md(features: dict[str, float]) -> str:
    """Build the "Dikkat ceken sapmalar" Markdown table of notable deviations.

    For every column in ``ARTIFACTS.feature_cols`` that has an entry in
    ``ARTIFACTS.feature_stats``, the value is converted to a z-score using the
    stored ``mean`` and ``std`` (a missing or zero ``std`` is replaced by 1.0).
    The ten columns with the largest absolute z-score are listed together with
    the per-class means found under the ``_by_class`` entry.

    Args:
        features: Extracted feature values keyed by feature name.

    Returns:
        The Markdown table, or an empty string when no statistics are
        available or no column could be scored.
    """
    stats = ARTIFACTS.feature_stats
    if not stats:
        return ""
    by_class = stats.get("_by_class", {})

    def class_mean(label: str, column: str) -> float:
        return float(by_class.get(label, {}).get(column, {}).get("mean", 0.0))

    # Tuples lead with |z| so a plain descending sort ranks by deviation.
    candidates: list[tuple[float, str, float, float, float, float]] = []
    for column in ARTIFACTS.feature_cols:
        column_stats = stats.get(column)
        if not column_stats:
            continue
        spread = float(column_stats.get("std", 1.0)) or 1.0
        observed = float(features.get(column, 0.0))
        centre = float(column_stats.get("mean", 0.0))
        z = (observed - centre) / spread
        candidates.append(
            (
                abs(z),
                column,
                observed,
                z,
                class_mean("ai", column),
                class_mean("human", column),
            )
        )
    if not candidates:
        return ""

    top_rows = sorted(candidates, reverse=True)[:10]
    table = [
        "## Dikkat ceken sapmalar",
        "",
        "| Ozellik | Deger | Z-score | AI ort. | Human ort. |",
        "|---------|-------|---------|---------|------------|",
    ]
    table.extend(
        f"| {column} | {observed:.6f} | {z:+.2f} | {ai_mean:.6f} | {human_mean:.6f} |"
        for _, column, observed, z, ai_mean, human_mean in top_rows
    )
    return "\n".join(table)
def analyze_audio(audio_file: Any, selected_model_label: str) -> tuple[str, str, str, str]:
    """Run the demo analysis for one uploaded audio file.

    Args:
        audio_file: Value delivered by the audio input. A tuple is reduced to
            its first item, anything else is used as the path directly. A
            falsy value means nothing was uploaded.
        selected_model_label: Label chosen in the model dropdown. Labels not
            present in ``ARTIFACTS.label_to_name`` fall back to
            ``ARTIFACTS.best_model_name``.

    Returns:
        A 4-tuple ``(result card HTML, signal panel HTML, model table HTML,
        details Markdown)``. When no file was given or feature extraction
        fails, only the first item is filled and the rest are empty strings.
    """
    # Function-scope import keeps this block self-contained.
    import html

    if not audio_file:
        empty = '<section class="hero-card neutral"><div class="hero-card__title">Ses dosyasi bekleniyor</div><div class="hero-card__subtitle">Analiz icin bir .wav, .mp3 veya .flac yukleyin.</div></section>'
        return empty, "", "", ""

    audio_path = audio_file[0] if isinstance(audio_file, tuple) else audio_file
    selected_model_name = ARTIFACTS.label_to_name.get(
        selected_model_label,
        ARTIFACTS.best_model_name,
    )

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments during the analysis.
    start_time = time.perf_counter()
    try:
        features, duration = _extract_demo_features(str(audio_path))
    except Exception as exc:  # noqa: BLE001
        # The message may echo the uploaded file name or other arbitrary
        # text; escape it so it cannot break or inject markup in the card.
        safe_message = html.escape(str(exc))
        error_html = f'<section class="hero-card neutral"><div class="hero-card__title">Analiz basarisiz</div><div class="hero-card__subtitle">{safe_message}</div></section>'
        return error_html, "", "", ""

    feature_vector = _build_feature_vector(features)
    scaled = ARTIFACTS.scaler.transform(feature_vector)
    model = ARTIFACTS.loaded_models[selected_model_name]
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        ai_prob = float(model.predict_proba(scaled)[0][1])
    elapsed = time.perf_counter() - start_time

    result_html = _build_result_html(ai_prob, duration, elapsed, selected_model_name)
    signal_html = _build_signal_html(features)
    model_table_html = _build_model_table_html(selected_model_name, feature_vector)
    details_md = _build_feature_details_md(features, duration)
    return result_html, signal_html, model_table_html, details_md
def build_models_md() -> str:
    """Build the Markdown shown on the "Modeller" tab.

    The document contains a short summary (best model, sample, feature and
    fold counts), a metrics table with one row per trained model ordered by
    ``roc_auc`` descending, the selected hyper-parameters of each model as
    JSON, and the 15 largest entries of ``ARTIFACTS.feature_importance``
    when any exist. Keys of ``ARTIFACTS.training_results`` that start with
    an underscore, or whose value is not a dict, are not treated as models.

    Returns:
        The Markdown document as a single newline-joined string.
    """
    results = ARTIFACTS.training_results

    md = ["## Egitilmis modeller", ""]
    md.append(f"- En iyi model: **{ARTIFACTS.best_model_name}**")
    md.append(f"- Ornek sayisi: **{results.get('_n_samples', '?')}**")
    md.append(
        f"- Ozellik sayisi: **{results.get('_n_features', len(ARTIFACTS.feature_cols))}**"
    )
    md.append(f"- CV kat sayisi: **{results.get('_n_folds', '?')}**")
    md += [
        "",
        "| Model | CV AUC | Holdout AUC | Acc | F1 |",
        "|------|--------|-------------|-----|----|",
    ]

    ranked = sorted(
        (
            key
            for key in results
            if not key.startswith("_") and isinstance(results[key], dict)
        ),
        key=lambda key: results[key].get("roc_auc", 0.0),
        reverse=True,
    )

    for key in ranked:
        entry = results[key]
        shown = key
        if key == ARTIFACTS.best_model_name:
            shown = f"**{key}**"
        cells = (
            shown,
            f"{entry.get('roc_auc', 0.0):.4f}",
            f"{entry.get('validation_auc', 0.0):.4f}",
            f"{entry.get('accuracy', 0.0):.4f}",
            f"{entry.get('f1', 0.0):.4f}",
        )
        md.append("| " + " | ".join(cells) + " |")

    md += ["", "## Secilen parametreler", ""]
    for key in ranked:
        chosen = results[key].get("selected_params", {})
        md.append(f"- **{key}**: `{json.dumps(chosen, ensure_ascii=True)}`")

    by_weight = sorted(
        ARTIFACTS.feature_importance.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_features = by_weight[:15]
    if top_features:
        md += ["", "## Ilk 15 ozellik onemi", "", "| Ozellik | Onem |", "|---------|------|"]
        md.extend(f"| {feature} | {weight:.4f} |" for feature, weight in top_features)
    return "\n".join(md)
def build_dataset_md() -> str:
    """Build the Markdown shown on the "Veri Seti" tab.

    Reads ``ARTIFACTS.dataset_summary`` and renders two tables: overall
    counts (manifest path, total, AI, human, number of feature columns) and
    one row per ``(generator, count)`` pair found under ``generators``.

    Returns:
        The Markdown document, or a short "not found" message when the
        summary is empty or missing.
    """
    info = ARTIFACTS.dataset_summary
    if not info:
        return "Veri seti ozeti bulunamadi."

    overview = [
        "## Egitim veri seti",
        "",
        "| Metrik | Deger |",
        "|--------|-------|",
        f"| Manifest | `{info.get('manifest_path', '-')}` |",
        f"| Toplam ornek | {info.get('total', 0)} |",
        f"| AI | {info.get('ai', 0)} |",
        f"| Human | {info.get('human', 0)} |",
        f"| Ozellik | {len(ARTIFACTS.feature_cols)} |",
        "",
    ]
    sources = [
        "## Kaynak dagilimi",
        "",
        "| Kaynak | Adet |",
        "|--------|------|",
    ]
    sources.extend(
        f"| {source} | {amount} |" for source, amount in info.get("generators", [])
    )
    return "\n".join(overview + sources)
 AURIS_CSS = """
+:root {
+    --bg: #120f0b;
+    --panel: rgba(31, 24, 17, 0.92);
+    --panel-strong: rgba(42, 31, 22, 0.98);
+    --line: rgba(215, 182, 122, 0.18);
+    --text: #f5ead8;
+    --muted: #c8af8a;
+    --gold: #dfb56f;
+    --gold-soft: #f1d4a2;
+    --danger: #d66a55;
+    --danger-soft: #5d2218;
+    --safe: #7fbb7c;
+    --safe-soft: #1f3b2d;
+}
+body {
+    background:
+        radial-gradient(circle at top left, rgba(223, 181, 111, 0.12), transparent 28%),
+        radial-gradient(circle at bottom right, rgba(88, 43, 23, 0.24), transparent 26%),
+        linear-gradient(135deg, #0d0a07 0%, #18120d 45%, #120f0b 100%);
+}
 .gradio-container {
+    max-width: 1360px !important;
+    margin: 0 auto !important;
+    background: transparent !important;
+    color: var(--text) !important;
+    font-family: "Segoe UI", sans-serif !important;
+}
+.app-shell {
+    padding: 24px 0 10px;
+}
+.app-hero {
+    display: grid;
+    grid-template-columns: 1.4fr 1fr;
+    gap: 18px;
+    align-items: stretch;
+    margin-bottom: 18px;
+}
+.app-brand,
+.app-meta {
+    background: linear-gradient(160deg, rgba(35, 26, 18, 0.95), rgba(19, 14, 10, 0.96));
+    border: 1px solid var(--line);
+    border-radius: 22px;
+    padding: 22px 24px;
+    box-shadow: 0 24px 70px rgba(0, 0, 0, 0.28);
+}
+.app-brand__eyebrow {
+    text-transform: uppercase;
+    letter-spacing: 0.24em;
+    font-size: 0.78rem;
+    color: var(--gold);
+    margin-bottom: 12px;
+}
+.app-brand__title {
+    font-size: 3rem;
+    font-weight: 800;
+    line-height: 0.98;
+    margin: 0;
+    color: #fff6e6;
+}
+.app-brand__subtitle {
+    margin: 14px 0 0;
+    color: var(--muted);
+    line-height: 1.6;
+    max-width: 46rem;
+}
+.app-meta__grid {
+    display: grid;
+    grid-template-columns: repeat(2, minmax(0, 1fr));
+    gap: 12px;
+}
+.meta-chip {
+    background: rgba(255, 255, 255, 0.03);
+    border: 1px solid rgba(255, 255, 255, 0.06);
+    border-radius: 16px;
+    padding: 14px 16px;
+}
+.meta-chip__label {
+    display: block;
+    color: var(--muted);
+    font-size: 0.78rem;
+    margin-bottom: 6px;
+}
+.meta-chip__value {
+    display: block;
+    color: #fff2db;
+    font-size: 1.1rem;
+    font-weight: 700;
+}
+.hero-card,
+.panel-card {
+    background: linear-gradient(180deg, rgba(34, 26, 19, 0.96), rgba(19, 14, 10, 0.96));
+    border: 1px solid var(--line);
+    border-radius: 20px;
+    padding: 20px;
+    box-shadow: 0 22px 60px rgba(0, 0, 0, 0.24);
+}
+.hero-card__eyebrow,
+.panel-card__title {
+    font-size: 0.82rem;
+    letter-spacing: 0.12em;
+    text-transform: uppercase;
+    color: var(--gold);
+    margin-bottom: 10px;
+}
+.hero-card__score {
+    font-size: clamp(2.8rem, 7vw, 4.8rem);
+    line-height: 0.95;
+    font-weight: 900;
+    color: #fff6e7;
+}
+.hero-card__title {
+    margin-top: 8px;
+    font-size: 1.4rem;
+    font-weight: 800;
+    color: #fff6e7;
+}
+.hero-card__subtitle {
+    margin-top: 8px;
+    color: var(--muted);
+    line-height: 1.6;
+}
+.hero-card__meta {
+    display: flex;
+    flex-wrap: wrap;
+    gap: 10px;
+    margin-top: 16px;
+}
+.hero-card__meta span {
+    padding: 7px 12px;
+    border-radius: 999px;
+    background: rgba(255, 255, 255, 0.04);
+    border: 1px solid rgba(255, 255, 255, 0.08);
+    color: #f8ead4;
+    font-size: 0.88rem;
+}
+.hero-card.ai-high,
+.hero-card.ai-mid {
+    background: linear-gradient(150deg, rgba(84, 28, 20, 0.96), rgba(33, 15, 12, 0.97));
+}
+.hero-card.human-high,
+.hero-card.human-mid {
+    background: linear-gradient(150deg, rgba(23, 49, 34, 0.96), rgba(12, 23, 18, 0.97));
+}
+.hero-card.neutral {
+    background: linear-gradient(150deg, rgba(36, 29, 22, 0.96), rgba(17, 14, 11, 0.97));
+}
+.meter-row {
+    display: grid;
+    grid-template-columns: 170px minmax(0, 1fr) 54px;
+    gap: 12px;
+    align-items: center;
+    margin-top: 12px;
+}
+.meter-row__label {
+    color: #f7ecd8;
+    font-size: 0.92rem;
+}
+.meter-row__track {
+    position: relative;
+    height: 14px;
+    background: rgba(255, 255, 255, 0.06);
+    border-radius: 999px;
+    overflow: hidden;
+    border: 1px solid rgba(255, 255, 255, 0.08);
+}
+.meter-row__fill {
+    height: 100%;
+    border-radius: 999px;
+}
+.bar-warm {
+    background: linear-gradient(90deg, var(--gold), var(--danger));
+}
+.bar-mid {
+    background: linear-gradient(90deg, var(--gold), var(--gold-soft));
+}
+.bar-cool {
+    background: linear-gradient(90deg, #76c490, #5ca39b);
+}
+.meter-row__value {
+    color: var(--gold-soft);
+    font-weight: 700;
+    text-align: right;
+}
+.model-table {
+    width: 100%;
+    border-collapse: collapse;
+}
+.model-table th,
+.model-table td {
+    padding: 10px 12px;
+    text-align: left;
+    border-bottom: 1px solid rgba(255, 255, 255, 0.08);
+}
+.model-table th {
+    color: var(--gold);
+    font-size: 0.82rem;
+    text-transform: uppercase;
+    letter-spacing: 0.06em;
+}
+.model-table td {
+    color: #f6ead6;
+}
+.model-table tr.is-selected td {
+    background: rgba(223, 181, 111, 0.08);
+}
+.badge {
+    display: inline-block;
+    margin-left: 8px;
+    padding: 3px 8px;
+    border-radius: 999px;
+    font-size: 0.72rem;
+    color: #fff2db;
+    background: rgba(223, 181, 111, 0.18);
+    border: 1px solid rgba(223, 181, 111, 0.25);
+}
+.block {
+    border: 1px solid var(--line) !important;
+    border-radius: 18px !important;
+    background: var(--panel) !important;
+}
+.gr-button-primary {
+    background: linear-gradient(135deg, #d4a85c, #b46d3f) !important;
+    border: 0 !important;
+    color: #20150d !important;
+    font-weight: 800 !important;
+}
+.prose,
+.prose * {
+    color: var(--text) !important;
+}
+.prose table {
+    border-collapse: collapse;
+    width: 100%;
+}
+.prose th,
+.prose td {
+    padding: 8px 10px;
+    border: 1px solid rgba(255, 255, 255, 0.08);
+}
+.prose th {
+    background: rgba(255, 255, 255, 0.04);
+    color: var(--gold) !important;
+}
+footer {
+    display: none !important;
+}
+@media (max-width: 920px) {
+    .app-hero {
+        grid-template-columns: 1fr;
+    }
+    .app-meta__grid {
+        grid-template-columns: 1fr;
+    }
+    .meter-row {
+        grid-template-columns: 1fr;
+    }
+}
 """
def _build_header_html() -> str:
    """Render the page header: branding text plus four summary chips.

    The chips show ``ARTIFACTS.best_model_name``, the number of loaded
    models, the ``_n_samples`` entry of the training results (``?`` when
    absent) and the first generator listed in the dataset summary (``-``
    when there is no summary).

    Returns:
        The complete ``<section class="app-shell">`` markup.
    """
    summary = ARTIFACTS.dataset_summary
    results = ARTIFACTS.training_results

    if summary:
        leading_source = summary.get("generators", [["-", 0]])[0][0]
    else:
        leading_source = "-"

    return f"""
    <section class="app-shell">
      <div class="app-hero">
        <div class="app-brand">
          <div class="app-brand__eyebrow">AURIS local demo</div>
          <h1 class="app-brand__title">AI Muzik Tespiti<br />Canli Analiz</h1>
          <p class="app-brand__subtitle">
            Demo arayuzu artik egitim artefact'lari ile ayni ozellik semasini kullaniyor.
            Yani yukledigin ses, `DataSet/features.csv` ile egitilen modellerle birebir uyumlu
            sekilde analiz ediliyor.
          </p>
        </div>
        <div class="app-meta">
          <div class="app-meta__grid">
            <div class="meta-chip">
              <span class="meta-chip__label">En iyi model</span>
              <span class="meta-chip__value">{ARTIFACTS.best_model_name}</span>
            </div>
            <div class="meta-chip">
              <span class="meta-chip__label">Model sayisi</span>
              <span class="meta-chip__value">{len(ARTIFACTS.loaded_models)}</span>
            </div>
            <div class="meta-chip">
              <span class="meta-chip__label">Veri seti</span>
              <span class="meta-chip__value">{results.get('_n_samples', '?')} ornek</span>
            </div>
            <div class="meta-chip">
              <span class="meta-chip__label">Baskin kaynak</span>
              <span class="meta-chip__value">{leading_source}</span>
            </div>
          </div>
        </div>
      </div>
    </section>
    """
# Assemble the Gradio UI at import time. Components are created in layout
# order, so the statement order below is significant.
with gr.Blocks(title="AURIS Local Demo") as demo:
    gr.HTML(_build_header_html())
    with gr.Tabs():
        with gr.Tab("Analiz"):
            with gr.Row(equal_height=False):
                # Left column: inputs and the trigger button.
                with gr.Column(scale=1, min_width=320):
                    audio_input = gr.Audio(
                        label="Ses dosyasi yukle",
                        type="filepath",
                    )
                    model_dropdown = gr.Dropdown(
                        choices=ARTIFACTS.model_labels,
                        value=ARTIFACTS.best_model_label,
                        label="Calistirilacak model",
                        interactive=True,
                    )
                    analyze_button = gr.Button("Analizi baslat", variant="primary", size="lg")
                    # Resolve the example list once so the emptiness check and
                    # the value handed to gr.Examples cannot disagree.
                    example_paths = _example_audio_paths()
                    if example_paths:
                        gr.Examples(
                            examples=example_paths,
                            inputs=[audio_input],
                            label="Hazir ornekler",
                        )
                # Right column: the four outputs filled by analyze_audio.
                with gr.Column(scale=2, min_width=520):
                    result_html = gr.HTML()
                    with gr.Row(equal_height=False):
                        signal_html = gr.HTML()
                        model_table_html = gr.HTML()
                    details_md = gr.Markdown()
            analyze_button.click(
                fn=analyze_audio,
                inputs=[audio_input, model_dropdown],
                outputs=[result_html, signal_html, model_table_html, details_md],
            )
        with gr.Tab("Modeller"):
            gr.Markdown(build_models_md())
        with gr.Tab("Veri Seti"):
            gr.Markdown(build_dataset_md())
        with gr.Tab("Gorseller"):
            # Every PNG in FIGURES_DIR is shown; a missing directory is
            # treated the same as an empty one.
            figure_paths = sorted(str(path) for path in FIGURES_DIR.glob("*.png")) if FIGURES_DIR.exists() else []
            if figure_paths:
                gr.Gallery(
                    value=figure_paths,
                    label="Akademik ciktılar",
                    columns=3,
                    height="auto",
                    object_fit="contain",
                )
            else:
                gr.Markdown("Gorsel bulunamadi.")
+def _pick_available_port(preferred_port: int) -> int:
+    for port in range(preferred_port, preferred_port + 25):
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+            sock.settimeout(0.2)
+            if sock.connect_ex(("127.0.0.1", port)) != 0:
+                return port
+    raise RuntimeError("Bos bir port bulunamadi.")
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Run the AURIS local Gradio demo")
+    parser.add_argument("--host", default="127.0.0.1", help="Bind address")
+    parser.add_argument("--port", type=int, default=7864, help="Preferred port")
+    parser.add_argument(
+        "--no-browser",
+        action="store_true",
+        help="Do not open the browser automatically",
+    )
+    return parser.parse_args()
 if __name__ == "__main__":
+    args = _parse_args()
+    port = _pick_available_port(args.port)
+    local_host = "127.0.0.1" if args.host == "0.0.0.0" else args.host
+    print("AURIS local demo")
+    print(f"Host: {args.host}")
+    print(f"Port: {port}")
+    print(f"Open: http://{local_host}:{port}")
     demo.launch(
+        server_name=args.host,
+        server_port=port,
         share=False,
+        inbrowser=not args.no_browser,
+        show_error=True,
         css=AURIS_CSS,
     )

requirements.txt CHANGED Viewed

@@ -38,6 +38,7 @@ scikit-learn>=1.3.2
 aiohttp>=3.9.1
 httpx>=0.26.0
 requests>=2.31.0
 # === Gradio Client (FST Layer 3 - external API) ===
 gradio_client>=1.0.0
@@ -62,4 +63,4 @@ google-api-python-client>=2.134.0
 google-auth-httplib2>=0.2.0
 google-auth-oauthlib>=1.2.0
 isodate>=0.6.1
-youtube-transcript-api>=0.6.2

 aiohttp>=3.9.1
 httpx>=0.26.0
 requests>=2.31.0
+gradio>=6.0.0,<7.0.0
 # === Gradio Client (FST Layer 3 - external API) ===
 gradio_client>=1.0.0
 google-auth-httplib2>=0.2.0
 google-auth-oauthlib>=1.2.0
 isodate>=0.6.1
+youtube-transcript-api>=0.6.2