Spaces:

Rthur2003
/

crowncode-backend

Sleeping

App Files Files Community

Rthur2003 committed on Mar 28

Commit

b74a8cb

1 Parent(s): bc1975b

feat: implement batch feature extraction for AURIS training pipeline

Browse files

Files changed (1) hide show

app/training/extract_features_batch.py +205 -0

app/training/extract_features_batch.py ADDED Viewed

	@@ -0,0 +1,205 @@

+"""
+Batch feature extraction for AURIS training pipeline.
+Runs feature_extractor and vocal_analyzer on every sample
+in a manifest CSV, collecting raw features (plus the heuristic
+scores, kept as inputs rather than as ground truth) into a
+single CSV for classifier training.
+"""
+from __future__ import annotations
+import csv
+import io
+import sys
+import traceback
+from pathlib import Path
+import numpy as np
+# Add the directory containing the `app` package (parents[2] of this file) to sys.path so the `app.*` imports below resolve
+sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+from app.services.feature_extractor import extract_features
+from app.services.vocal_analyzer import analyze_vocals
# Feature columns collected for every sample, grouped by where they come from.
# The concatenation order below is the column order used for the feature CSV.

# From feature_extractor (raw metrics)
_EXTRACTOR_COLUMNS = [
    "duration_sec",
    "sample_rate",
    "rms_energy",
    "tempo_bpm",
    "tempo_stability",
    "spectral_centroid_mean",
    "spectral_centroid_std",
    "spectral_flatness_mean",
    "mfcc_variance",
    "chroma_entropy",
    "harmonic_ratio",
    "zero_crossing_rate",
]

# Heuristic scores (kept as features, not as truth)
_HEURISTIC_COLUMNS = [
    "spectral_regularity",
    "temporal_patterns",
    "harmonic_structure",
]

# From vocal_analyzer
_VOCAL_COLUMNS = [
    "has_vocals",
    "vocal_confidence",
    "vocal_ai_score",
    "pitch_stability_score",
    "vibrato_regularity_score",
    "formant_consistency_score",
    "breath_pattern_score",
    "vocal_texture_score",
    "pitch_mean_hz",
    "pitch_std_cents",
    "vibrato_rate_hz",
    "vibrato_extent_cents",
    "vocal_harmonic_ratio",
    "vocal_energy_ratio",
]

FEATURE_COLUMNS = _EXTRACTOR_COLUMNS + _HEURISTIC_COLUMNS + _VOCAL_COLUMNS
def extract_sample_features(audio_path: str) -> dict | None:
    """
    Extract all features for a single audio file.

    Runs ``extract_features`` and then ``analyze_vocals`` on the file and
    flattens both results into one row keyed by feature name.

    Vocal analysis is best-effort: if it raises, a warning naming the file
    and the error is printed and every vocal feature is set to 0.0, so the
    sample is still kept.

    Args:
        audio_path: Path to the audio file to analyse.

    Returns:
        Dict mapping feature name to its value, or None when feature
        extraction itself fails (the error is printed, not raised).
    """
    # Vocal metrics copied verbatim from the analyzer result. ``has_vocals``
    # is handled separately because it is converted to 0.0 / 1.0.
    vocal_metric_names = (
        "vocal_confidence",
        "vocal_ai_score",
        "pitch_stability_score",
        "vibrato_regularity_score",
        "formant_consistency_score",
        "breath_pattern_score",
        "vocal_texture_score",
        "pitch_mean_hz",
        "pitch_std_cents",
        "vibrato_rate_hz",
        "vibrato_extent_cents",
        "vocal_harmonic_ratio",
        "vocal_energy_ratio",
    )
    try:
        path = Path(audio_path)
        # Feature extraction
        feat = extract_features(path)
        row = {
            "duration_sec": feat.duration_sec,
            "sample_rate": feat.sample_rate,
            "rms_energy": feat.rms_energy,
            "tempo_bpm": feat.tempo_bpm,
            "tempo_stability": feat.tempo_stability,
            "spectral_centroid_mean": feat.spectral_centroid_mean,
            "spectral_centroid_std": feat.spectral_centroid_std,
            "spectral_flatness_mean": feat.spectral_flatness_mean,
            "mfcc_variance": feat.mfcc_variance,
            "chroma_entropy": feat.chroma_entropy,
            "harmonic_ratio": feat.harmonic_ratio,
            "zero_crossing_rate": feat.zero_crossing_rate,
            "spectral_regularity": feat.spectral_regularity,
            "temporal_patterns": feat.temporal_patterns,
            "harmonic_structure": feat.harmonic_structure,
        }
        # Vocal analysis. The attribute reads stay inside the try so that a
        # malformed analyzer result also falls back to the defaults instead
        # of discarding the whole sample.
        try:
            vocals = analyze_vocals(path)
            vocal_row = {"has_vocals": 1.0 if vocals.has_vocals else 0.0}
            vocal_row.update(
                {name: getattr(vocals, name) for name in vocal_metric_names}
            )
        except Exception as vocal_err:
            # Report the failure: without this, a crashed analysis is
            # indistinguishable from a genuinely non-vocal track in the
            # output.
            print(
                f"  WARN: vocal analysis failed for {audio_path}: "
                f"{vocal_err}; using zero defaults"
            )
            vocal_row = dict.fromkeys(("has_vocals",) + vocal_metric_names, 0.0)
        row.update(vocal_row)
        return row
    except Exception as e:
        # Batch boundary: one unreadable file must not stop the whole run.
        print(f"  FAILED: {audio_path}: {e}")
        return None
def extract_batch(
    manifest_path: str | Path,
    output_path: str | Path | None = None,
) -> Path:
    """
    Extract features for all samples in a manifest.

    Each manifest row is passed to ``extract_sample_features``; successful
    rows are written to the output CSV with the columns ``file_path``,
    ``label_int`` and then ``FEATURE_COLUMNS``. Rows whose features cannot
    be extracted, or whose ``label_int`` is not an integer, are reported,
    counted as failed and skipped. Progress is printed every 50 manifest
    rows, followed by a final summary.

    Args:
        manifest_path: Path to manifest CSV with file_path, label_int.
        output_path: Path for output CSV. Default: same dir, features.csv.

    Returns:
        Path to the output features CSV.

    Raises:
        KeyError: If a manifest row lacks the ``file_path`` or
            ``label_int`` column.
        OSError: If the manifest cannot be read or the output cannot be
            written.
    """
    manifest_path = Path(manifest_path)
    if output_path is None:
        output_path = manifest_path.parent / "features.csv"
    output_path = Path(output_path)

    # Read the whole manifest up front so the total is known for progress
    # reporting. newline="" lets the csv module handle line endings itself.
    with open(manifest_path, "r", encoding="utf-8", newline="") as f:
        samples = list(csv.DictReader(f))
    total = len(samples)
    print(f"Extracting features from {total} samples...")

    out_columns = ["file_path", "label_int"] + FEATURE_COLUMNS
    success = 0
    failed = 0

    # Make sure the destination directory exists before opening the file.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=out_columns)
        writer.writeheader()
        for done, sample in enumerate(samples, start=1):
            audio_path = sample["file_path"]
            raw_label = sample["label_int"]
            try:
                label_int = int(raw_label)
            except (TypeError, ValueError):
                # A blank or non-numeric label is a per-sample failure like
                # any other; it must not abort a long-running batch.
                print(
                    f"  FAILED: {audio_path}: invalid label_int {raw_label!r}"
                )
                features = None
            else:
                features = extract_sample_features(audio_path)

            if features is None:
                failed += 1
            else:
                features["file_path"] = audio_path
                features["label_int"] = label_int
                writer.writerow(features)
                success += 1

            # Report on every 50th row regardless of whether that particular
            # row succeeded, so progress output never goes missing.
            if done % 50 == 0:
                print(
                    f"  [{done}/{total}] "
                    f"success={success}, failed={failed}"
                )
    print(
        f"\nDone: {success} extracted, "
        f"{failed} failed"
    )
    print(f"Output: {output_path}")
    return output_path
if __name__ == "__main__":
    # Usage: extract_features_batch.py [MANIFEST_CSV] [OUTPUT_CSV]
    # Both arguments are optional; the output defaults to features.csv next
    # to the manifest (decided inside extract_batch when None is passed).
    cli_args = sys.argv[1:]
    manifest = cli_args[0] if cli_args else "data/sonics/manifest.csv"
    out = cli_args[1] if len(cli_args) >= 2 else None
    extract_batch(manifest, out)