Rthur2003 committed on
Commit
b74a8cb
·
1 Parent(s): bc1975b

feat: implement batch feature extraction for AURIS training pipeline

Browse files
app/training/extract_features_batch.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Batch feature extraction for AURIS training pipeline.
3
+
4
+ Runs feature_extractor and vocal_analyzer on every sample
5
+ in a manifest CSV, collecting RAW features (not heuristic
6
+ scores) into a single CSV file for classifier training.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import csv
12
+ import io
13
+ import sys
14
+ import traceback
15
+ from pathlib import Path
16
+
17
+ import numpy as np
18
+
19
+ # Add parent to path for imports
20
+ sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
21
+
22
+ from app.services.feature_extractor import extract_features
23
+ from app.services.vocal_analyzer import analyze_vocals
24
+
25
+
26
# Ordered feature columns written for every sample. The output CSV header is
# ["file_path", "label_int"] followed by these names, in this order.
FEATURE_COLUMNS = [
    # Signal-level metrics taken from the feature_extractor result.
    *(
        "duration_sec",
        "sample_rate",
        "rms_energy",
        "tempo_bpm",
        "tempo_stability",
        "spectral_centroid_mean",
        "spectral_centroid_std",
        "spectral_flatness_mean",
        "mfcc_variance",
        "chroma_entropy",
        "harmonic_ratio",
        "zero_crossing_rate",
    ),
    # Heuristic scores: kept as classifier inputs, never used as ground truth.
    *(
        "spectral_regularity",
        "temporal_patterns",
        "harmonic_structure",
    ),
    # Metrics taken from the vocal_analyzer result.
    *(
        "has_vocals",
        "vocal_confidence",
        "vocal_ai_score",
        "pitch_stability_score",
        "vibrato_regularity_score",
        "formant_consistency_score",
        "breath_pattern_score",
        "vocal_texture_score",
        "pitch_mean_hz",
        "pitch_std_cents",
        "vibrato_rate_hz",
        "vibrato_extent_cents",
        "vocal_harmonic_ratio",
        "vocal_energy_ratio",
    ),
]
61
+
62
+
63
+ def extract_sample_features(audio_path: str) -> dict | None:
64
+ """
65
+ Extract all raw features from a single audio file.
66
+
67
+ Returns dict of feature_name -> float, or None on failure.
68
+ """
69
+ try:
70
+ path = Path(audio_path)
71
+
72
+ # Feature extraction
73
+ feat = extract_features(path)
74
+ row = {
75
+ "duration_sec": feat.duration_sec,
76
+ "sample_rate": feat.sample_rate,
77
+ "rms_energy": feat.rms_energy,
78
+ "tempo_bpm": feat.tempo_bpm,
79
+ "tempo_stability": feat.tempo_stability,
80
+ "spectral_centroid_mean": feat.spectral_centroid_mean,
81
+ "spectral_centroid_std": feat.spectral_centroid_std,
82
+ "spectral_flatness_mean": feat.spectral_flatness_mean,
83
+ "mfcc_variance": feat.mfcc_variance,
84
+ "chroma_entropy": feat.chroma_entropy,
85
+ "harmonic_ratio": feat.harmonic_ratio,
86
+ "zero_crossing_rate": feat.zero_crossing_rate,
87
+ "spectral_regularity": feat.spectral_regularity,
88
+ "temporal_patterns": feat.temporal_patterns,
89
+ "harmonic_structure": feat.harmonic_structure,
90
+ }
91
+
92
+ # Vocal analysis
93
+ try:
94
+ vocals = analyze_vocals(path)
95
+ row.update({
96
+ "has_vocals": 1.0 if vocals.has_vocals else 0.0,
97
+ "vocal_confidence": vocals.vocal_confidence,
98
+ "vocal_ai_score": vocals.vocal_ai_score,
99
+ "pitch_stability_score": vocals.pitch_stability_score,
100
+ "vibrato_regularity_score": vocals.vibrato_regularity_score,
101
+ "formant_consistency_score": vocals.formant_consistency_score,
102
+ "breath_pattern_score": vocals.breath_pattern_score,
103
+ "vocal_texture_score": vocals.vocal_texture_score,
104
+ "pitch_mean_hz": vocals.pitch_mean_hz,
105
+ "pitch_std_cents": vocals.pitch_std_cents,
106
+ "vibrato_rate_hz": vocals.vibrato_rate_hz,
107
+ "vibrato_extent_cents": vocals.vibrato_extent_cents,
108
+ "vocal_harmonic_ratio": vocals.vocal_harmonic_ratio,
109
+ "vocal_energy_ratio": vocals.vocal_energy_ratio,
110
+ })
111
+ except Exception:
112
+ # Fill vocal features with defaults
113
+ row.update({
114
+ "has_vocals": 0.0,
115
+ "vocal_confidence": 0.0,
116
+ "vocal_ai_score": 0.0,
117
+ "pitch_stability_score": 0.0,
118
+ "vibrato_regularity_score": 0.0,
119
+ "formant_consistency_score": 0.0,
120
+ "breath_pattern_score": 0.0,
121
+ "vocal_texture_score": 0.0,
122
+ "pitch_mean_hz": 0.0,
123
+ "pitch_std_cents": 0.0,
124
+ "vibrato_rate_hz": 0.0,
125
+ "vibrato_extent_cents": 0.0,
126
+ "vocal_harmonic_ratio": 0.0,
127
+ "vocal_energy_ratio": 0.0,
128
+ })
129
+
130
+ return row
131
+
132
+ except Exception as e:
133
+ print(f" FAILED: {audio_path}: {e}")
134
+ return None
135
+
136
+
137
def extract_batch(
    manifest_path: str | Path,
    output_path: str | Path | None = None,
) -> Path:
    """
    Extract features for all samples in a manifest.

    Every manifest row is passed to ``extract_sample_features``; rows that
    succeed are written to the output CSV with the columns
    ``file_path``, ``label_int`` and then FEATURE_COLUMNS. Rows whose
    extraction fails, or whose ``label_int`` is not an integer, are counted
    as failed and skipped so one bad row cannot abort the whole batch.

    Args:
        manifest_path: Path to manifest CSV with file_path, label_int.
        output_path: Path for output CSV. Default: same dir, features.csv.

    Returns:
        Path to the output features CSV.

    Raises:
        KeyError: If a manifest row has no ``file_path`` or ``label_int``
            column at all.
        OSError: If the manifest cannot be read or the output cannot be
            written.
    """
    manifest_path = Path(manifest_path)
    if output_path is None:
        output_path = manifest_path.parent / "features.csv"
    output_path = Path(output_path)

    # Read the whole manifest up front so progress can be reported against
    # a known total. newline="" leaves line-ending handling to the csv module.
    with open(manifest_path, "r", newline="", encoding="utf-8") as f:
        samples = list(csv.DictReader(f))

    total = len(samples)
    print(f"Extracting features from {total} samples...")

    out_columns = ["file_path", "label_int"] + FEATURE_COLUMNS
    success = 0
    failed = 0

    # The requested output location may sit in a directory that does not
    # exist yet.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=out_columns)
        writer.writeheader()

        for done, sample in enumerate(samples, start=1):
            audio_path = sample["file_path"]

            # Validate the cheap label first; skip the expensive extraction
            # for rows that could not be written anyway.
            features = None
            try:
                label_int = int(sample["label_int"])
            except (TypeError, ValueError):
                print(
                    f" FAILED: {audio_path}: "
                    f"invalid label_int {sample['label_int']!r}"
                )
            else:
                features = extract_sample_features(audio_path)

            if features is None:
                failed += 1
            else:
                writer.writerow(
                    {**features, "file_path": audio_path, "label_int": label_int}
                )
                success += 1

            # Report after the counters are updated, for failed samples too,
            # so a failure on a multiple of 50 does not drop the update.
            if done % 50 == 0:
                print(
                    f" [{done}/{total}] "
                    f"success={success}, failed={failed}"
                )

    print(
        f"\nDone: {success} extracted, "
        f"{failed} failed"
    )
    print(f"Output: {output_path}")

    return output_path
200
+
201
+
202
if __name__ == "__main__":
    # Usage: extract_features_batch.py [MANIFEST_CSV] [OUTPUT_CSV]
    # Both arguments are optional; a missing output path lets extract_batch
    # pick its own default.
    cli_args = sys.argv[1:]
    manifest = cli_args[0] if cli_args else "data/sonics/manifest.csv"
    out = cli_args[1] if len(cli_args) >= 2 else None
    extract_batch(manifest, out)