Rthur2003 committed on
Commit
7f1ed48
·
1 Parent(s): 7b2d19a

feat: enhance feature extraction with additional spectral and temporal metrics

Browse files
app/services/feature_extractor.py CHANGED
@@ -124,15 +124,35 @@ def extract_features(
124
  duration_sec=duration_sec,
125
  sample_rate=actual_sr,
126
  rms_energy=spectral["rms_mean"],
 
127
  tempo_bpm=temporal["tempo_bpm"],
128
  tempo_stability=temporal["tempo_stability"],
 
129
  spectral_centroid_mean=spectral["centroid_mean"],
130
  spectral_centroid_std=spectral["centroid_std"],
131
  spectral_flatness_mean=spectral["flatness_mean"],
 
 
 
 
 
 
 
132
  mfcc_variance=spectral["mfcc_variance"],
 
 
133
  chroma_entropy=harmonic["chroma_entropy"],
 
 
134
  harmonic_ratio=harmonic["harmonic_ratio"],
 
135
  zero_crossing_rate=temporal["zcr_mean"],
 
 
 
 
 
 
136
  )
137
 
138
 
 
124
  duration_sec=duration_sec,
125
  sample_rate=actual_sr,
126
  rms_energy=spectral["rms_mean"],
127
+ rms_std=spectral["rms_std"],
128
  tempo_bpm=temporal["tempo_bpm"],
129
  tempo_stability=temporal["tempo_stability"],
130
+ tempo_cv=temporal["tempo_cv"],
131
  spectral_centroid_mean=spectral["centroid_mean"],
132
  spectral_centroid_std=spectral["centroid_std"],
133
  spectral_flatness_mean=spectral["flatness_mean"],
134
+ spectral_flatness_std=spectral["flatness_std"],
135
+ spectral_bandwidth_mean=spectral["bandwidth_mean"],
136
+ spectral_bandwidth_std=spectral["bandwidth_std"],
137
+ spectral_rolloff_mean=spectral["rolloff_mean"],
138
+ spectral_rolloff_std=spectral["rolloff_std"],
139
+ spectral_contrast_mean=spectral["contrast_mean"],
140
+ spectral_contrast_std=spectral["contrast_std"],
141
  mfcc_variance=spectral["mfcc_variance"],
142
+ mfcc_delta_var=spectral["mfcc_delta_var"],
143
+ mfcc_delta2_var=spectral["mfcc_delta2_var"],
144
  chroma_entropy=harmonic["chroma_entropy"],
145
+ chroma_std=harmonic["chroma_std"],
146
+ chroma_transition_rate=harmonic["chroma_transition_rate"],
147
  harmonic_ratio=harmonic["harmonic_ratio"],
148
+ tonnetz_std=harmonic["tonnetz_std"],
149
  zero_crossing_rate=temporal["zcr_mean"],
150
+ zero_crossing_std=temporal["zcr_std"],
151
+ onset_strength_mean=temporal["onset_mean"],
152
+ onset_strength_std=temporal["onset_std"],
153
+ rms_dynamic_range=temporal["rms_dynamic_range"],
154
+ beat_count=temporal["beat_count"],
155
+ mel_flatness=spectral["mel_flatness"],
156
  )
157
 
158
 
app/training/extract_features_batch.py CHANGED
@@ -27,26 +27,49 @@ from app.services.feature_extractor import extract_features
27
  from app.services.vocal_analyzer import analyze_vocals
28
 
29
 
30
- # All raw features we extract per sample
31
  FEATURE_COLUMNS = [
32
- # From feature_extractor (raw metrics)
33
  "duration_sec",
34
  "sample_rate",
 
35
  "rms_energy",
36
- "tempo_bpm",
37
- "tempo_stability",
38
  "spectral_centroid_mean",
39
  "spectral_centroid_std",
40
  "spectral_flatness_mean",
 
 
 
 
 
 
 
41
  "mfcc_variance",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  "chroma_entropy",
 
 
43
  "harmonic_ratio",
44
- "zero_crossing_rate",
45
- # Heuristic scores (kept as features, not as truth)
46
  "spectral_regularity",
47
  "temporal_patterns",
48
  "harmonic_structure",
49
- # From vocal_analyzer (raw metrics)
50
  "has_vocals",
51
  "vocal_confidence",
52
  "vocal_ai_score",
@@ -73,21 +96,41 @@ def extract_sample_features(audio_path: str) -> dict | None:
73
  try:
74
  path = Path(audio_path)
75
 
76
- # Feature extraction
77
  feat = extract_features(path)
78
  row = {
79
  "duration_sec": feat.duration_sec,
80
  "sample_rate": feat.sample_rate,
81
  "rms_energy": feat.rms_energy,
82
- "tempo_bpm": feat.tempo_bpm,
83
- "tempo_stability": feat.tempo_stability,
84
  "spectral_centroid_mean": feat.spectral_centroid_mean,
85
  "spectral_centroid_std": feat.spectral_centroid_std,
86
  "spectral_flatness_mean": feat.spectral_flatness_mean,
 
 
 
 
 
 
 
87
  "mfcc_variance": feat.mfcc_variance,
 
 
 
 
 
 
 
 
 
 
 
 
88
  "chroma_entropy": feat.chroma_entropy,
 
 
89
  "harmonic_ratio": feat.harmonic_ratio,
90
- "zero_crossing_rate": feat.zero_crossing_rate,
91
  "spectral_regularity": feat.spectral_regularity,
92
  "temporal_patterns": feat.temporal_patterns,
93
  "harmonic_structure": feat.harmonic_structure,
 
27
  from app.services.vocal_analyzer import analyze_vocals
28
 
29
 
30
+ # All raw features we extract per sample — comprehensive set for paper
31
  FEATURE_COLUMNS = [
32
+ # ── Basic metadata ──────────────────────────────────────────
33
  "duration_sec",
34
  "sample_rate",
35
+ # ── Spectral features ───────────────────────────────────────
36
  "rms_energy",
37
+ "rms_std",
 
38
  "spectral_centroid_mean",
39
  "spectral_centroid_std",
40
  "spectral_flatness_mean",
41
+ "spectral_flatness_std",
42
+ "spectral_bandwidth_mean",
43
+ "spectral_bandwidth_std",
44
+ "spectral_rolloff_mean",
45
+ "spectral_rolloff_std",
46
+ "spectral_contrast_mean",
47
+ "spectral_contrast_std",
48
  "mfcc_variance",
49
+ "mfcc_delta_var",
50
+ "mfcc_delta2_var",
51
+ "mel_flatness",
52
+ # ── Temporal / rhythm features ──────────────────────────────
53
+ "tempo_bpm",
54
+ "tempo_stability",
55
+ "tempo_cv",
56
+ "zero_crossing_rate",
57
+ "zero_crossing_std",
58
+ "onset_strength_mean",
59
+ "onset_strength_std",
60
+ "rms_dynamic_range",
61
+ "beat_count",
62
+ # ── Harmonic / tonal features ───────────────────────────────
63
  "chroma_entropy",
64
+ "chroma_std",
65
+ "chroma_transition_rate",
66
  "harmonic_ratio",
67
+ "tonnetz_std",
68
+ # ── Heuristic composite scores (kept as features) ───────────
69
  "spectral_regularity",
70
  "temporal_patterns",
71
  "harmonic_structure",
72
+ # ── Vocal analysis features ─────────────────────────────────
73
  "has_vocals",
74
  "vocal_confidence",
75
  "vocal_ai_score",
 
96
  try:
97
  path = Path(audio_path)
98
 
99
+ # Feature extraction — all fields from AudioFeatures dataclass
100
  feat = extract_features(path)
101
  row = {
102
  "duration_sec": feat.duration_sec,
103
  "sample_rate": feat.sample_rate,
104
  "rms_energy": feat.rms_energy,
105
+ "rms_std": feat.rms_std,
 
106
  "spectral_centroid_mean": feat.spectral_centroid_mean,
107
  "spectral_centroid_std": feat.spectral_centroid_std,
108
  "spectral_flatness_mean": feat.spectral_flatness_mean,
109
+ "spectral_flatness_std": feat.spectral_flatness_std,
110
+ "spectral_bandwidth_mean": feat.spectral_bandwidth_mean,
111
+ "spectral_bandwidth_std": feat.spectral_bandwidth_std,
112
+ "spectral_rolloff_mean": feat.spectral_rolloff_mean,
113
+ "spectral_rolloff_std": feat.spectral_rolloff_std,
114
+ "spectral_contrast_mean": feat.spectral_contrast_mean,
115
+ "spectral_contrast_std": feat.spectral_contrast_std,
116
  "mfcc_variance": feat.mfcc_variance,
117
+ "mfcc_delta_var": feat.mfcc_delta_var,
118
+ "mfcc_delta2_var": feat.mfcc_delta2_var,
119
+ "mel_flatness": feat.mel_flatness,
120
+ "tempo_bpm": feat.tempo_bpm,
121
+ "tempo_stability": feat.tempo_stability,
122
+ "tempo_cv": feat.tempo_cv,
123
+ "zero_crossing_rate": feat.zero_crossing_rate,
124
+ "zero_crossing_std": feat.zero_crossing_std,
125
+ "onset_strength_mean": feat.onset_strength_mean,
126
+ "onset_strength_std": feat.onset_strength_std,
127
+ "rms_dynamic_range": feat.rms_dynamic_range,
128
+ "beat_count": feat.beat_count,
129
  "chroma_entropy": feat.chroma_entropy,
130
+ "chroma_std": feat.chroma_std,
131
+ "chroma_transition_rate": feat.chroma_transition_rate,
132
  "harmonic_ratio": feat.harmonic_ratio,
133
+ "tonnetz_std": feat.tonnetz_std,
134
  "spectral_regularity": feat.spectral_regularity,
135
  "temporal_patterns": feat.temporal_patterns,
136
  "harmonic_structure": feat.harmonic_structure,