Rthur2003 committed on
Commit
4718070
·
1 Parent(s): 14fc9a2

fix: add ffmpeg decoding fallback for audio loading in analyze_vocals

Browse files
Files changed (1) hide show
  1. app/services/vocal_analyzer.py +32 -1
app/services/vocal_analyzer.py CHANGED
@@ -14,6 +14,8 @@ Key detection signals:
14
  from __future__ import annotations
15
 
16
  import io
 
 
17
  from dataclasses import dataclass, field
18
  from pathlib import Path
19
  from typing import Optional, Union
@@ -173,12 +175,41 @@ def analyze_vocals(
173
  # PRIVATE — Audio loading
174
  # ═══════════════════════════════════════════════════════════════════════
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  def _load_audio(
177
  source: Union[Path, bytes, io.BytesIO], target_sr: int
178
  ) -> tuple[np.ndarray, int]:
179
  if isinstance(source, bytes):
180
  source = io.BytesIO(source)
181
- y, sr = librosa.load(source, sr=target_sr, mono=True, duration=_DURATION_LIMIT)
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  if len(y) < sr:
183
  raise ValueError("Audio too short for vocal analysis (< 1s)")
184
  return y, sr
 
14
  from __future__ import annotations
15
 
16
  import io
17
+ import subprocess
18
+ import tempfile
19
  from dataclasses import dataclass, field
20
  from pathlib import Path
21
  from typing import Optional, Union
 
175
  # PRIVATE — Audio loading
176
  # ═══════════════════════════════════════════════════════════════════════
177
 
178
+ def _ffmpeg_decode(data: bytes) -> io.BytesIO:
179
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
180
+ tmp_path = tmp.name
181
+ try:
182
+ result = subprocess.run(
183
+ ["ffmpeg", "-y", "-i", "pipe:0", "-ar", "22050", "-ac", "1", "-f", "wav", tmp_path],
184
+ input=data, capture_output=True, timeout=30,
185
+ )
186
+ if result.returncode != 0:
187
+ raise RuntimeError(f"ffmpeg failed: {result.stderr.decode()[:200]}")
188
+ with open(tmp_path, "rb") as f:
189
+ return io.BytesIO(f.read())
190
+ finally:
191
+ Path(tmp_path).unlink(missing_ok=True)
192
+
193
+
194
def _load_audio(
    source: Union[Path, bytes, io.BytesIO], target_sr: int
) -> tuple[np.ndarray, int]:
    """Load audio through librosa, retrying via ffmpeg for in-memory input.

    In-memory sources (``bytes`` or ``io.BytesIO``) are captured as raw bytes
    first so that, if the initial ``librosa.load`` call raises, the same bytes
    can be transcoded by ``_ffmpeg_decode`` and loaded again. Any other source
    is passed to librosa as-is and its failure is re-raised unchanged.

    Args:
        source: Path, raw bytes, or byte buffer to load from.
        target_sr: Value passed to ``librosa.load`` as ``sr``.

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``.

    Raises:
        ValueError: If the loaded signal has fewer samples than the sample
            rate, i.e. it is shorter than one second.
    """
    # Keep a copy of the bytes for in-memory input so a failed first attempt
    # can be replayed through ffmpeg; None means "no fallback available".
    payload = None
    if isinstance(source, bytes):
        payload = source
    elif isinstance(source, io.BytesIO):
        payload = source.read()

    stream = source if payload is None else io.BytesIO(payload)
    load_kwargs = {"sr": target_sr, "mono": True, "duration": _DURATION_LIMIT}

    try:
        samples, rate = librosa.load(stream, **load_kwargs)
    except Exception:
        if payload is None:
            raise
        samples, rate = librosa.load(_ffmpeg_decode(payload), **load_kwargs)

    if len(samples) < rate:
        raise ValueError("Audio too short for vocal analysis (< 1s)")
    return samples, rate