Spaces:

Rthur2003
/

crowncode-backend

Sleeping

App Files Files Community

Rthur2003 committed 18 days ago

Commit

4718070

1 Parent(s): 14fc9a2

fix: add ffmpeg decoding fallback for audio loading in analyze_vocals

Browse files

Files changed (1) hide show

app/services/vocal_analyzer.py +32 -1

app/services/vocal_analyzer.py CHANGED Viewed

@@ -14,6 +14,8 @@ Key detection signals:
 from __future__ import annotations
 import io
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Optional, Union
@@ -173,12 +175,41 @@ def analyze_vocals(
 # PRIVATE — Audio loading
 # ═══════════════════════════════════════════════════════════════════════
 def _load_audio(
     source: Union[Path, bytes, io.BytesIO], target_sr: int
 ) -> tuple[np.ndarray, int]:
     """Decode ``source`` with librosa and reject clips shorter than one second.

     This is the pre-change side of the diff hunk: the single ``librosa.load``
     call below is the line the commit removes.

     Args:
         source: A filesystem path, raw encoded bytes, or an in-memory buffer.
         target_sr: Sample rate passed to ``librosa.load`` as ``sr``.

     Returns:
         The ``(y, sr)`` pair produced by ``librosa.load``.

     Raises:
         ValueError: If the decoded signal has fewer than ``sr`` samples.
     """
     # Raw bytes are wrapped in a file-like object before being given to librosa.
     if isinstance(source, bytes):
         source = io.BytesIO(source)
-    y, sr = librosa.load(source, sr=target_sr, mono=True, duration=_DURATION_LIMIT)
     # Fewer samples than the sample rate means less than one second of audio.
     if len(y) < sr:
         raise ValueError("Audio too short for vocal analysis (< 1s)")
     return y, sr

 from __future__ import annotations
 import io
+import subprocess
+import tempfile
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Optional, Union
 # PRIVATE — Audio loading
 # ═══════════════════════════════════════════════════════════════════════
+def _ffmpeg_decode(data: bytes) -> io.BytesIO:
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+        tmp_path = tmp.name
+    try:
+        result = subprocess.run(
+            ["ffmpeg", "-y", "-i", "pipe:0", "-ar", "22050", "-ac", "1", "-f", "wav", tmp_path],
+            input=data, capture_output=True, timeout=30,
+        )
+        if result.returncode != 0:
+            raise RuntimeError(f"ffmpeg failed: {result.stderr.decode()[:200]}")
+        with open(tmp_path, "rb") as f:
+            return io.BytesIO(f.read())
+    finally:
+        Path(tmp_path).unlink(missing_ok=True)
 def _load_audio(
     source: Union[Path, bytes, io.BytesIO], target_sr: int
 ) -> tuple[np.ndarray, int]:
     """Decode ``source`` to a mono signal, retrying through ffmpeg if needed.

     In-memory input (``bytes`` or ``io.BytesIO``) is buffered so that, when
     ``librosa.load`` raises, the same payload can be transcoded with
     ``_ffmpeg_decode`` and loaded again. Any other source has no buffered
     copy, so its decoding error is re-raised unchanged.

     Args:
         source: A filesystem path, raw encoded bytes, or an in-memory buffer.
         target_sr: Sample rate passed to ``librosa.load`` as ``sr``.

     Returns:
         The ``(samples, sample_rate)`` pair produced by ``librosa.load``.

     Raises:
         ValueError: If the decoded signal has fewer samples than one second.
     """
     if isinstance(source, bytes):
         source = io.BytesIO(source)

     # Keep a private copy of in-memory payloads for the fallback path.
     payload = source.read() if isinstance(source, io.BytesIO) else None
     if payload is not None:
         source = io.BytesIO(payload)

     load_kwargs = dict(sr=target_sr, mono=True, duration=_DURATION_LIMIT)
     try:
         samples, rate = librosa.load(source, **load_kwargs)
     except Exception:
         # Without a buffered payload there is nothing to hand to ffmpeg.
         if payload is None:
             raise
         samples, rate = librosa.load(_ffmpeg_decode(payload), **load_kwargs)

     if len(samples) < rate:
         raise ValueError("Audio too short for vocal analysis (< 1s)")
     return samples, rate