Spaces:
Sleeping
Sleeping
fix: add ffmpeg decoding fallback for audio loading in analyze_vocals
Browse files
app/services/vocal_analyzer.py
CHANGED
|
@@ -14,6 +14,8 @@ Key detection signals:
|
|
| 14 |
from __future__ import annotations
|
| 15 |
|
| 16 |
import io
|
|
|
|
|
|
|
| 17 |
from dataclasses import dataclass, field
|
| 18 |
from pathlib import Path
|
| 19 |
from typing import Optional, Union
|
|
@@ -173,12 +175,41 @@ def analyze_vocals(
|
|
| 173 |
# PRIVATE — Audio loading
|
| 174 |
# ═══════════════════════════════════════════════════════════════════════
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
def _load_audio(
|
| 177 |
source: Union[Path, bytes, io.BytesIO], target_sr: int
|
| 178 |
) -> tuple[np.ndarray, int]:
|
| 179 |
if isinstance(source, bytes):
|
| 180 |
source = io.BytesIO(source)
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
if len(y) < sr:
|
| 183 |
raise ValueError("Audio too short for vocal analysis (< 1s)")
|
| 184 |
return y, sr
|
|
|
|
| 14 |
from __future__ import annotations
|
| 15 |
|
| 16 |
import io
|
| 17 |
+
import subprocess
|
| 18 |
+
import tempfile
|
| 19 |
from dataclasses import dataclass, field
|
| 20 |
from pathlib import Path
|
| 21 |
from typing import Optional, Union
|
|
|
|
| 175 |
# PRIVATE — Audio loading
|
| 176 |
# ═══════════════════════════════════════════════════════════════════════
|
| 177 |
|
| 178 |
+
def _ffmpeg_decode(data: bytes, sample_rate: int = 22050) -> io.BytesIO:
    """Transcode encoded audio bytes to a mono WAV stream via the ffmpeg CLI.

    Args:
        data: Raw bytes of an audio file in any container/codec that the
            installed ffmpeg build can read.
        sample_rate: Output sample rate in Hz passed to ffmpeg's ``-ar``
            option. Defaults to 22050, the rate this helper always used.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 holding the WAV output.

    Raises:
        ValueError: If ``sample_rate`` is not a positive number.
        RuntimeError: If ffmpeg exits with a non-zero status; the message
            carries the tail of ffmpeg's stderr.
        FileNotFoundError: If no ``ffmpeg`` executable is on ``PATH``.
        subprocess.TimeoutExpired: If decoding takes longer than 30 seconds.
    """
    if sample_rate <= 0:
        raise ValueError(f"sample_rate must be positive, got {sample_rate!r}")

    # Both files live in one private directory so a single context manager
    # removes them on every exit path, including timeouts and failures.
    with tempfile.TemporaryDirectory() as workdir:
        src_path = Path(workdir) / "input"
        dst_path = Path(workdir) / "output.wav"

        # Hand ffmpeg a real file instead of stdin: a pipe is not seekable,
        # and some containers keep their index at the end of the file.
        src_path.write_bytes(data)

        result = subprocess.run(
            [
                "ffmpeg",
                "-nostdin",
                "-hide_banner",
                "-loglevel", "error",
                "-y",
                "-i", str(src_path),
                "-ar", str(sample_rate),
                "-ac", "1",
                "-f", "wav",
                str(dst_path),
            ],
            stdin=subprocess.DEVNULL,
            capture_output=True,
            timeout=30,
        )
        if result.returncode != 0:
            # stderr is not guaranteed to be valid UTF-8; never let decoding
            # the diagnostic mask the real failure. The useful part of the
            # diagnostic is at the end, so keep the tail.
            detail = result.stderr.decode("utf-8", errors="replace").strip()
            raise RuntimeError(f"ffmpeg failed: {detail[-200:]}")

        return io.BytesIO(dst_path.read_bytes())
|
| 192 |
+
|
| 193 |
+
|
| 194 |
def _load_audio(
    source: Union[Path, bytes, io.BytesIO], target_sr: int
) -> tuple[np.ndarray, int]:
    """Load *source* as a mono waveform resampled to ``target_sr``.

    In-memory input (``bytes`` or ``io.BytesIO``) is read in full up front so
    the same payload can be handed to ``_ffmpeg_decode`` if the first
    ``librosa.load`` attempt raises. Any other kind of source has no such
    fallback: the original exception propagates unchanged.

    Note that a caller-supplied ``io.BytesIO`` is consumed from its current
    position by the initial ``read()``.

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``.

    Raises:
        ValueError: If the decoded signal has fewer samples than the sample
            rate, i.e. is shorter than one second.
    """
    # Normalise raw bytes to a stream, then snapshot any stream's contents.
    stream = io.BytesIO(source) if isinstance(source, bytes) else source
    payload = stream.read() if isinstance(stream, io.BytesIO) else None
    if payload is not None:
        # Fresh buffer positioned at 0 for the first decode attempt.
        stream = io.BytesIO(payload)

    load_opts = {"sr": target_sr, "mono": True, "duration": _DURATION_LIMIT}

    try:
        samples, rate = librosa.load(stream, **load_opts)
    except Exception:
        if payload is None:
            # No captured payload to hand to ffmpeg — surface the failure.
            raise
        # Second chance: transcode to WAV with ffmpeg, then load that.
        samples, rate = librosa.load(_ffmpeg_decode(payload), **load_opts)

    if len(samples) < rate:
        raise ValueError("Audio too short for vocal analysis (< 1s)")
    return samples, rate
|