Spaces:
Sleeping
Sleeping
feat: integrate CLAP and FST detection into score fusion process
Browse files
- app/routes/analyze.py +51 -2
app/routes/analyze.py
CHANGED
|
@@ -331,8 +331,57 @@ async def _analyze_file(
|
|
| 331 |
logger.warning(f"[{request_id}] Vocal analysis failed: {e}")
|
| 332 |
warnings.append("vocal_analysis_unavailable")
|
| 333 |
|
| 334 |
-
# ── Step 3:
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
|
| 337 |
processing_time = round(time.monotonic() - start_time, 3)
|
| 338 |
logger.info(
|
|
|
|
| 331 |
logger.warning(f"[{request_id}] Vocal analysis failed: {e}")
|
| 332 |
warnings.append("vocal_analysis_unavailable")
|
| 333 |
|
| 334 |
+
# ── Step 3: CLAP detection (Layer 2) ────
|
| 335 |
+
clap_result = None
|
| 336 |
+
try:
|
| 337 |
+
audio_bytes_clap = io.BytesIO(content)
|
| 338 |
+
clap_result = clap_service.predict(audio_bytes_clap)
|
| 339 |
+
if clap_result.available:
|
| 340 |
+
logger.info(
|
| 341 |
+
f"[{request_id}] CLAP: "
|
| 342 |
+
f"ai={clap_result.is_ai}, "
|
| 343 |
+
f"conf={clap_result.confidence:.3f}"
|
| 344 |
+
)
|
| 345 |
+
else:
|
| 346 |
+
logger.info(
|
| 347 |
+
f"[{request_id}] CLAP unavailable: "
|
| 348 |
+
f"{clap_result.error}"
|
| 349 |
+
)
|
| 350 |
+
except Exception as e:
|
| 351 |
+
logger.warning(
|
| 352 |
+
f"[{request_id}] CLAP failed: {e}"
|
| 353 |
+
)
|
| 354 |
+
warnings.append("clap_analysis_unavailable")
|
| 355 |
+
|
| 356 |
+
# ── Step 4: FST detection (Layer 3) ────
|
| 357 |
+
fst_result = None
|
| 358 |
+
try:
|
| 359 |
+
audio_bytes_fst = io.BytesIO(content)
|
| 360 |
+
fst_result = await fst_service.predict(audio_bytes_fst)
|
| 361 |
+
if fst_result.available:
|
| 362 |
+
logger.info(
|
| 363 |
+
f"[{request_id}] FST: "
|
| 364 |
+
f"ai={fst_result.is_ai}, "
|
| 365 |
+
f"conf={fst_result.confidence:.3f}, "
|
| 366 |
+
f"label={fst_result.label}"
|
| 367 |
+
)
|
| 368 |
+
else:
|
| 369 |
+
logger.info(
|
| 370 |
+
f"[{request_id}] FST unavailable: "
|
| 371 |
+
f"{fst_result.error}"
|
| 372 |
+
)
|
| 373 |
+
except Exception as e:
|
| 374 |
+
logger.warning(
|
| 375 |
+
f"[{request_id}] FST failed: {e}"
|
| 376 |
+
)
|
| 377 |
+
warnings.append("fst_analysis_unavailable")
|
| 378 |
+
|
| 379 |
+
# ── Step 5: Score fusion ───────────────
|
| 380 |
+
fusion: FusionResult = fuse_scores(
|
| 381 |
+
features, vocals,
|
| 382 |
+
clap_result=clap_result,
|
| 383 |
+
fst_result=fst_result,
|
| 384 |
+
)
|
| 385 |
|
| 386 |
processing_time = round(time.monotonic() - start_time, 3)
|
| 387 |
logger.info(
|