Spaces:
Running
on
Zero
Running
on
Zero
Y Phung Nguyen
committed on
Commit
·
2506ee7
1
Parent(s):
4ea2fc7
Use Maya1 TTS
Browse files
ui.py
CHANGED
|
@@ -6,7 +6,14 @@ from config import TITLE, DESCRIPTION, CSS, MEDSWIN_MODELS, DEFAULT_MEDICAL_MODE
|
|
| 6 |
from indexing import create_or_update_index
|
| 7 |
from pipeline import stream_chat
|
| 8 |
from voice import transcribe_audio, generate_speech
|
| 9 |
-
from models import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from logger import logger
|
| 11 |
|
| 12 |
|
|
@@ -314,7 +321,7 @@ def create_demo():
|
|
| 314 |
return "⚠️ Model not loaded. Click to load or it will load on first use.", False
|
| 315 |
|
| 316 |
# GPU-decorated function to load model on startup
|
| 317 |
-
@spaces.GPU(max_duration=120)
|
| 318 |
def load_default_model_on_startup():
|
| 319 |
"""Load default medical model on startup (GPU-decorated for ZeroGPU compatibility)"""
|
| 320 |
try:
|
|
@@ -336,6 +343,20 @@ def create_demo():
|
|
| 336 |
logger.error(f"Error in model loading startup: {e}")
|
| 337 |
return f"⚠️ Startup loading error: {str(e)[:100]}"
|
| 338 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
# Initialize status on load
|
| 340 |
def init_model_status():
|
| 341 |
status_text, is_ready = check_model_status(DEFAULT_MEDICAL_MODEL)
|
|
@@ -357,11 +378,17 @@ def create_demo():
|
|
| 357 |
outputs=[model_status, submit_button, message_input]
|
| 358 |
)
|
| 359 |
|
| 360 |
-
# Load default model on startup (GPU-decorated function)
|
| 361 |
demo.load(
|
| 362 |
fn=load_default_model_on_startup,
|
| 363 |
outputs=[model_status]
|
| 364 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
|
| 366 |
# Wrap stream_chat to check model status before execution
|
| 367 |
def stream_chat_with_model_check(
|
|
|
|
| 6 |
from indexing import create_or_update_index
|
| 7 |
from pipeline import stream_chat
|
| 8 |
from voice import transcribe_audio, generate_speech
|
| 9 |
+
from models import (
|
| 10 |
+
initialize_medical_model,
|
| 11 |
+
is_model_loaded,
|
| 12 |
+
get_model_loading_state,
|
| 13 |
+
set_model_loading_state,
|
| 14 |
+
initialize_tts_model,
|
| 15 |
+
TTS_AVAILABLE,
|
| 16 |
+
)
|
| 17 |
from logger import logger
|
| 18 |
|
| 19 |
|
|
|
|
| 321 |
return "⚠️ Model not loaded. Click to load or it will load on first use.", False
|
| 322 |
|
| 323 |
# Load the default medical model on startup (the @spaces.GPU decorator is currently disabled)
|
| 324 |
+
# @spaces.GPU(max_duration=120)
|
| 325 |
def load_default_model_on_startup():
|
| 326 |
"""Load default medical model on startup (GPU-decorated for ZeroGPU compatibility)"""
|
| 327 |
try:
|
|
|
|
| 343 |
logger.error(f"Error in model loading startup: {e}")
|
| 344 |
return f"⚠️ Startup loading error: {str(e)[:100]}"
|
| 345 |
|
| 346 |
+
# Load the default TTS model on startup (the @spaces.GPU decorator is currently disabled)
|
| 347 |
+
# @spaces.GPU(max_duration=120)
|
| 348 |
+
def load_tts_model_on_startup():
    """Preload the default TTS model (maya1) when the app starts.

    Skips the preload with a warning when ``TTS_AVAILABLE`` is falsy.
    Any exception raised while loading is logged and swallowed, so the
    function always returns ``None``.
    """
    try:
        if TTS_AVAILABLE:
            logger.info("Loading default TTS model (maya1) on startup...")
            initialize_tts_model()
            logger.info("✅ Default TTS model (maya1) loaded successfully on startup!")
        else:
            logger.warning("TTS library not installed; skipping TTS preload.")
    except Exception as exc:
        # Best-effort preload: a failure here is logged, never raised.
        logger.error(f"Error in TTS model loading startup: {exc}")
|
| 359 |
+
|
| 360 |
# Initialize status on load
|
| 361 |
def init_model_status():
|
| 362 |
status_text, is_ready = check_model_status(DEFAULT_MEDICAL_MODEL)
|
|
|
|
| 378 |
outputs=[model_status, submit_button, message_input]
|
| 379 |
)
|
| 380 |
|
| 381 |
+
# Load default medical model on startup
|
| 382 |
demo.load(
|
| 383 |
fn=load_default_model_on_startup,
|
| 384 |
outputs=[model_status]
|
| 385 |
)
|
| 386 |
+
# Load default TTS model (maya1) on startup
|
| 387 |
+
demo.load(
|
| 388 |
+
fn=load_tts_model_on_startup,
|
| 389 |
+
inputs=None,
|
| 390 |
+
outputs=None
|
| 391 |
+
)
|
| 392 |
|
| 393 |
# Wrap stream_chat to check model status before execution
|
| 394 |
def stream_chat_with_model_check(
|
voice.py
CHANGED
|
@@ -111,7 +111,7 @@ def transcribe_audio(audio):
|
|
| 111 |
return ""
|
| 112 |
|
| 113 |
async def generate_speech_mcp(text: str) -> str:
|
| 114 |
-
"""Generate speech using MCP text_to_speech tool"""
|
| 115 |
if not MCP_AVAILABLE:
|
| 116 |
return None
|
| 117 |
|
|
@@ -163,37 +163,49 @@ async def generate_speech_mcp(text: str) -> str:
|
|
| 163 |
logger.warning(f"MCP TTS error: {e}")
|
| 164 |
return None
|
| 165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
def generate_speech(text: str):
|
| 167 |
-
"""Generate speech from text using TTS model (with MCP fallback)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
if not text or len(text.strip()) == 0:
|
| 169 |
return None
|
| 170 |
|
| 171 |
-
if MCP_AVAILABLE:
|
| 172 |
-
try:
|
| 173 |
-
loop = asyncio.get_event_loop()
|
| 174 |
-
if loop.is_running():
|
| 175 |
-
if nest_asyncio:
|
| 176 |
-
audio_path = nest_asyncio.run(generate_speech_mcp(text))
|
| 177 |
-
if audio_path:
|
| 178 |
-
logger.info("Generated speech via MCP")
|
| 179 |
-
return audio_path
|
| 180 |
-
else:
|
| 181 |
-
audio_path = loop.run_until_complete(generate_speech_mcp(text))
|
| 182 |
-
if audio_path:
|
| 183 |
-
return audio_path
|
| 184 |
-
except Exception as e:
|
| 185 |
-
pass
|
| 186 |
-
|
| 187 |
if not TTS_AVAILABLE:
|
| 188 |
logger.error("TTS library not installed. Please install TTS to use voice generation.")
|
| 189 |
-
|
|
|
|
| 190 |
|
| 191 |
if config.global_tts_model is None:
|
| 192 |
initialize_tts_model()
|
| 193 |
|
| 194 |
if config.global_tts_model is None:
|
| 195 |
logger.error("TTS model not available. Please check dependencies.")
|
| 196 |
-
return
|
| 197 |
|
| 198 |
try:
|
| 199 |
wav = config.global_tts_model.tts(text)
|
|
@@ -201,6 +213,6 @@ def generate_speech(text: str):
|
|
| 201 |
sf.write(tmp_file.name, wav, samplerate=22050)
|
| 202 |
return tmp_file.name
|
| 203 |
except Exception as e:
|
| 204 |
-
logger.error(f"TTS error: {e}")
|
| 205 |
-
return
|
| 206 |
|
|
|
|
| 111 |
return ""
|
| 112 |
|
| 113 |
async def generate_speech_mcp(text: str) -> str:
|
| 114 |
+
"""Generate speech using MCP text_to_speech tool (fallback path)."""
|
| 115 |
if not MCP_AVAILABLE:
|
| 116 |
return None
|
| 117 |
|
|
|
|
| 163 |
logger.warning(f"MCP TTS error: {e}")
|
| 164 |
return None
|
| 165 |
|
| 166 |
+
|
| 167 |
+
def _generate_speech_via_mcp(text: str):
    """Run the async MCP text-to-speech call from a synchronous context.

    Args:
        text: Text to synthesize.

    Returns:
        The truthy result of ``generate_speech_mcp(text)`` (presumably the
        path of the generated audio file), or ``None`` when MCP is
        unavailable, when an event loop is already running but
        ``nest_asyncio`` is not available, when the MCP call yields nothing,
        or when any exception is raised along the way.
    """
    if not MCP_AVAILABLE:
        return None
    try:
        loop = asyncio.get_event_loop()
        if loop.is_running():
            if not nest_asyncio:
                logger.error("nest_asyncio not available for nested async TTS via MCP")
                return None
            # nest_asyncio has no run() function; its public API is apply(),
            # which patches the loop so run_until_complete() may be re-entered
            # while the loop is already running.
            nest_asyncio.apply(loop)
        audio_path = loop.run_until_complete(generate_speech_mcp(text))
        if audio_path:
            logger.info("Generated speech via MCP")
            return audio_path
        return None
    except Exception as e:
        # Fallback path is best-effort: log and report "no audio" to the caller.
        logger.warning(f"MCP TTS error (sync wrapper): {e}")
        return None
|
| 187 |
+
|
| 188 |
def generate_speech(text: str):
|
| 189 |
+
"""Generate speech from text using local maya1 TTS model (with MCP fallback).
|
| 190 |
+
|
| 191 |
+
The primary path uses the local TTS model (maya-research/maya1). MCP-based
|
| 192 |
+
TTS is only used as a last-resort fallback if the local model is unavailable
|
| 193 |
+
or fails.
|
| 194 |
+
"""
|
| 195 |
if not text or len(text.strip()) == 0:
|
| 196 |
return None
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
if not TTS_AVAILABLE:
|
| 199 |
logger.error("TTS library not installed. Please install TTS to use voice generation.")
|
| 200 |
+
# As a last resort, try MCP-based TTS if available
|
| 201 |
+
return _generate_speech_via_mcp(text)
|
| 202 |
|
| 203 |
if config.global_tts_model is None:
|
| 204 |
initialize_tts_model()
|
| 205 |
|
| 206 |
if config.global_tts_model is None:
|
| 207 |
logger.error("TTS model not available. Please check dependencies.")
|
| 208 |
+
return _generate_speech_via_mcp(text)
|
| 209 |
|
| 210 |
try:
|
| 211 |
wav = config.global_tts_model.tts(text)
|
|
|
|
| 213 |
sf.write(tmp_file.name, wav, samplerate=22050)
|
| 214 |
return tmp_file.name
|
| 215 |
except Exception as e:
|
| 216 |
+
logger.error(f"TTS error (local maya1): {e}")
|
| 217 |
+
return _generate_speech_via_mcp(text)
|
| 218 |
|