Y Phung Nguyen committed
Commit 2506ee7
Parent(s): 4ea2fc7

Use Maya1 TTS

Files changed (2):
  1. ui.py    +30 -3
  2. voice.py +34 -22
ui.py CHANGED
@@ -6,7 +6,14 @@ from config import TITLE, DESCRIPTION, CSS, MEDSWIN_MODELS, DEFAULT_MEDICAL_MODE
 from indexing import create_or_update_index
 from pipeline import stream_chat
 from voice import transcribe_audio, generate_speech
-from models import initialize_medical_model, is_model_loaded, get_model_loading_state, set_model_loading_state
+from models import (
+    initialize_medical_model,
+    is_model_loaded,
+    get_model_loading_state,
+    set_model_loading_state,
+    initialize_tts_model,
+    TTS_AVAILABLE,
+)
 from logger import logger
 
 
@@ -314,7 +321,7 @@ def create_demo():
         return "⚠️ Model not loaded. Click to load or it will load on first use.", False
 
     # GPU-decorated function to load model on startup
-    @spaces.GPU(max_duration=120)
+    # @spaces.GPU(max_duration=120)
     def load_default_model_on_startup():
         """Load default medical model on startup (GPU-decorated for ZeroGPU compatibility)"""
         try:
@@ -336,6 +343,20 @@ def create_demo():
             logger.error(f"Error in model loading startup: {e}")
             return f"⚠️ Startup loading error: {str(e)[:100]}"
 
+    # GPU-decorated function to load default TTS model on startup
+    # @spaces.GPU(max_duration=120)
+    def load_tts_model_on_startup():
+        """Load default TTS model (maya1) on startup"""
+        try:
+            if not TTS_AVAILABLE:
+                logger.warning("TTS library not installed; skipping TTS preload.")
+                return
+            logger.info("Loading default TTS model (maya1) on startup...")
+            initialize_tts_model()
+            logger.info("✅ Default TTS model (maya1) loaded successfully on startup!")
+        except Exception as e:
+            logger.error(f"Error in TTS model loading startup: {e}")
+
     # Initialize status on load
     def init_model_status():
         status_text, is_ready = check_model_status(DEFAULT_MEDICAL_MODEL)
@@ -357,11 +378,17 @@ def create_demo():
         outputs=[model_status, submit_button, message_input]
     )
 
-    # Load default model on startup (GPU-decorated function)
+    # Load default medical model on startup (GPU-decorated function)
     demo.load(
         fn=load_default_model_on_startup,
         outputs=[model_status]
     )
+    # Load default TTS model (maya1) on startup (GPU-decorated function)
+    demo.load(
+        fn=load_tts_model_on_startup,
+        inputs=None,
+        outputs=None
+    )
 
     # Wrap stream_chat to check model status before execution
    def stream_chat_with_model_check(
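
Note on the ui.py change: Gradio's Blocks.load event runs its callback each time the app loads in the browser, which is how the two startup hooks above get triggered. Below is a minimal, self-contained sketch of that pattern; it is not code from this repo, and the function and component names are illustrative stand-ins.

    import gradio as gr

    def preload_models():
        # Stand-in for the expensive work done by load_default_model_on_startup().
        return "✅ Default model ready"

    def preload_tts():
        # Stand-in for load_tts_model_on_startup(); returns nothing, mirroring
        # the second demo.load() call registered with outputs=None.
        return None

    with gr.Blocks() as demo:
        status = gr.Markdown("Loading models...")
        # First hook updates the status component when loading finishes.
        demo.load(fn=preload_models, outputs=[status])
        # Second hook has no inputs/outputs; it only triggers the TTS preload.
        demo.load(fn=preload_tts, inputs=None, outputs=None)

    if __name__ == "__main__":
        demo.launch()

Registering two separate load hooks, as the commit does, keeps the medical-model status UI independent of the TTS preload, which has no visible output.
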
voice.py CHANGED
@@ -111,7 +111,7 @@ def transcribe_audio(audio):
         return ""
 
 async def generate_speech_mcp(text: str) -> str:
-    """Generate speech using MCP text_to_speech tool"""
+    """Generate speech using MCP text_to_speech tool (fallback path)."""
     if not MCP_AVAILABLE:
         return None
 
@@ -163,37 +163,49 @@ async def generate_speech_mcp(text: str) -> str:
         logger.warning(f"MCP TTS error: {e}")
         return None
 
+
+def _generate_speech_via_mcp(text: str):
+    """Helper to generate speech via MCP in a synchronous context."""
+    if not MCP_AVAILABLE:
+        return None
+    try:
+        loop = asyncio.get_event_loop()
+        if loop.is_running():
+            if nest_asyncio:
+                audio_path = nest_asyncio.run(generate_speech_mcp(text))
+            else:
+                logger.error("nest_asyncio not available for nested async TTS via MCP")
+                return None
+        else:
+            audio_path = loop.run_until_complete(generate_speech_mcp(text))
+        if audio_path:
+            logger.info("Generated speech via MCP")
+            return audio_path
+    except Exception as e:
+        logger.warning(f"MCP TTS error (sync wrapper): {e}")
+    return None
+
 def generate_speech(text: str):
-    """Generate speech from text using TTS model (with MCP fallback)"""
+    """Generate speech from text using local maya1 TTS model (with MCP fallback).
+
+    The primary path uses the local TTS model (maya-research/maya1). MCP-based
+    TTS is only used as a last-resort fallback if the local model is unavailable
+    or fails.
+    """
     if not text or len(text.strip()) == 0:
         return None
 
-    if MCP_AVAILABLE:
-        try:
-            loop = asyncio.get_event_loop()
-            if loop.is_running():
-                if nest_asyncio:
-                    audio_path = nest_asyncio.run(generate_speech_mcp(text))
-                    if audio_path:
-                        logger.info("Generated speech via MCP")
-                        return audio_path
-            else:
-                audio_path = loop.run_until_complete(generate_speech_mcp(text))
-                if audio_path:
-                    return audio_path
-        except Exception as e:
-            pass
-
     if not TTS_AVAILABLE:
         logger.error("TTS library not installed. Please install TTS to use voice generation.")
-        return None
+        # As a last resort, try MCP-based TTS if available
+        return _generate_speech_via_mcp(text)
 
     if config.global_tts_model is None:
         initialize_tts_model()
 
     if config.global_tts_model is None:
         logger.error("TTS model not available. Please check dependencies.")
-        return None
+        return _generate_speech_via_mcp(text)
 
     try:
         wav = config.global_tts_model.tts(text)
@@ -201,6 +213,6 @@ def generate_speech(text: str):
             sf.write(tmp_file.name, wav, samplerate=22050)
             return tmp_file.name
     except Exception as e:
-        logger.error(f"TTS error: {e}")
-        return None
+        logger.error(f"TTS error (local maya1): {e}")
+        return _generate_speech_via_mcp(text)
 
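
Note on the voice.py change: the new _generate_speech_via_mcp helper wraps an async MCP call so it can be invoked from synchronous code whether or not an event loop is already running. Below is a generic sketch of that sync-over-async pattern, not taken from voice.py; the names synthesize and synthesize_sync are illustrative, and the sketch uses nest_asyncio.apply() together with run_until_complete.

    import asyncio

    try:
        import nest_asyncio  # lets run_until_complete nest inside a running loop
    except ImportError:
        nest_asyncio = None

    async def synthesize(text: str) -> str:
        # Placeholder for an async TTS/MCP call that returns an audio file path.
        await asyncio.sleep(0)
        return "/tmp/example.wav"

    def synthesize_sync(text: str):
        """Run the coroutine from sync code, with or without a live event loop."""
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            # No loop in this thread: asyncio.run creates and closes one.
            return asyncio.run(synthesize(text))
        if loop.is_running():
            if nest_asyncio is None:
                return None  # cannot re-enter a running loop without nest_asyncio
            nest_asyncio.apply(loop)
        return loop.run_until_complete(synthesize(text))

    if __name__ == "__main__":
        print(synthesize_sync("hello"))

The design mirrors the commit's fallback order: prefer the caller's existing loop when one is running, and bail out gracefully when the nesting shim is unavailable instead of raising inside the TTS path.
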