xieli committed on
Commit
5bdad28
·
1 Parent(s): 8a0d894
Files changed (1) hide show
  1. app.py +19 -12
app.py CHANGED
@@ -41,6 +41,23 @@ whisper_asr = None
41
  args_global = None
42
  _model_lock = threading.Lock() # Thread lock for model initialization
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def initialize_models():
45
  """Initialize models on first GPU call (ZeroGPU optimization: load inside GPU context)"""
46
  global encoder, common_tts_engine, args_global
@@ -437,29 +454,19 @@ class EditxTab:
437
  def transcribe_audio(self, audio_input, current_text):
438
  """Transcribe audio using Whisper ASR when prompt text is empty"""
439
  global whisper_asr
440
-
441
  # Only transcribe if current text is empty
442
  if current_text and current_text.strip():
443
  return current_text # Keep existing text
444
-
445
  if not audio_input:
446
  return "" # No audio to transcribe
 
 
447
 
448
  try:
449
- # Initialize whisper if not already loaded
450
- if whisper_asr is None:
451
- if args_global is None:
452
- self.logger.error("Global args not set. Cannot initialize Whisper.")
453
- return ""
454
-
455
- whisper_asr = WhisperWrapper()
456
- self.logger.info("✓ WhisperWrapper initialized for ASR")
457
-
458
  # Transcribe audio
459
  transcribed_text = whisper_asr(audio_input)
460
  self.logger.info(f"Audio transcribed: {transcribed_text}")
461
  return transcribed_text
462
-
463
  except Exception as e:
464
  self.logger.error(f"Failed to transcribe audio: {e}")
465
  return ""
 
41
  args_global = None
42
  _model_lock = threading.Lock() # Thread lock for model initialization
43
 
44
+
45
+ def initialize_whisper():
46
+ global whisper_asr
47
+ if whisper_asr is not None:
48
+ return
49
+ with _model_lock:
50
+ if whisper_asr is not None:
51
+ return
52
+ try:
53
+ whisper_asr = WhisperWrapper()
54
+ logger.info("✓ WhisperWrapper initialized for ASR")
55
+ except Exception as e:
56
+ logger.error(f"❌ Error loading Whisper ASR model: {e}")
57
+ raise
58
+
59
+
60
+
61
  def initialize_models():
62
  """Initialize models on first GPU call (ZeroGPU optimization: load inside GPU context)"""
63
  global encoder, common_tts_engine, args_global
 
454
  def transcribe_audio(self, audio_input, current_text):
455
  """Transcribe audio using Whisper ASR when prompt text is empty"""
456
  global whisper_asr
 
457
  # Only transcribe if current text is empty
458
  if current_text and current_text.strip():
459
  return current_text # Keep existing text
 
460
  if not audio_input:
461
  return "" # No audio to transcribe
462
+ if whisper_asr is None:
463
+ initialize_whisper()
464
 
465
  try:
 
 
 
 
 
 
 
 
 
466
  # Transcribe audio
467
  transcribed_text = whisper_asr(audio_input)
468
  self.logger.info(f"Audio transcribed: {transcribed_text}")
469
  return transcribed_text
 
470
  except Exception as e:
471
  self.logger.error(f"Failed to transcribe audio: {e}")
472
  return ""