Spaces:

stepfun-ai
/

Step-Audio-EditX

Running on Zero

xieli committed on Nov 10

Commit

5bdad28

1 Parent(s): 8a0d894

feat: fix

Files changed (1) hide show

app.py CHANGED Viewed

@@ -41,6 +41,23 @@ whisper_asr = None
 args_global = None
 _model_lock = threading.Lock()  # Thread lock for model initialization
 def initialize_models():
     """Initialize models on first GPU call (ZeroGPU optimization: load inside GPU context)"""
     global encoder, common_tts_engine, args_global
@@ -437,29 +454,19 @@ class EditxTab:
     def transcribe_audio(self, audio_input, current_text):
         """Transcribe audio using Whisper ASR when prompt text is empty"""
         global whisper_asr
         # Only transcribe if current text is empty
         if current_text and current_text.strip():
             return current_text  # Keep existing text
         if not audio_input:
             return ""  # No audio to transcribe
         try:
-            # Initialize whisper if not already loaded
-            if whisper_asr is None:
-                if args_global is None:
-                    self.logger.error("Global args not set. Cannot initialize Whisper.")
-                    return ""
-                whisper_asr = WhisperWrapper()
-                self.logger.info("✓ WhisperWrapper initialized for ASR")
             # Transcribe audio
             transcribed_text = whisper_asr(audio_input)
             self.logger.info(f"Audio transcribed: {transcribed_text}")
             return transcribed_text
         except Exception as e:
             self.logger.error(f"Failed to transcribe audio: {e}")
             return ""

 args_global = None
 _model_lock = threading.Lock()  # Thread lock for model initialization
def initialize_whisper():
    """Create the shared Whisper ASR wrapper once and store it in ``whisper_asr``.

    Does nothing when ``whisper_asr`` is already set. Otherwise the wrapper is
    built while holding ``_model_lock``, so concurrent first calls construct it
    only once.

    Raises:
        Exception: whatever ``WhisperWrapper()`` raises; it is logged here and
            then re-raised unchanged.
    """
    global whisper_asr
    # Cheap unlocked check first so callers skip the lock once the model exists.
    if whisper_asr is None:
        # NOTE(review): _model_lock is a plain threading.Lock (not reentrant),
        # so this must not be called by code that already holds it.
        with _model_lock:
            # Check again under the lock: another caller may have finished
            # loading while this one was waiting to acquire it.
            if whisper_asr is None:
                try:
                    whisper_asr = WhisperWrapper()
                    logger.info("✓ WhisperWrapper initialized for ASR")
                except Exception as load_error:
                    logger.error(f"❌ Error loading Whisper ASR model: {load_error}")
                    raise
 def initialize_models():
     """Initialize models on first GPU call (ZeroGPU optimization: load inside GPU context)"""
     global encoder, common_tts_engine, args_global
     def transcribe_audio(self, audio_input, current_text):
         """Transcribe the prompt audio with Whisper ASR when the prompt text is empty.

         Args:
             audio_input: Audio value passed straight to ``whisper_asr``; any
                 falsy value means there is nothing to transcribe.
             current_text: Prompt text currently entered. It is returned
                 unchanged when it contains non-whitespace characters.

         Returns:
             ``current_text`` when it is non-blank; otherwise the transcription,
             or ``""`` when there is no audio or when loading or running the
             ASR model fails (the failure is logged).
         """
         global whisper_asr
         # Text that is already present always wins over a transcription.
         if current_text and current_text.strip():
             return current_text  # Keep existing text
         if not audio_input:
             return ""  # No audio to transcribe
         try:
             # Load the model inside the try block so that a failed load takes
             # the same logged, empty-string path as a failed transcription
             # instead of propagating out of this method.
             if whisper_asr is None:
                 initialize_whisper()
             # Transcribe audio
             transcribed_text = whisper_asr(audio_input)
             self.logger.info(f"Audio transcribed: {transcribed_text}")
             return transcribed_text
         except Exception as e:
             self.logger.error(f"Failed to transcribe audio: {e}")
             return ""