Spaces:
Running
on
Zero
Running
on
Zero
xieli
committed on
Commit
·
5bdad28
1
Parent(s):
8a0d894
feat: fix
Browse files
app.py
CHANGED
|
@@ -41,6 +41,23 @@ whisper_asr = None
|
|
| 41 |
args_global = None
|
| 42 |
_model_lock = threading.Lock() # Thread lock for model initialization
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
def initialize_models():
|
| 45 |
"""Initialize models on first GPU call (ZeroGPU optimization: load inside GPU context)"""
|
| 46 |
global encoder, common_tts_engine, args_global
|
|
@@ -437,29 +454,19 @@ class EditxTab:
|
|
| 437 |
def transcribe_audio(self, audio_input, current_text):
|
| 438 |
"""Transcribe audio using Whisper ASR when prompt text is empty"""
|
| 439 |
global whisper_asr
|
| 440 |
-
|
| 441 |
# Only transcribe if current text is empty
|
| 442 |
if current_text and current_text.strip():
|
| 443 |
return current_text # Keep existing text
|
| 444 |
-
|
| 445 |
if not audio_input:
|
| 446 |
return "" # No audio to transcribe
|
|
|
|
|
|
|
| 447 |
|
| 448 |
try:
|
| 449 |
-
# Initialize whisper if not already loaded
|
| 450 |
-
if whisper_asr is None:
|
| 451 |
-
if args_global is None:
|
| 452 |
-
self.logger.error("Global args not set. Cannot initialize Whisper.")
|
| 453 |
-
return ""
|
| 454 |
-
|
| 455 |
-
whisper_asr = WhisperWrapper()
|
| 456 |
-
self.logger.info("✓ WhisperWrapper initialized for ASR")
|
| 457 |
-
|
| 458 |
# Transcribe audio
|
| 459 |
transcribed_text = whisper_asr(audio_input)
|
| 460 |
self.logger.info(f"Audio transcribed: {transcribed_text}")
|
| 461 |
return transcribed_text
|
| 462 |
-
|
| 463 |
except Exception as e:
|
| 464 |
self.logger.error(f"Failed to transcribe audio: {e}")
|
| 465 |
return ""
|
|
|
|
| 41 |
args_global = None
|
| 42 |
_model_lock = threading.Lock() # Thread lock for model initialization
|
| 43 |
|
| 44 |
+
|
| 45 |
+
def initialize_whisper():
|
| 46 |
+
global whisper_asr
|
| 47 |
+
if whisper_asr is not None:
|
| 48 |
+
return
|
| 49 |
+
with _model_lock:
|
| 50 |
+
if whisper_asr is not None:
|
| 51 |
+
return
|
| 52 |
+
try:
|
| 53 |
+
whisper_asr = WhisperWrapper()
|
| 54 |
+
logger.info("✓ WhisperWrapper initialized for ASR")
|
| 55 |
+
except Exception as e:
|
| 56 |
+
logger.error(f"❌ Error loading Whisper ASR model: {e}")
|
| 57 |
+
raise
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
|
| 61 |
def initialize_models():
|
| 62 |
"""Initialize models on first GPU call (ZeroGPU optimization: load inside GPU context)"""
|
| 63 |
global encoder, common_tts_engine, args_global
|
|
|
|
| 454 |
def transcribe_audio(self, audio_input, current_text):
|
| 455 |
"""Transcribe audio using Whisper ASR when prompt text is empty"""
|
| 456 |
global whisper_asr
|
|
|
|
| 457 |
# Only transcribe if current text is empty
|
| 458 |
if current_text and current_text.strip():
|
| 459 |
return current_text # Keep existing text
|
|
|
|
| 460 |
if not audio_input:
|
| 461 |
return "" # No audio to transcribe
|
| 462 |
+
if whisper_asr is None:
|
| 463 |
+
initialize_whisper()
|
| 464 |
|
| 465 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
# Transcribe audio
|
| 467 |
transcribed_text = whisper_asr(audio_input)
|
| 468 |
self.logger.info(f"Audio transcribed: {transcribed_text}")
|
| 469 |
return transcribed_text
|
|
|
|
| 470 |
except Exception as e:
|
| 471 |
self.logger.error(f"Failed to transcribe audio: {e}")
|
| 472 |
return ""
|