Kokoro-TTS

Runtime error

App Files Files Community

Pendrokar committed on Sep 27

Commit

c26c08b

1 Parent(s): ffe17d7

IPA re-insert

Browse files

Files changed (1) hide show

app.py +15 -3

app.py CHANGED Viewed

@@ -56,9 +56,10 @@ def text_to_ipa(text, lang='en-us'):
     try:
         # Handle IPA sections within brackets
-        regex = r"\[[^\]]*\]"
         ipa_sections = re.findall(regex, text)
-        text = re.sub(regex, '[]', text)
         print(text)
         if lang == 'jb':
@@ -75,7 +76,8 @@ def text_to_ipa(text, lang='en-us'):
         # Add back IPA sections
         for ipa in ipa_sections:
-            ps = ps.replace('[ ]', ipa, 1)
         return ps
     except Exception as e:
@@ -115,6 +117,16 @@ def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE, lang
 # Arena API
 def predict(text, voice='af_heart', speed=1):
     return generate_first(text, voice, speed, use_gpu=False)[0]
 def tokenize_first(text, voice='af_heart', lang='en-us'):

     try:
         # Handle IPA sections within brackets
+        regex = r"\([^\]]*\)[[^\]]*\]"
         ipa_sections = re.findall(regex, text)
+        print(text)
+        text = re.sub(regex, '()[]', text)
         print(text)
         if lang == 'jb':
         # Add back IPA sections
         for ipa in ipa_sections:
+            ps = ps.replace('( )[ ]', ipa, 1)
+        print(ps)
         return ps
     except Exception as e:
 # Arena API
 def predict(text, voice='af_heart', speed=1):
+    """ Convert the text to speech using StyleTTS 2.
+        Args:
+            text: string; accepts IPA within ()[] brackets
+            voice: Literal['af_heart', 'af_bella', 'af_nicole', 'af_aoede', 'af_kore', 'af_sarah', 'af_nova', 'af_sky', 'af_alloy', 'af_jessica', 'af_river', 'am_michael', 'am_fenrir', 'am_puck', 'am_echo', 'am_eric', 'am_liam', 'am_onyx', 'am_santa', 'am_adam', 'bf_emma', 'bf_isabella', 'bf_alice', 'bf_lily', 'bm_george', 'bm_fable', 'bm_lewis', 'bm_daniel']; voice model
+            lang: Literal['en-us', 'cs', 'da', 'nl', 'et', 'fi', 'fr', 'de', 'el', 'it', 'no', 'pl', 'pt', 'ru', 'sl', 'es', 'sv', 'tr', 'jb']; ISO 639-1 code for the text language; 'jb' is a valid code for Lojban
+            speed: playback speed; 0.5-2
+        Returns: Tuple of (output_audio_path, ipa_results) where output_audio_path is the filepath of output audio
+    """
     return generate_first(text, voice, speed, use_gpu=False)[0]
 def tokenize_first(text, voice='af_heart', lang='en-us'):