| from __future__ import annotations |
|
|
| import tempfile |
| from functools import lru_cache |
| from pathlib import Path |
|
|
| import gradio as gr |
| import numpy as np |
| import soundfile as sf |
| from kokoro import KPipeline |
|
|
# Title passed to gr.Blocks when the interface is built.
SPACE_TITLE: str = "VoiceMM TTS API"
# Sample rate handed to soundfile when writing the WAV and used to derive the
# clip duration.
# NOTE(review): presumably matches the rate of the audio the pipeline yields -- confirm.
SAMPLE_RATE: int = 24_000
# Maximum length (after whitespace normalisation) accepted by normalize_text.
MAX_CHARS: int = 450
|
|
# Voice id -> human-readable label shown in the voice dropdown and echoed in
# the details panel. synthesize() passes the first character of the id to
# get_pipeline() as the language code, so new ids must keep that convention.
VOICE_OPTIONS: dict[str, str] = {
    "pf_dora": "Dora, feminina e clara",
    "pm_alex": "Alex, masculina e neutra",
    "pm_santa": "Santa, masculina e encorpada",
}
|
|
# Rows for gr.Examples, in the order [text, voice id, speed]. The first row's
# text is also the default value of the text box.
#
# The texts are spoken by a Portuguese TTS voice, so they must carry their
# diacritics: "acao" / "confianca" / "presenca" / "audio" are different
# grapheme sequences from "ação" / "confiança" / "presença" / "áudio" and are
# pronounced differently by the engine.
EXAMPLES = [
    [
        "Seu produto ficou pronto. Agora ele tem uma voz que passa confiança, ritmo e presença.",
        "pf_dora",
        1.0,
    ],
    [
        "Apresente sua startup em vinte segundos: problema, promessa e chamada para ação.",
        "pm_alex",
        1.05,
    ],
    [
        "Bem-vindo ao VoiceMM. Transforme roteiro em áudio com uma interface simples e bonita.",
        "pm_santa",
        0.95,
    ],
]
|
|
# Custom stylesheet passed to demo.launch(css=...). It sets the page
# background on .gradio-container and styles two classes used by the layout:
# "voicelek-shell" (elem_classes of the outer column) and "voicelek-kicker"
# (the <div> at the top of the header Markdown).
CSS: str = """
.gradio-container {
background:
radial-gradient(circle at top left, rgba(237, 180, 93, 0.18), transparent 30%),
radial-gradient(circle at top right, rgba(33, 181, 168, 0.12), transparent 28%),
#0f1518;
}

.voicelek-shell {
max-width: 1024px;
margin: 0 auto;
}

.voicelek-kicker {
letter-spacing: 0.18em;
text-transform: uppercase;
color: #efbf74;
font-size: 0.8rem;
}
"""
|
|
|
|
@lru_cache(maxsize=8)
def get_pipeline(lang_code: str) -> KPipeline:
    """Return the ``KPipeline`` for ``lang_code``, building it on first use.

    Results are memoised per language code (up to 8 entries), so repeated
    requests for the same language reuse one pipeline instance.
    """
    pipeline = KPipeline(lang_code=lang_code)
    return pipeline
|
|
|
|
def normalize_text(text: str) -> str:
    """Collapse whitespace in ``text`` and enforce the input limits.

    Runs of whitespace (including newlines) become single spaces and leading
    or trailing whitespace is dropped. A falsy ``text`` is treated as empty.

    Returns:
        The normalised text.

    Raises:
        gr.Error: If nothing remains after normalisation, or if the result
            is longer than ``MAX_CHARS``.
    """
    words = (text or "").split()
    if not words:
        raise gr.Error("Digite algum texto antes de gerar o audio.")

    collapsed = " ".join(words)
    too_long = len(collapsed) > MAX_CHARS
    if too_long:
        raise gr.Error(
            f"Use no maximo {MAX_CHARS} caracteres por vez para manter a latencia boa no plano gratis."
        )
    return collapsed
|
|
|
|
def synthesize(text: str, voice: str, speed: float) -> tuple[str, str]:
    """Synthesize ``text`` to a WAV file and describe the result.

    Args:
        text: Raw user text; normalised and length-checked by
            ``normalize_text``.
        voice: A key of ``VOICE_OPTIONS``. Its first character is passed to
            ``get_pipeline`` as the language code.
        speed: Speed multiplier forwarded to the pipeline.

    Returns:
        A ``(wav_path, details_markdown)`` pair: the path of the written WAV
        file and a Markdown summary (voice label, speed, input length,
        duration).

    Raises:
        gr.Error: If the text is empty or too long, the voice is not a known
            option, the speed is not numeric, or the pipeline yields no audio.
    """
    cleaned = normalize_text(text)

    # Validate before running the model: an empty voice would otherwise fail
    # with IndexError on voice[0], and an unknown one with KeyError only after
    # the audio had already been generated.
    if not isinstance(voice, str) or voice not in VOICE_OPTIONS:
        raise gr.Error("Voz invalida. Escolha uma das vozes disponiveis.")

    # Convert once so the pipeline and the details text use the same value.
    try:
        speed_value = float(speed)
    except (TypeError, ValueError):
        raise gr.Error("Velocidade invalida. Informe um numero.") from None

    pipeline = get_pipeline(voice[0])

    # Each item yielded by the pipeline is unpacked as a 3-tuple whose last
    # element is the audio for that segment; the first two are unused here.
    chunks: list[np.ndarray] = [
        np.asarray(audio, dtype=np.float32)
        for _, _, audio in pipeline(cleaned, voice=voice, speed=speed_value)
    ]
    if not chunks:
        raise gr.Error("O modelo nao conseguiu gerar audio para esse texto.")

    waveform = np.concatenate(chunks)

    # NOTE: every call creates a fresh temporary directory that this function
    # never removes (the caller still needs the file after we return).
    output_dir = Path(tempfile.mkdtemp(prefix="voicelek_"))
    output_path = output_dir / "voicelek-output.wav"
    sf.write(output_path, waveform, SAMPLE_RATE)

    duration_seconds = len(waveform) / SAMPLE_RATE
    # The two spaces before each newline are Markdown hard line breaks.
    details = (
        f"**Voz:** {VOICE_OPTIONS[voice]}  \n"
        f"**Velocidade:** {speed_value:.2f}x  \n"
        f"**Entrada:** {len(cleaned)} caracteres  \n"
        f"**Duracao estimada:** {duration_seconds:.1f}s"
    )
    return str(output_path), details
|
|
|
|
# Build the interface: header, input controls, outputs, examples, and the
# click handler that is also published as the "synthesize" API endpoint.
with gr.Blocks(title=SPACE_TITLE) as demo:
    with gr.Column(elem_classes="voicelek-shell"):
        # The blank line after the <div> is required: in Markdown an HTML
        # block runs until the next blank line, so without it the "# ..."
        # heading would be swallowed into the HTML block and shown as text.
        gr.Markdown(
            """
            <div class="voicelek-kicker">VoiceMM</div>

            # API de TTS em portugues brasileiro

            Esta Space foi pensada para ser o backend de um frontend estatico no GitHub Pages.
            O endpoint publico principal e `"/synthesize"`.
            """,
        )

        with gr.Row():
            with gr.Column(scale=3):
                text_input = gr.Textbox(
                    label="Texto",
                    lines=8,
                    max_lines=12,
                    placeholder="Cole aqui sua copy, roteiro, CTA ou locucao curta.",
                    value=EXAMPLES[0][0],
                )
            with gr.Column(scale=2):
                # Choices are (label, value) pairs: the dropdown shows the
                # description and passes the voice id to the handler.
                voice_input = gr.Dropdown(
                    choices=[(label, key) for key, label in VOICE_OPTIONS.items()],
                    value="pf_dora",
                    label="Voz",
                )
                speed_input = gr.Slider(
                    minimum=0.8,
                    maximum=1.25,
                    value=1.0,
                    step=0.05,
                    label="Velocidade",
                )
                generate_button = gr.Button("Gerar audio", variant="primary")

        audio_output = gr.Audio(
            label="Saida",
            type="filepath",
            format="wav",
        )
        details_output = gr.Markdown(
            value="Pronto para receber chamadas via navegador ou direto pela API do Gradio."
        )

        # No fn/outputs are given, so the examples only fill in the inputs.
        gr.Examples(
            examples=EXAMPLES,
            inputs=[text_input, voice_input, speed_input],
            label="Exemplos rapidos",
        )

        generate_button.click(
            fn=synthesize,
            inputs=[text_input, voice_input, speed_input],
            outputs=[audio_output, details_output],
            api_name="synthesize",
        )

# Queue settings: default concurrency limit of 1 and a maximum queue size of 16.
demo.queue(default_concurrency_limit=1, max_size=16)
|
|
if __name__ == "__main__":
    # Launch only when run as a script; importing the module just builds `demo`.
    soft_theme = gr.themes.Soft(
        primary_hue="amber",
        secondary_hue="teal",
        neutral_hue="slate",
    )
    footer = ["api", "gradio", "settings"]
    demo.launch(theme=soft_theme, css=CSS, footer_links=footer)
|
|