# VoiceMM / app.py
from __future__ import annotations
import tempfile
from functools import lru_cache
from pathlib import Path
import gradio as gr
import numpy as np
import soundfile as sf
from kokoro import KPipeline
SPACE_TITLE = "VoiceMM TTS API"
SAMPLE_RATE = 24_000
MAX_CHARS = 450
VOICE_OPTIONS = {
"pf_dora": "Dora, feminina e clara",
"pm_alex": "Alex, masculina e neutra",
"pm_santa": "Santa, masculina e encorpada",
}
EXAMPLES = [
[
"Seu produto ficou pronto. Agora ele tem uma voz que passa confianca, ritmo e presenca.",
"pf_dora",
1.0,
],
[
"Apresente sua startup em vinte segundos: problema, promessa e chamada para acao.",
"pm_alex",
1.05,
],
[
"Bem-vindo ao VoiceMM. Transforme roteiro em audio com uma interface simples e bonita.",
"pm_santa",
0.95,
],
]
CSS = """
.gradio-container {
background:
radial-gradient(circle at top left, rgba(237, 180, 93, 0.18), transparent 30%),
radial-gradient(circle at top right, rgba(33, 181, 168, 0.12), transparent 28%),
#0f1518;
}
.voicelek-shell {
max-width: 1024px;
margin: 0 auto;
}
.voicelek-kicker {
letter-spacing: 0.18em;
text-transform: uppercase;
color: #efbf74;
font-size: 0.8rem;
}
"""
@lru_cache(maxsize=8)
def get_pipeline(lang_code: str) -> KPipeline:
    """Return a Kokoro pipeline for *lang_code*, caching up to 8 instances.

    Pipeline construction is expensive (model load), so repeated requests
    for the same language reuse the cached object.
    """
    return KPipeline(lang_code=lang_code)
def normalize_text(text: str) -> str:
    """Collapse all whitespace runs in *text* to single spaces and validate it.

    Raises:
        gr.Error: if the text is empty/whitespace-only, or if the collapsed
            text exceeds MAX_CHARS.
    """
    words = (text or "").split()
    if not words:
        raise gr.Error("Digite algum texto antes de gerar o audio.")
    collapsed = " ".join(words)
    if len(collapsed) > MAX_CHARS:
        raise gr.Error(
            f"Use no maximo {MAX_CHARS} caracteres por vez para manter a latencia boa no plano gratis."
        )
    return collapsed
def synthesize(text: str, voice: str, speed: float) -> tuple[str, str]:
    """Generate speech for *text* and return (wav file path, markdown summary).

    Raises:
        gr.Error: on empty/too-long input (via normalize_text) or when the
            model yields no audio chunks.
    """
    script = normalize_text(text)
    # The leading letter of the voice id doubles as the pipeline language
    # code (e.g. "pf_dora" -> "p") — see get_pipeline.
    pipeline = get_pipeline(voice[0])
    segments = [
        np.asarray(audio, dtype=np.float32)
        for _, _, audio in pipeline(script, voice=voice, speed=float(speed))
    ]
    if not segments:
        raise gr.Error("O modelo nao conseguiu gerar audio para esse texto.")
    waveform = np.concatenate(segments)
    # A fresh temp directory per request avoids filename collisions between
    # concurrent calls.
    wav_path = Path(tempfile.mkdtemp(prefix="voicelek_")) / "voicelek-output.wav"
    sf.write(wav_path, waveform, SAMPLE_RATE)
    seconds = len(waveform) / SAMPLE_RATE
    details = (
        f"**Voz:** {VOICE_OPTIONS[voice]} \n"
        f"**Velocidade:** {speed:.2f}x \n"
        f"**Entrada:** {len(script)} caracteres \n"
        f"**Duracao estimada:** {seconds:.1f}s"
    )
    return str(wav_path), details
# ---------------------------------------------------------------------------
# UI definition.
#
# FIX: `theme` and `css` were previously passed to demo.launch(), but in
# Gradio they are gr.Blocks() constructor parameters — launch() does not
# accept them, so launching raised TypeError and the styling never applied.
# They now live on the Blocks constructor. `footer_links` is not a Gradio
# parameter and was removed.
with gr.Blocks(
    title=SPACE_TITLE,
    theme=gr.themes.Soft(
        primary_hue="amber",
        secondary_hue="teal",
        neutral_hue="slate",
    ),
    css=CSS,
) as demo:
    with gr.Column(elem_classes="voicelek-shell"):
        gr.Markdown(
            """
<div class="voicelek-kicker">VoiceMM</div>

# API de TTS em portugues brasileiro

Esta Space foi pensada para ser o backend de um frontend estatico no GitHub Pages.
O endpoint publico principal e `"/synthesize"`.
""",
        )
        with gr.Row():
            with gr.Column(scale=3):
                text_input = gr.Textbox(
                    label="Texto",
                    lines=8,
                    max_lines=12,
                    placeholder="Cole aqui sua copy, roteiro, CTA ou locucao curta.",
                    value=EXAMPLES[0][0],
                )
            with gr.Column(scale=2):
                voice_input = gr.Dropdown(
                    choices=[(label, key) for key, label in VOICE_OPTIONS.items()],
                    value="pf_dora",
                    label="Voz",
                )
                speed_input = gr.Slider(
                    minimum=0.8,
                    maximum=1.25,
                    value=1.0,
                    step=0.05,
                    label="Velocidade",
                )
                generate_button = gr.Button("Gerar audio", variant="primary")
        audio_output = gr.Audio(
            label="Saida",
            type="filepath",
            format="wav",
        )
        details_output = gr.Markdown(
            value="Pronto para receber chamadas via navegador ou direto pela API do Gradio."
        )
        gr.Examples(
            examples=EXAMPLES,
            inputs=[text_input, voice_input, speed_input],
            label="Exemplos rapidos",
        )
    # api_name exposes this handler as the public "/synthesize" endpoint for
    # the static GitHub Pages frontend (via the Gradio client).
    generate_button.click(
        fn=synthesize,
        inputs=[text_input, voice_input, speed_input],
        outputs=[audio_output, details_output],
        api_name="synthesize",
    )

# Single worker + small queue keeps latency predictable on free-tier hardware.
demo.queue(default_concurrency_limit=1, max_size=16)

if __name__ == "__main__":
    demo.launch()