juliocadenas's picture
Create app.py
23d61f3 verified
raw
history blame contribute delete
720 Bytes
import gradio as gr
import torch, os
from nemo.collections.speechlm2.models import SALM
# Descarga y carga el modelo (1,3 GB → cabe en 16 GB RAM)
MODEL_NAME = "nvidia/canary-qwen-2.5b"
device = "cuda" if torch.cuda.is_available() else "cpu"
model = SALM.from_pretrained(MODEL_NAME).to(device).eval()
def transcribe(audio):
# Gradio ya entrega un .wav de 16 kHz mono
with open(audio, "rb") as f:
hyp = model.transcribe([f.read()], batch_size=1)[0][0]
return hyp
demo = gr.Interface(fn=transcribe,
inputs=gr.Audio(type="filepath", label="Sube o graba audio"),
outputs=gr.Textbox(label="Transcripción"))
demo.launch(server_name="0.0.0.0", server_port=7860)