```python
import gradio as gr
import torch
from nemo.collections.speechlm2.models import SALM

# Download and load the model (1.3 GB → fits in 16 GB of RAM)
MODEL_NAME = "nvidia/canary-qwen-2.5b"
device = "cuda" if torch.cuda.is_available() else "cpu"
model = SALM.from_pretrained(MODEL_NAME).to(device).eval()

def transcribe(audio_path):
    # Gradio's Audio component (type="filepath") passes the path of the
    # uploaded or recorded file; Canary-Qwen expects 16 kHz mono speech.
    answer_ids = model.generate(
        prompts=[[{
            "role": "user",
            "content": f"Transcribe the following: {model.audio_locator_tag}",
            "audio": [audio_path],
        }]],
        max_new_tokens=128,
    )
    return model.tokenizer.ids_to_text(answer_ids[0].cpu())

demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload or record audio"),
    outputs=gr.Textbox(label="Transcription"),
)
demo.launch(server_name="0.0.0.0", server_port=7860)
```
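With the Space up, a quick way to smoke-test the endpoint from Python is the `gradio_client` package. A minimal sketch, assuming a recent `gradio_client`, a demo reachable at `http://localhost:7860`, and a hypothetical local file `sample.wav`; `api_name="/predict"` is the default route that `gr.Interface` exposes:

```python
from gradio_client import Client, handle_file

# Point the client at the running demo (swap in the public Space URL if deployed).
client = Client("http://localhost:7860")

# Send a local audio file (hypothetical sample.wav) to the transcription endpoint.
result = client.predict(
    handle_file("sample.wav"),
    api_name="/predict",
)
print(result)  # the transcription string returned by transcribe()
```

This is just a convenience for testing; any HTTP client that can hit the Gradio API would work as well.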