NeyDev committed
Commit 152aeb3 (verified)
1 Parent(s): 06a41c8

Update app.py

Files changed (1)
  1. app.py +37 -20
app.py CHANGED
@@ -1,30 +1,47 @@
+from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import torch
 import librosa
-import gradio as gr
+import tempfile
+import base64
+import requests
 
-# We load the Guaraní model
+app = FastAPI()
+
+# Load the Guaraní model
 model_name = "ivangtorre/wav2vec2-xlsr-300m-guarani"
 processor = Wav2Vec2Processor.from_pretrained(model_name)
 model = Wav2Vec2ForCTC.from_pretrained(model_name)
 
-# Transcription
-def transcribir(audio):
-    audio_data, _ = librosa.load(audio, sr=16000)
-    inputs = processor(audio_data, sampling_rate=16000, return_tensors="pt", padding=True)
-    with torch.no_grad():
-        logits = model(**inputs).logits
-    predicted_ids = torch.argmax(logits, dim=-1)
-    transcription = processor.batch_decode(predicted_ids)[0]
-    return transcription.lower()
+@app.post("/transcribe")
+async def transcribe_audio(request: Request):
+    try:
+        data = await request.json()
+        audio_path = None
+
+        if "base64" in data:
+            audio_bytes = base64.b64decode(data["base64"])
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+                tmp.write(audio_bytes)
+                audio_path = tmp.name
+
+        elif "url" in data:
+            response = requests.get(data["url"])
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+                tmp.write(response.content)
+                audio_path = tmp.name
+        else:
+            return JSONResponse({"error": "You must send 'base64' or 'url'"}, status_code=400)
 
-# Gradio interface
-demo = gr.Interface(
-    fn=transcribir,
-    inputs=gr.Audio(type="filepath"),
-    outputs="text",
-    title="Transcriptor Guaraní",
-    description="Upload an audio file in Guaraní (.ogg, .wav) and get the transcription"
-)
+        # Transcription
+        audio_data, _ = librosa.load(audio_path, sr=16000)
+        inputs = processor(audio_data, sampling_rate=16000, return_tensors="pt", padding=True)
+        with torch.no_grad():
+            logits = model(**inputs).logits
+        predicted_ids = torch.argmax(logits, dim=-1)
+        transcription = processor.batch_decode(predicted_ids)[0]
 
-demo.launch()
+        return JSONResponse({"text": transcription.lower()})
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)