Spaces:
Runtime error
Runtime error
Commit
·
6f2d374
1
Parent(s):
ac59c9e
add main code
Browse files- .gitignore +21 -0
- app.py +48 -0
- requirements.txt +49 -0
.gitignore
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#virtual environment
|
| 2 |
+
venv/
|
| 3 |
+
|
| 4 |
+
# Gradio
|
| 5 |
+
flagged/
|
| 6 |
+
gradio_cached_examples/
|
| 7 |
+
|
| 8 |
+
# Model weights
|
| 9 |
+
*.pt
|
| 10 |
+
*.ckpt
|
| 11 |
+
*.pth
|
| 12 |
+
|
| 13 |
+
# Media outputs
|
| 14 |
+
*.png
|
| 15 |
+
*.jpg
|
| 16 |
+
*.mp4
|
| 17 |
+
*.mp3
|
| 18 |
+
|
| 19 |
+
#code tests
|
| 20 |
+
app1.py
|
| 21 |
+
app2.py
|
app.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import torch
|
| 3 |
+
from transformers import pipeline
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
# ✅ ใช้ path ffmpeg ชั่วคราวเฉพาะใน local
|
| 7 |
+
#os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\ffmpeg-master-latest-win64-gpl\ffmpeg-master-latest-win64-gpl\bin"
|
| 8 |
+
|
| 9 |
+
# ✅ โมเดลที่ใช้
|
| 10 |
+
# Hugging Face Hub id of the speech-recognition checkpoint to load.
MODEL_NAME = "biodatlab/whisper-th-small-combined"
lang = "th"

# Use CUDA device 0 when a GPU is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Build the ASR pipeline once at import time so every request reuses the
# already-loaded model; chunk_length_s=30 is passed so the pipeline
# processes the input in 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
)
|
| 20 |
+
|
| 21 |
+
# ✅ ฟังก์ชันแปลงเสียงเป็นข้อความ
|
| 22 |
+
def transcribe_audio(audio):
    """Transcribe an audio file and prepare the transcript for download.

    Args:
        audio: Path to the audio file to transcribe (the UI's audio
            component is configured with ``type="filepath"``), or a falsy
            value when nothing was provided.

    Returns:
        A ``(text, txt_path)`` tuple: the transcript for on-screen display
        and the path of a UTF-8 ``.txt`` file holding the same transcript,
        suitable for a file-download output. Returns ``("", None)`` when no
        audio was provided.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import tempfile

    # Clicking the button before uploading anything passes None; return
    # empty outputs instead of failing inside the pipeline.
    if not audio:
        return "", None

    result = pipe(
        audio,
        generate_kwargs={"language": "<|th|>", "task": "transcribe"},
        batch_size=16,
    )
    text = result["text"]

    # A file output needs a path, not the transcript itself, so persist the
    # text to disk. delete=False because the file must still exist after
    # this function returns so the UI can serve it.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as handle:
        handle.write(text)
        txt_path = handle.name

    # First value is shown on screen, second is offered as a .txt download.
    return text, txt_path
|
| 26 |
+
|
| 27 |
+
# ✅ UI ด้วย Gradio
|
| 28 |
+
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown("## 🎤 แปลงเสียงพูดภาษาไทยเป็นข้อความ")

    # Audio upload; type="filepath" means the handler receives a file path.
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="อัปโหลดไฟล์เสียง (MP3)")

    # Multi-line box that displays the transcript.
    with gr.Row():
        transcribed_text = gr.Textbox(lines=10, label="📜 ข้อความที่แปลงแล้ว")

    with gr.Row():
        # NOTE(review): no event handler is attached to this button in the
        # visible code, so clicking it appears to do nothing — confirm.
        copy_button = gr.Button("📋 Copy")
        # File component used as the download target for the transcript.
        download_button = gr.File(label="⬇️ ดาวน์โหลด .txt")

    # Button that starts the transcription.
    transcribe_btn = gr.Button("🔄 แปลงเสียงเป็นข้อความ")

    # On click: run transcribe_audio on the uploaded audio and route its two
    # return values to the text box and the download component.
    transcribe_btn.click(
        fn=transcribe_audio,
        inputs=[audio_input],
        outputs=[transcribed_text, download_button],
    )

demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
audioop-lts==0.2.1
|
| 2 |
+
audioread==3.0.1
|
| 3 |
+
certifi==2025.1.31
|
| 4 |
+
cffi==1.17.1
|
| 5 |
+
charset-normalizer==3.4.1
|
| 6 |
+
colorama==0.4.6
|
| 7 |
+
decorator==5.2.1
|
| 8 |
+
ffmpeg-python==0.2.0
|
| 9 |
+
filelock==3.18.0
|
| 10 |
+
fsspec==2025.3.2
|
| 11 |
+
future==1.0.0
|
| 12 |
+
huggingface-hub==0.30.2
|
| 13 |
+
idna==3.10
|
| 14 |
+
Jinja2==3.1.6
|
| 15 |
+
joblib==1.4.2
|
| 16 |
+
lazy_loader==0.4
|
| 17 |
+
librosa==0.11.0
|
| 18 |
+
llvmlite==0.44.0
|
| 19 |
+
MarkupSafe==3.0.2
|
| 20 |
+
mpmath==1.3.0
|
| 21 |
+
msgpack==1.1.0
|
| 22 |
+
networkx==3.4.2
|
| 23 |
+
numba==0.61.2
|
| 24 |
+
numpy==2.2.5
|
| 25 |
+
packaging==25.0
|
| 26 |
+
platformdirs==4.3.7
|
| 27 |
+
pooch==1.8.2
|
| 28 |
+
pycparser==2.22
|
| 29 |
+
PyYAML==6.0.2
|
| 30 |
+
regex==2024.11.6
|
| 31 |
+
requests==2.32.3
|
| 32 |
+
safetensors==0.5.3
|
| 33 |
+
scikit-learn==1.6.1
|
| 34 |
+
scipy==1.15.2
|
| 35 |
+
setuptools==79.0.0
|
| 36 |
+
soundfile==0.13.1
|
| 37 |
+
soxr==0.5.0.post1
|
| 38 |
+
standard-aifc==3.13.0
|
| 39 |
+
standard-chunk==3.13.0
|
| 40 |
+
standard-sunau==3.13.0
|
| 41 |
+
sympy==1.13.1
|
| 42 |
+
threadpoolctl==3.6.0
|
| 43 |
+
tokenizers==0.21.1
|
| 44 |
+
torch==2.6.0
|
| 45 |
+
torchaudio==2.6.0
|
| 46 |
+
tqdm==4.67.1
|
| 47 |
+
transformers @ git+https://github.com/huggingface/transformers@ca790303f7480e0211e5b58b9e22706f46dbf19a
|
| 48 |
+
typing_extensions==4.13.2
|
| 49 |
+
urllib3==2.4.0
|