DangoMachoo committed on
Commit
6f2d374
·
1 Parent(s): ac59c9e

add main code

Browse files
Files changed (3) hide show
  1. .gitignore +21 -0
  2. app.py +48 -0
  3. requirements.txt +49 -0
.gitignore ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #virtual environment
2
+ venv/
3
+
4
+ # Gradio
5
+ flagged/
6
+ gradio_cached_examples/
7
+
8
+ # Model weights
9
+ *.pt
10
+ *.ckpt
11
+ *.pth
12
+
13
+ # Media outputs
14
+ *.png
15
+ *.jpg
16
+ *.mp4
17
+ *.mp3
18
+
19
+ #code tests
20
+ app1.py
21
+ app2.py
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import pipeline
4
+ import os
5
+
6
+ # ✅ ใช้ path ffmpeg ชั่วคราวเฉพาะใน local
7
+ #os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\ffmpeg-master-latest-win64-gpl\ffmpeg-master-latest-win64-gpl\bin"
8
+
9
+ # ✅ โมเดลที่ใช้
10
# Checkpoint identifier handed to the speech-recognition pipeline below.
MODEL_NAME = "biodatlab/whisper-th-small-combined"
lang = "th"

# Run on CUDA device 0 when torch reports a GPU; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Shared ASR pipeline, built once at import time; audio is processed in
# 30-second chunks.
pipe = pipeline(
    model=MODEL_NAME,
    task="automatic-speech-recognition",
    device=device,
    chunk_length_s=30,
)
20
+
21
+ # ✅ ฟังก์ชันแปลงเสียงเป็นข้อความ
22
def transcribe_audio(audio):
    """Transcribe an audio file and prepare the transcript for download.

    Args:
        audio: Path to the audio file to transcribe, or ``None`` when no
            audio has been provided.

    Returns:
        A ``(text, txt_path)`` tuple: the transcript string for on-screen
        display and the path of a UTF-8 ``.txt`` file holding the same text,
        suitable for a file-download component. Returns ``("", None)`` when
        ``audio`` is ``None``.
    """
    import tempfile

    # Nothing to transcribe: return empty outputs instead of passing None
    # to the pipeline.
    if audio is None:
        return "", None

    result = pipe(
        audio,
        generate_kwargs={"language": "<|th|>", "task": "transcribe"},
        batch_size=16,
    )
    text = result["text"]

    # A file output needs a real path on disk, not the transcript itself, so
    # persist the text to a temporary file that outlives this call.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".txt",
        prefix="transcript_",
        delete=False,
        encoding="utf-8",
    ) as handle:
        handle.write(text)

    return text, handle.name
26
+
27
+ # ✅ UI ด้วย Gradio
28
with gr.Blocks() as demo:
    gr.Markdown("## 🎤 แปลงเสียงพูดภาษาไทยเป็นข้อความ")
    with gr.Row():
        audio_input = gr.Audio(label="อัปโหลดไฟล์เสียง (MP3)", type="filepath")
    with gr.Row():
        transcribed_text = gr.Textbox(label="📜 ข้อความที่แปลงแล้ว", lines=10)
    with gr.Row():
        copy_button = gr.Button("📋 Copy")
        download_button = gr.File(label="⬇️ ดาวน์โหลด .txt")

    # Button that starts the transcription.
    transcribe_btn = gr.Button("🔄 แปลงเสียงเป็นข้อความ")

    # On click: run the transcription and fill both the textbox and the
    # download slot.
    transcribe_btn.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=[transcribed_text, download_button],
    )

    # The copy button previously had no handler. Copy the textbox content to
    # the clipboard in the browser; no Python callback is needed.
    # NOTE(review): relies on the `js` event argument (Gradio 4+) — confirm
    # against the Gradio version the Space runs.
    copy_button.click(
        fn=None,
        inputs=transcribed_text,
        outputs=None,
        js="(text) => { navigator.clipboard.writeText(text); }",
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audioop-lts==0.2.1
2
+ audioread==3.0.1
3
+ certifi==2025.1.31
4
+ cffi==1.17.1
5
+ charset-normalizer==3.4.1
6
+ colorama==0.4.6
7
+ decorator==5.2.1
8
+ ffmpeg-python==0.2.0
9
+ filelock==3.18.0
10
+ fsspec==2025.3.2
11
+ future==1.0.0
12
+ huggingface-hub==0.30.2
13
+ idna==3.10
14
+ Jinja2==3.1.6
15
+ joblib==1.4.2
16
+ lazy_loader==0.4
17
+ librosa==0.11.0
18
+ llvmlite==0.44.0
19
+ MarkupSafe==3.0.2
20
+ mpmath==1.3.0
21
+ msgpack==1.1.0
22
+ networkx==3.4.2
23
+ numba==0.61.2
24
+ numpy==2.2.5
25
+ packaging==25.0
26
+ platformdirs==4.3.7
27
+ pooch==1.8.2
28
+ pycparser==2.22
29
+ PyYAML==6.0.2
30
+ regex==2024.11.6
31
+ requests==2.32.3
32
+ safetensors==0.5.3
33
+ scikit-learn==1.6.1
34
+ scipy==1.15.2
35
+ setuptools==79.0.0
36
+ soundfile==0.13.1
37
+ soxr==0.5.0.post1
38
+ standard-aifc==3.13.0
39
+ standard-chunk==3.13.0
40
+ standard-sunau==3.13.0
41
+ sympy==1.13.1
42
+ threadpoolctl==3.6.0
43
+ tokenizers==0.21.1
44
+ torch==2.6.0
45
+ torchaudio==2.6.0
46
+ tqdm==4.67.1
47
+ transformers @ git+https://github.com/huggingface/transformers@ca790303f7480e0211e5b58b9e22706f46dbf19a
48
+ typing_extensions==4.13.2
49
+ urllib3==2.4.0