Spaces:

DangoMachoo
/

Bot-and-life_speech-to-Text

Runtime error

App Files Files Community

DangoMachoo committed on Apr 28

Commit

a4f2967

1 Parent(s): 6430135

update main

Browse files

Files changed (1) hide show

app.py +61 -21

app.py CHANGED Viewed

@@ -4,6 +4,8 @@ from transformers import pipeline
 import os
 import tempfile
 import shutil
 # ✅ ตรวจสอบ ffmpeg
 if not shutil.which("ffmpeg"):
@@ -12,7 +14,7 @@ if not shutil.which("ffmpeg"):
 # ✅ ลบ path ffmpeg เฉพาะ local เพราะ Spaces มี ffmpeg ติดตั้งแล้ว
 # os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\ffmpeg-master-latest-win64-gpl\ffmpeg-master-latest-win64-gpl\bin"
-# ✅ โหลดโมเดล
 MODEL_NAME = "biodatlab/whisper-th-small-combined"
 device = 0 if torch.cuda.is_available() else "cpu"
@@ -23,44 +25,78 @@ pipe = pipeline(
     device=device,
 )
-# ✅ ฟังก์ชันแปลงเสียงเป็นข้อความ และเซฟไฟล์
 def transcribe_audio(audio):
     if not audio:
-        return "กรุณาอัปโหลดไฟล์เสียงก่อน", None
     try:
-        result = pipe(audio, generate_kwargs={"language": "<|th|>", "task": "transcribe"}, batch_size=16)
         text = result["text"]
-        with tempfile.NamedTemporaryFile(suffix=".txt", delete=False, mode="w", encoding="utf-8") as f:
-            f.write(text)
-            filepath = f.name
-        return text, filepath
     except Exception as e:
-        return f"เกิดข้อผิดพลาด: {str(e)}", None
-# ✅ ฟังก์ชันสำหรับดาวน์โหลด
-def get_download_filepath(filepath):
-    if filepath and os.path.exists(filepath):
-        return filepath
-    return None
 # ✅ UI Layout
-with gr.Blocks() as demo:
-    gr.Markdown("## 🎤 แปลงเสียงพูดภาษาไทยเป็นข้อความ")
     with gr.Row():
         with gr.Column(scale=1):
             audio_input = gr.Audio(label="🎵 อัปโหลดไฟล์เสียง (MP3, WAV, M4A)", type="filepath")
             transcribe_btn = gr.Button("🔄 แปลงเสียงเป็นข้อความ")
         with gr.Column(scale=2):
-            transcribed_text = gr.Textbox(label="📜 ข้อความที่แปลงแล้ว", lines=11)
             with gr.Row():
                 copy_button = gr.Button("📋 คัดลอกข้อความ")
-                download_button = gr.DownloadButton(label="⬇️ ดาวน์โหลด .txt")
     # Action
     transcribe_btn.click(
         fn=transcribe_audio,
         inputs=audio_input,
-        outputs=[transcribed_text, download_button],
         show_progress=True
     )
@@ -73,7 +109,11 @@ with gr.Blocks() as demo:
     )
     # Action ดาวน์โหลด
-    download_button.click(fn=get_download_filepath, inputs=download_button, outputs=download_button)
-# รันใน Hugging Face Spaces ด้วย auth
 demo.launch()

 import os
 import tempfile
 import shutil
+from docx import Document
+import time
 # ✅ ตรวจสอบ ffmpeg
 if not shutil.which("ffmpeg"):
 # ✅ ลบ path ffmpeg เฉพาะ local เพราะ Spaces มี ffmpeg ติดตั้งแล้ว
 # os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\ffmpeg-master-latest-win64-gpl\ffmpeg-master-latest-win64-gpl\bin"
+# ✅ โหลดโมเดล small
 MODEL_NAME = "biodatlab/whisper-th-small-combined"
 device = 0 if torch.cuda.is_available() else "cpu"
     device=device,
 )
+# ✅ ฟังก์ชันแปลงเสียงเป็นข้อความ (return text และ processing time)
 def transcribe_audio(audio):
+    start_time = time.time()  # record the start time (taken before the empty-input check)
     if not audio:
+        return "กรุณาอัปโหลดไฟล์เสียงก่อน", "ไม่ได้ประมวลผล"
     try:
+        result = pipe(audio, generate_kwargs={"language": "<|th|>", "task": "transcribe"}, batch_size=14)
         text = result["text"]
+        end_time = time.time()  # record the end time once the pipeline call has returned
+        processing_time = end_time - start_time
+        return text, f"ใช้เวลา: {processing_time:.2f} วินาที"
     except Exception as e:
+        return f"เกิดข้อผิดพลาด: {str(e)}", "เกิดข้อผิดพลาด"
+# ✅ ฟังก์ชันสร้างไฟล์สำหรับดาวน์โหลด (.txt หรือ .docx)
def create_download_file(text, file_format):
    """Write the transcript to a temporary file and return its path.

    Args:
        text: Transcribed text taken from the UI textbox. Empty text and the
            placeholder/error messages (those starting with "กรุณา" or
            "เกิดข้อผิดพลาด") are rejected.
        file_format: "Text (.txt)" produces a UTF-8 text file; any other
            value produces a Word (.docx) document.

    Returns:
        The path of the created file, or None when there is nothing to
        export or the file could not be written.
    """
    import logging

    # str.startswith accepts a tuple, so one call covers both message prefixes.
    if not text or text.startswith(("กรุณา", "เกิดข้อผิดพลาด")):
        return None

    suffix = ".txt" if file_format == "Text (.txt)" else ".docx"

    # Reserve a unique path and close the descriptor immediately: the file is
    # reopened by name below, which fails on Windows while a handle is open.
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)

    try:
        if suffix == ".txt":
            with open(path, "w", encoding="utf-8") as handle:
                handle.write(text)
        else:  # Word (.docx)
            doc = Document()
            doc.add_paragraph(text)
            doc.save(path)
    except Exception:
        # Keep the "None on failure" contract the UI relies on, but record the
        # cause instead of discarding it, and do not leave a stray temp file.
        logging.getLogger(__name__).exception("Could not create the download file")
        try:
            os.remove(path)
        except OSError:
            pass
        return None
    return path
+# ✅ CSS สำหรับจัด Markdown ตรงกลางและทำให้ Textbox มีแถบเลื่อน
+custom_css = """
+.markdown {
+    text-align: center !important;
+}
+#transcribed-text textarea {
+    height: 250px !important;
+    overflow-y: auto !important;
+    resize: vertical !important;
+}
+"""
 # ✅ UI Layout
+with gr.Blocks(css=custom_css) as demo:
+    gr.Markdown("""
+        <div style="text-align: center;">
+            <h2> แปลงเสียงพูดภาษาไทยเป็นข้อความ </h2>
+        </div>
+    """)
     with gr.Row():
         with gr.Column(scale=1):
             audio_input = gr.Audio(label="🎵 อัปโหลดไฟล์เสียง (MP3, WAV, M4A)", type="filepath")
+            download_format = gr.Dropdown(
+                choices=["Text (.txt)", "Word (.docx)"],
+                label="📄 เลือกฟอร์แมตไฟล์",
+                value="Text (.txt)"
+            )
             transcribe_btn = gr.Button("🔄 แปลงเสียงเป็นข้อความ")
         with gr.Column(scale=2):
+            transcribed_text = gr.Textbox(label="📜 ข้อความที่แปลงแล้ว", elem_id="transcribed-text")
+            processing_time_display = gr.Textbox(label="⏱️ เวลาที่ใช้", interactive=False)
             with gr.Row():
                 copy_button = gr.Button("📋 คัดลอกข้อความ")
+                download_button = gr.DownloadButton(label="⬇️ ดาวน์โหลดไฟล์")
     # Action
     transcribe_btn.click(
         fn=transcribe_audio,
         inputs=audio_input,
+        outputs=[transcribed_text, processing_time_display],
         show_progress=True
     )
     )
     # Action ดาวน์โหลด
+    download_button.click(
+        fn=create_download_file,
+        inputs=[transcribed_text, download_format],
+        outputs=download_button
+    )
+# รันใน Hugging Face Spaces
 demo.launch()