# Spaces: DangoMachoo / Bot-and-life_speech-to-Text
# Runtime error
# File size: 4,899 Bytes

import gradio as gr
import torch
from transformers import pipeline
import os
import tempfile
import shutil
from docx import Document
import time

# ✅ Fail fast at startup when no ffmpeg executable can be found on PATH.
if shutil.which("ffmpeg") is None:
    raise EnvironmentError("ffmpeg not found. Please install ffmpeg and ensure it's in PATH.")

# ✅ The local-only ffmpeg PATH entry stays disabled because Spaces already has ffmpeg installed.
# os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\ffmpeg-master-latest-win64-gpl\ffmpeg-master-latest-win64-gpl\bin"

# ✅ Load the "small" model.
MODEL_NAME = "biodatlab/whisper-th-small-combined"

# Use the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Shared speech-recognition pipeline; audio is processed in 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
)

# ✅ Speech-to-text handler (returns the transcript and a processing-time message)
def transcribe_audio(audio):
    """Transcribe an audio input to Thai text using the module-level ``pipe``.

    Args:
        audio: Value passed straight to ``pipe``. Any falsy value
            (``None``, ``""``) is treated as "nothing uploaded".

    Returns:
        A ``(text, status)`` tuple of strings:

        * nothing uploaded: a prompt asking for an upload and a
          "not processed" status;
        * success: the transcript and the elapsed time formatted with two
          decimals;
        * failure: an error message that embeds the exception text and an
          error status. Exceptions are never propagated to the caller.
    """
    if not audio:
        return "กรุณาอัปโหลดไฟล์เสียงก่อน", "ไม่ได้ประมวลผล"

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted mid-transcription.
    started = time.perf_counter()
    try:
        # Force Thai transcription (no translation); batch_size applies to
        # the chunks the pipeline produces.
        result = pipe(audio, generate_kwargs={"language": "<|th|>", "task": "transcribe"}, batch_size=14)
        text = result["text"]
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"เกิดข้อผิดพลาด: {e}", "เกิดข้อผิดพลาด"

    elapsed = time.perf_counter() - started
    return text, f"ใช้เวลา: {elapsed:.2f} วินาที"

# ✅ Build the downloadable file (.txt or .docx)
def create_download_file(text, file_format):
    """Write the transcript to a persistent temporary file for download.

    Args:
        text: Transcript to export. Empty/``None`` text, the exact
            "please upload" placeholder, and messages starting with the
            error prefix ``"เกิดข้อผิดพลาด: "`` are not exported.
        file_format: ``"Text (.txt)"`` writes UTF-8 plain text; any other
            value writes a Word ``.docx`` document.

    Returns:
        The path of the created file, or ``None`` when there is nothing to
        export or the file could not be written. The file is created with
        ``delete=False`` and is not removed by this function.
    """
    # Match the placeholder exactly and the error message by its full
    # "…: " prefix. A bare startswith("กรุณา") would also reject genuine
    # transcripts that merely begin with the common word "กรุณา" ("please").
    if not text or text == "กรุณาอัปโหลดไฟล์เสียงก่อน" or text.startswith("เกิดข้อผิดพลาด: "):
        return None
    try:
        if file_format == "Text (.txt)":
            with tempfile.NamedTemporaryFile(suffix=".txt", delete=False, mode="w", encoding="utf-8") as f:
                f.write(text)
            return f.name

        # Word (.docx)
        doc = Document()
        doc.add_paragraph(text)
        with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as f:
            # Save into the already-open binary stream instead of reopening
            # the file by name: opening a NamedTemporaryFile a second time
            # while it is still open is platform-dependent.
            doc.save(f)
        return f.name
    except Exception:
        # Deliberate best-effort: the caller treats None as "no file".
        return None

# ✅ CSS that centers Markdown text and gives the transcript Textbox a scrollbar.
# - Elements with class "markdown" get centered text.
# - The <textarea> inside the element with id "transcribed-text" is fixed at
#   250px tall, scrolls vertically on overflow, and can be resized vertically.
custom_css = """
.markdown {
    text-align: center !important;
}
#transcribed-text textarea {
    height: 250px !important;
    overflow-y: auto !important;
    resize: vertical !important;
}
"""

# ✅ UI layout
def _notify_copied(text):
    """Show a toast confirming that the transcript was copied.

    Runs on the Python side after the browser-side clipboard write; ``text``
    is the value forwarded by the button's ``js`` function and is unused.
    """
    gr.Info("คัดลอกข้อความแล้ว!")


with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("""
        <div style="text-align: center;">
            <h2> แปลงเสียงพูดภาษาไทยเป็นข้อความ </h2>
        </div>
    """)
    with gr.Row():
        # Left column: audio input, export format, and the transcribe button.
        with gr.Column(scale=1):
            audio_input = gr.Audio(label="🎵 อัปโหลดไฟล์เสียง (MP3, WAV, M4A)", type="filepath")
            download_format = gr.Dropdown(
                choices=["Text (.txt)", "Word (.docx)"],
                label="📄 เลือกฟอร์แมตไฟล์",
                value="Text (.txt)"
            )
            transcribe_btn = gr.Button("🔄 แปลงเสียงเป็นข้อความ")
        # Right column: transcript, processing time, copy/download buttons.
        with gr.Column(scale=2):
            # elem_id ties this Textbox to the "#transcribed-text" CSS rule.
            transcribed_text = gr.Textbox(label="📜 ข้อความที่แปลงแล้ว", elem_id="transcribed-text")
            processing_time_display = gr.Textbox(label="⏱️ เวลาที่ใช้", interactive=False)
            with gr.Row():
                copy_button = gr.Button("📋 คัดลอกข้อความ")
                download_button = gr.DownloadButton(label="⬇️ ดาวน์โหลดไฟล์")

    # Action: transcribe the uploaded audio.
    transcribe_event = transcribe_btn.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=[transcribed_text, processing_time_display],
        show_progress=True
    )

    # A DownloadButton serves the file that is already set as its value, so
    # the export is prepared as soon as a transcript exists and whenever the
    # format changes; otherwise the first click only generates the file and
    # downloads nothing.
    # NOTE(review): confirm against the Gradio version pinned for this Space.
    transcribe_event.then(
        fn=create_download_file,
        inputs=[transcribed_text, download_format],
        outputs=download_button
    )
    download_format.change(
        fn=create_download_file,
        inputs=[transcribed_text, download_format],
        outputs=download_button
    )

    # Action: copy. The clipboard write happens in the browser; the toast is
    # raised from Python because `gr` is a Python module, not a browser-side
    # global, so calling gr.Info inside the JS snippet throws.
    copy_button.click(
        fn=_notify_copied,
        inputs=transcribed_text,
        outputs=None,
        js="function(text) {navigator.clipboard.writeText(text); return [text];}"
    )

    # Action: download. Kept so a click also refreshes the file from the
    # current textbox content (e.g. after manual edits).
    download_button.click(
        fn=create_download_file,
        inputs=[transcribed_text, download_format],
        outputs=download_button
    )

# Run on Hugging Face Spaces
demo.launch()