| | import sqlite3 |
| | import time |
| | import os |
| | import subprocess |
| | import json |
| | import shlex |
| | from datetime import datetime |
| |
|
# Working directory handed to the STT subprocess; the transcription output
# is also looked up underneath it (<CWD>/temp_dir/output_transcription.json).
CWD: str = "./"

# Command that launches the STT tool (despite the name, not a Python path).
PYTHON_PATH: str = "stt-transcribe"

# Value passed to the STT tool's --model option.
STT_MODEL_NAME: str = "fasterwhispher"

# Seconds the worker sleeps when the queue is empty or after a worker error.
POLL_INTERVAL: int = 3
| |
|
def process_audio(file_id, filepath):
    """Transcribe one audio file with the external STT command.

    Runs ``PYTHON_PATH --input <absolute filepath> --model STT_MODEL_NAME``
    with ``CWD`` as the working directory, then reads the JSON document the
    command is expected to leave at ``<CWD>/temp_dir/output_transcription.json``.

    Args:
        file_id: Identifier of the file; only used in the error log line.
        filepath: Path to the audio file (resolved to an absolute path
            before it is handed to the STT command).

    Returns:
        A ``(caption, error)`` tuple: ``(caption, None)`` on success, or
        ``(None, error_message)`` if anything raised. The caption is the
        ``'text'`` field, else the ``'transcription'`` field, else the
        string form of the whole parsed result.
    """
    try:
        audio_path = os.path.abspath(filepath)
        print(f"π Running STT on: {audio_path}")

        output_path = os.path.join(CWD, 'temp_dir', 'output_transcription.json')

        # The output location is shared by every run. Drop any leftover file
        # first so that a run which exits 0 without writing output cannot
        # hand back the previous file's transcription.
        try:
            os.remove(output_path)
        except FileNotFoundError:
            pass

        # Argument list + no shell: nothing is re-parsed by a shell, no
        # dependency on /bin/bash, and cwd= already replaces the old `cd`.
        # shlex.split keeps PYTHON_PATH usable as "program arg ..." too.
        command = [
            *shlex.split(PYTHON_PATH),
            '--input', audio_path,
            '--model', STT_MODEL_NAME,
        ]

        subprocess.run(
            command,
            check=True,  # non-zero exit -> CalledProcessError -> error tuple
            cwd=CWD,
            env={
                **os.environ,
                'PYTHONUNBUFFERED': '1',
                'CUDA_LAUNCH_BLOCKING': '1',
                'USE_CPU_IF_POSSIBLE': 'true',
            },
        )

        with open(output_path, 'r', encoding='utf-8') as file:
            result = json.load(file)

        # Fall back through the known keys; as a last resort keep the raw
        # result so the caller always gets a non-empty caption.
        caption = result.get('text', '') or result.get('transcription', '') or str(result)

        return caption, None

    except Exception as e:
        # Boundary handler: the worker records the message in the database
        # instead of crashing on a single bad file.
        print(f"β Error processing file {file_id}: {e}")
        return None, str(e)
| |
|
def update_status(file_id, status, caption=None, error=None, db_path='audio_captions.db'):
    """Update the status of a row in the ``audio_files`` table.

    Args:
        file_id: Value of the ``id`` column identifying the row.
        status: New status. ``'completed'`` and ``'failed'`` additionally
            write ``caption`` and ``processed_at``; any other value only
            updates the ``status`` column.
        caption: Transcription text stored when ``status`` is ``'completed'``.
        error: Error text stored (as ``"Error: <error>"`` in the ``caption``
            column) when ``status`` is ``'failed'``.
        db_path: SQLite database file to update; defaults to the worker's
            ``audio_captions.db``.

    Raises:
        sqlite3.Error: If the update or commit fails. The connection is
            closed in every case.
    """
    if status == 'completed':
        stored_caption = caption
    elif status == 'failed':
        stored_caption = f"Error: {error}"
    else:
        stored_caption = None

    conn = sqlite3.connect(db_path)
    try:
        if status in ('completed', 'failed'):
            # Terminal states also record the outcome and when it happened.
            conn.execute(
                '''UPDATE audio_files
                   SET status = ?, caption = ?, processed_at = ?
                   WHERE id = ?''',
                (status, stored_caption, datetime.now().isoformat(), file_id),
            )
        else:
            conn.execute(
                'UPDATE audio_files SET status = ? WHERE id = ?',
                (status, file_id),
            )
        conn.commit()
    finally:
        # Close even when execute/commit raises so the handle is not leaked.
        conn.close()
| |
|
def _fetch_next_pending():
    """Return the oldest ``'not_started'`` row of ``audio_files``, or None."""
    conn = sqlite3.connect('audio_captions.db')
    try:
        conn.row_factory = sqlite3.Row
        return conn.execute(
            '''SELECT * FROM audio_files
               WHERE status = 'not_started'
               ORDER BY created_at ASC
               LIMIT 1'''
        ).fetchone()
    finally:
        # Close even if the query raises so polling never leaks handles.
        conn.close()


def worker_loop():
    """Poll the database forever and transcribe queued audio files.

    Each iteration takes the oldest ``'not_started'`` row, marks it
    ``'processing'``, runs ``process_audio`` on its ``filepath`` and stores
    the outcome via ``update_status``. After a successful transcription the
    audio file is deleted from disk. When the queue is empty, or when an
    iteration raises, the loop sleeps ``POLL_INTERVAL`` seconds and retries.
    This function does not return.
    """
    print("π€ STT Worker started. Monitoring for new audio files...")
    print("ποΈ Audio files will be deleted after successful processing\n")

    while True:
        try:
            row = _fetch_next_pending()
            if row is None:
                # Nothing queued: back off before polling again.
                time.sleep(POLL_INTERVAL)
                continue

            file_id = row['id']
            filepath = row['filepath']
            filename = row['filename']

            print(f"\n{'='*60}")
            print(f"π΅ Processing: {filename}")
            print(f"π ID: {file_id}")
            print(f"{'='*60}")

            # Flip the status first so the row no longer matches the
            # 'not_started' query on the next poll.
            update_status(file_id, 'processing')

            caption, error = process_audio(file_id, filepath)

            if caption:
                print(f"β Successfully processed: {filename}")
                print(f"π Caption preview: {caption[:100]}...")
                update_status(file_id, 'completed', caption=caption)

                # Delete the source audio only after the caption is stored.
                # EAFP avoids the exists()/remove() race; a file that is
                # already gone is not an error.
                try:
                    os.remove(filepath)
                except FileNotFoundError:
                    pass
                else:
                    print(f"ποΈ Deleted audio file: {filepath}")
            else:
                print(f"β Failed to process: {filename}")
                print(f"Error: {error}")
                update_status(file_id, 'failed', error=error)

        except Exception as e:
            # Top-level boundary: log and keep the worker alive.
            print(f"β οΈ Worker error: {e}")
            time.sleep(POLL_INTERVAL)
| |
|
if __name__ == '__main__':
    # Standalone entry point: only start polling when the database that
    # app.py initializes is already present in the current directory.
    if os.path.exists('audio_captions.db'):
        rule = "=" * 60
        print("\n" + rule)
        print("π Starting STT Worker (Standalone Mode)")
        print(rule)
        print("β οΈ Note: Worker is now embedded in app.py")
        print("β οΈ This standalone mode is for testing/debugging only")
        print(rule + "\n")
        worker_loop()
    else:
        print("β Database not found. Please run app.py first to initialize.")