| import os |
| import numpy as np |
| from pydub import AudioSegment |
| from scipy.io import wavfile |
|
|
def trim_wav(input_file, output_file, target_duration_minutes=24, target_duration_seconds=10):
    """Trim a WAV file down to a target duration.

    The recording is loaded with pydub, cut to its first
    ``target_duration_minutes`` minutes plus ``target_duration_seconds``
    seconds, and exported to ``output_file`` in WAV format.

    Args:
        input_file: Path of the WAV file to read.
        output_file: Path the trimmed WAV file is written to.
        target_duration_minutes: Minutes component of the target duration.
        target_duration_seconds: Seconds component of the target duration.

    Returns:
        ``output_file`` when the trimmed file was written. ``None`` when the
        recording is shorter than the target duration, or when any exception
        occurs (the error is printed rather than raised).
    """
    try:
        audio = AudioSegment.from_wav(input_file)

        # pydub reports length and slices in milliseconds.
        target_duration_ms = (target_duration_minutes * 60 + target_duration_seconds) * 1000

        # Skip only recordings that are strictly shorter than the target.
        # A recording that is exactly the target length is still usable and
        # must not be reported as "shorter than the target".
        if len(audio) < target_duration_ms:
            print(f"File {input_file} memiliki durasi kurang dari target, diloncati")
            return None

        trimmed_audio = audio[:target_duration_ms]
        trimmed_audio.export(output_file, format="wav")

        print(f"File berhasil dipotong menjadi {target_duration_minutes} menit {target_duration_seconds} detik")
        return output_file

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print(f"Terjadi kesalahan saat memotong file: {str(e)}")
        return None
|
|
def segment_wav_file(input_file, output_dir, segment_duration=5, overlap_percentage=0.5,
                     segment_prefix="ridho_segment_"):
    """Split a WAV file into fixed-length, overlapping segments.

    Consecutive segments start ``segment_duration * (1 - overlap_percentage)``
    seconds apart (rounded down to whole samples). A trailing remainder that
    is shorter than one full segment is discarded. Each segment is written
    to ``output_dir`` as ``<segment_prefix><index>.wav`` with the sample rate
    of the input file.

    Args:
        input_file: Path of the WAV file to split.
        output_dir: Directory the segment files are written to; created if
            it does not exist.
        segment_duration: Length of each segment in seconds. Must be > 0.
        overlap_percentage: Fraction of a segment shared with the next one,
            in the range ``[0, 1)``.
        segment_prefix: File name prefix placed before the segment index.

    Returns:
        List of paths of the written segment files, in order. Empty when
        the input is shorter than one segment.

    Raises:
        ValueError: If ``segment_duration`` or ``overlap_percentage`` is out
            of range, or if the resulting segment length or step is smaller
            than one sample.
    """
    # Validate before touching the filesystem so bad arguments have no side effects.
    if segment_duration <= 0:
        raise ValueError(f"segment_duration must be positive, got {segment_duration}")
    if not 0 <= overlap_percentage < 1:
        raise ValueError(
            f"overlap_percentage must be in the range [0, 1), got {overlap_percentage}"
        )

    os.makedirs(output_dir, exist_ok=True)

    sample_rate, audio_data = wavfile.read(input_file)

    samples_per_segment = int(segment_duration * sample_rate)
    samples_overlap = int(samples_per_segment * overlap_percentage)
    step_size = samples_per_segment - samples_overlap

    # Rounding to whole samples can still collapse the segment or the step
    # to zero (e.g. a very short duration at a low sample rate).
    if samples_per_segment <= 0 or step_size <= 0:
        raise ValueError(
            "segment_duration and overlap_percentage yield a segment length of "
            f"{samples_per_segment} samples and a step of {step_size} samples; "
            "both must be at least 1"
        )

    segmented_files = []
    # The stop bound only admits start positions with a full segment remaining.
    starts = range(0, len(audio_data) - samples_per_segment + 1, step_size)
    for index, start in enumerate(starts):
        segment = audio_data[start:start + samples_per_segment]
        output_path = os.path.join(output_dir, f"{segment_prefix}{index}.wav")

        wavfile.write(output_path, sample_rate, segment)
        segmented_files.append(output_path)

    print(f"Berhasil membuat {len(segmented_files)} segmen")
    return segmented_files
|
|
def process_audio_pipeline(input_wav_file, output_dir):
    """Run the trim-then-segment pipeline on a single WAV file.

    The input is first trimmed into ``<output_dir>/trimmed.wav`` via
    ``trim_wav``; the trimmed file is then split by ``segment_wav_file``
    into 5-second segments with 50% overlap under ``<output_dir>/segments``.

    Args:
        input_wav_file: Path of the WAV file to process.
        output_dir: Directory that receives the trimmed file and the
            ``segments`` sub-directory; created if it does not exist.

    Returns:
        The list of segment paths returned by ``segment_wav_file``, or
        ``None`` when trimming yields no file or any exception occurs (the
        error is printed rather than raised).
    """
    try:
        os.makedirs(output_dir, exist_ok=True)

        # Stage 1: trim. A falsy result means there is nothing to segment.
        trimmed_path = os.path.join(output_dir, "trimmed.wav")
        if not trim_wav(input_wav_file, trimmed_path):
            return None

        # Stage 2: cut the trimmed recording into overlapping segments.
        segment_paths = segment_wav_file(
            trimmed_path,
            os.path.join(output_dir, "segments"),
            segment_duration=5,
            overlap_percentage=0.5,
        )

        print("Proses selesai!")
        return segment_paths

    except Exception as e:
        print(f"Terjadi kesalahan dalam pipeline: {str(e)}")
        return None
|
|
| |
if __name__ == "__main__":
    # Script entry point: process one hard-coded recording into the
    # hard-coded dataset directory. Edit both paths before running.
    process_audio_pipeline(
        input_wav_file="full recording.wav",
        output_dir="/nama/path/to/target/dataset",
    )