| import os |
| import sys |
# Directory that contains this file, and the directory one level above it.
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)

# Append the parent directory to the module search path. It is appended (not
# inserted at the front), so entries already on sys.path keep their priority.
sys.path.append(parent_dir)
| |
|
|
| from .pipelines import MetaItem, VadPipe |
|
|
class ProcessingPipes:
    """Own the worker pipes used for audio processing and route items to them.

    On construction a single ``VadPipe`` is created, started, and recorded in
    an internal list of launched workers.
    """

    def __init__(self) -> None:
        """Create and start the VAD pipe."""
        # Every worker started through _launch_process is tracked here so
        # wait_ready() can visit all of them.
        self._process = []

        vad_pipe = VadPipe()
        self._vad_pipe = self._launch_process(vad_pipe)

    def _launch_process(self, process_obj):
        """Start ``process_obj`` and register it with this instance.

        Sets ``daemon = True`` on the object, calls its ``start()`` method,
        appends it to the list of launched workers, and returns the same
        object so the caller can keep a reference.

        NOTE(review): presumably ``process_obj`` is a
        ``multiprocessing.Process``/``threading.Thread``-like object, where
        ``daemon`` must be set before ``start()`` — confirm against the pipe
        base class.
        """
        process_obj.daemon = True
        process_obj.start()
        self._process.append(process_obj)
        return process_obj

    def wait_ready(self):
        """Call ``wait()`` on every launched worker, in launch order.

        What ``wait()`` waits for is defined by the pipe classes, which are
        not visible in this file.
        """
        for worker in self._process:
            worker.wait()

    def voice_detect(self, audio_buffer: bytes) -> MetaItem:
        """Send one audio buffer through the VAD pipe and return its output.

        The buffer is wrapped in a ``MetaItem`` (as ``source_audio``), put on
        the VAD pipe's ``input_queue``, and the next item taken from its
        ``output_queue`` is returned.

        NOTE(review): the parameter is annotated ``bytes``, but the demo at
        the bottom of this file passes a float32 NumPy array — confirm what
        ``MetaItem.source_audio`` is expected to hold.
        NOTE(review): ``get()`` presumably blocks until the worker produces a
        result; the queue type is defined by ``VadPipe`` — confirm.
        """
        request = MetaItem(source_audio=audio_buffer)
        vad = self._vad_pipe
        vad.input_queue.put(request)
        return vad.output_queue.get()
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: read a local audio file, convert it to mono /
    # 16 kHz / float32, and push it once through the VAD pipe.
    import soundfile
    import numpy as np

    # Hard-coded developer-machine paths; only wav_path2 is actually read.
    wav_path1 = "/Users/chenxiang/translator/core/vad_cpp/bin/Chinese-liyongle-part1.mp3"
    wav_path2 = "/Users/chenxiang/translator/core/whisper_wrapper/bin/zh.wav"

    # The worker is started before the audio is loaded.
    tp = ProcessingPipes()
    audio, sr = soundfile.read(wav_path2)

    # More than one dimension means multi-channel data: average the channels
    # (axis 1) down to a single one. The message reads "not mono".
    if audio.ndim > 1:
        print("不是单声道")
        audio = audio.mean(axis=1)

    # Resample anything that is not already 16 kHz. The message reads
    # "sample rate is not 16000, resampling to 16 kHz (if needed)".
    # resampy is imported lazily because it is only needed on this path.
    if sr != 16000:
        print("采样率不是 16000, 重新采样到 16kHz(如果需要)")
        import resampy
        audio = resampy.resample(audio, sr, 16000)

    # Report the dtype as loaded, then convert to float32.
    print(f"original audio data type = {audio.dtype}")
    audio = audio.astype(np.float32)

    print(f"original audio data size = {audio.shape}")

    # The result is fetched but not inspected or printed.
    result = tp.voice_detect(audio)

    print("********** END *************")
|
|