"""Pipeline front-end that owns the VAD worker and a small manual demo."""

import os
import sys

# Make the parent directory importable before the package-relative import.
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)

from .pipelines import MetaItem, VadPipe


class ProcessingPipes:
    """Start the processing pipes and expose them through simple calls.

    Only a single pipe (``VadPipe``) is launched here; every launched pipe
    is also recorded in ``self._process`` so ``wait_ready`` can visit it.
    """

    def __init__(self) -> None:
        # Every pipe object started through ``_launch_process``.
        self._process = []

        # Voice-activity-detection pipe.
        self._vad_pipe = self._launch_process(VadPipe())

    def _launch_process(self, process_obj):
        """Mark ``process_obj`` as a daemon, start it, record it, return it.

        The ``daemon`` flag is assigned before ``start()`` is called, and the
        object is appended to ``self._process`` only after ``start()``
        returns.
        """
        process_obj.daemon = True
        process_obj.start()
        self._process.append(process_obj)
        return process_obj

    def wait_ready(self):
        """Call ``wait()`` on each launched pipe, in launch order.

        NOTE(review): presumably ``wait()`` blocks until the pipe reports
        that it is ready -- confirm against the pipe implementation.
        """
        for pipe in self._process:
            pipe.wait()

    def voice_detect(self, audio_buffer: bytes) -> MetaItem:
        """Send ``audio_buffer`` through the VAD pipe and return its output.

        The buffer is wrapped in ``MetaItem(source_audio=...)``, put on the
        pipe's ``input_queue``, and the next item taken from its
        ``output_queue`` is returned.

        NOTE(review): the parameter is annotated ``bytes`` but the demo at
        the bottom of this file passes a float32 numpy array -- confirm
        which type ``MetaItem`` actually expects.
        """
        vad = self._vad_pipe
        vad.input_queue.put(MetaItem(source_audio=audio_buffer))
        return vad.output_queue.get()


if __name__ == "__main__":
    import soundfile
    import numpy as np

    # Only ``wav_path2`` is read below; ``wav_path1`` is currently unused.
    wav_path1 = "/Users/chenxiang/translator/core/vad_cpp/bin/Chinese-liyongle-part1.mp3"
    wav_path2 = "/Users/chenxiang/translator/core/whisper_wrapper/bin/zh.wav"

    pipes = ProcessingPipes()
    samples, sample_rate = soundfile.read(wav_path2)

    # Make sure the signal is mono: average across axis 1 when the array
    # has more than one dimension.
    if samples.ndim > 1:
        print("不是单声道")
        samples = samples.mean(axis=1)

    # Resample to 16 kHz when the file uses a different sample rate.
    if sr_mismatch := (sample_rate != 16000):
        print("采样率不是 16000, 重新采样到 16kHz(如果需要)")
        import resampy

        samples = resampy.resample(samples, sample_rate, 16000)

    # Convert to float32.
    print(f"original audio data type = {samples.dtype}")
    samples = samples.astype(np.float32)
    print(f"original audio data size = {samples.shape}")

    result = tp_result = pipes.voice_detect(samples)
    # print(f"{result.speech_status} {result.segments} {result.segments}")
    print("********** END *************")