# File size: 1,879 Bytes

# d21d362

import os
import sys
# Append the parent of the directory containing this file to sys.path, which
# makes modules located in that parent directory importable by absolute name.
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)
# sys.path.append("/Users/chenxiang/translator/Translator/llama-cpp-python/llama_cpp")

from .pipelines import MetaItem, VadPipe

class ProcessingPipes:
    """Front end for the background workers used to process audio.

    Constructing an instance starts one ``VadPipe`` worker. Every worker
    started through ``_launch_process`` is remembered so that ``wait_ready``
    can visit all of them.
    """

    def __init__(self) -> None:
        # Registry of every worker started by this instance.
        self._process = []
        # Voice-activity-detection (VAD) worker.
        self._vad_pipe = self._launch_process(VadPipe())

    def _launch_process(self, process_obj):
        """Start ``process_obj``, register it, and hand it back.

        ``daemon`` is set to ``True`` before ``start()`` is called, and the
        object joins the registry only after ``start()`` has returned.
        """
        process_obj.daemon = True
        process_obj.start()
        self._process.append(process_obj)
        return process_obj

    def wait_ready(self):
        """Call ``wait()`` on each registered worker, in launch order."""
        for worker in self._process:
            worker.wait()

    def voice_detect(self, audio_buffer: bytes) -> MetaItem:
        """Run ``audio_buffer`` through the VAD worker and return its output.

        The audio is wrapped in ``MetaItem(source_audio=...)`` and put on the
        worker's ``input_queue``; the return value is whatever
        ``output_queue.get()`` yields next (called without arguments).
        """
        vad = self._vad_pipe
        vad.input_queue.put(MetaItem(source_audio=audio_buffer))
        return vad.output_queue.get()


if __name__ == "__main__":
    # Manual smoke test: load one audio file, normalise it to mono / 16 kHz /
    # float32, and push it through the VAD pipe once.
    #
    # The first command-line argument, when given, is the audio file to load;
    # otherwise the original sample path below is used.
    import soundfile
    import numpy as np

    default_wav_path = "/Users/chenxiang/translator/core/whisper_wrapper/bin/zh.wav"
    wav_path = sys.argv[1] if len(sys.argv) > 1 else default_wav_path

    tp = ProcessingPipes()
    audio, sr = soundfile.read(wav_path)

    # Make sure the signal is mono: average across axis 1 when the array has
    # more than one dimension.
    if audio.ndim > 1:
        print("不是单声道")
        audio = audio.mean(axis=1)

    # Resample to 16 kHz when the file uses a different sample rate.
    if sr != 16000:
        print("采样率不是 16000, 重新采样到 16kHz（如果需要）")
        import resampy
        audio = resampy.resample(audio, sr, 16000)

    # Convert to float32.
    print(f"original audio data type = {audio.dtype}")
    audio = audio.astype(np.float32)

    print(f"original audio data size = {audio.shape}")

    # NOTE(review): the result is currently not inspected; the disabled print
    # below hints at the attributes it is expected to carry.
    result = tp.voice_detect(audio)
    # print(f"{result.speech_status}  {result.segments}  {result.segments}")
    print("********** END *************")