# vad_cpp/python/processing.py
# Uploaded by hzeng412; duplicated from MoYoYoTech/vad_cpp (revision d21d362).
import os
import sys
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)
# sys.path.append("/Users/chenxiang/translator/Translator/llama-cpp-python/llama_cpp")
from .pipelines import MetaItem, VadPipe
class ProcessingPipes:
    """Start the worker pipes and route audio through them.

    Constructing an instance starts one ``VadPipe`` worker. Every worker
    started through ``_launch_process`` is also recorded so that
    ``wait_ready`` can call ``wait()`` on each of them.
    """

    def __init__(self) -> None:
        """Create the worker registry and launch the VAD pipe."""
        # Registry of every worker started via _launch_process().
        self._process = []
        # VAD worker; voice_detect() talks to it through its two queues.
        self._vad_pipe = self._launch_process(VadPipe())

    def _launch_process(self, process_obj):
        """Mark ``process_obj`` as a daemon, start it, register it, return it.

        Args:
            process_obj: Worker exposing a ``daemon`` attribute and a
                ``start()`` method (presumably a ``multiprocessing.Process``
                subclass -- confirm in ``pipelines``).

        Returns:
            The same object that was passed in, after ``start()`` was called.
        """
        worker = process_obj
        # The daemon flag is set before start(), matching the order required
        # by the standard Process/Thread APIs.
        worker.daemon = True
        worker.start()
        # Registered only after start() returned without raising.
        self._process.append(worker)
        return worker

    def wait_ready(self):
        """Call ``wait()`` on every registered worker, in launch order."""
        for worker in self._process:
            # NOTE(review): wait() is defined by the pipe class, not visible
            # here; presumably it blocks until the worker is ready.
            worker.wait()

    def voice_detect(self, audio_buffer: bytes) -> MetaItem:
        """Send one audio buffer to the VAD pipe and return the next output item.

        Args:
            audio_buffer: Audio payload stored as ``MetaItem.source_audio``.
                NOTE(review): annotated as ``bytes``, but the smoke script in
                this file passes a float32 NumPy array -- confirm which form
                ``VadPipe`` expects.

        Returns:
            Whatever ``output_queue.get()`` of the VAD pipe yields next
            (annotated as ``MetaItem``).
        """
        vad = self._vad_pipe
        vad.input_queue.put(MetaItem(source_audio=audio_buffer))
        # get() is called without arguments; presumably this blocks until the
        # worker publishes a result -- confirm against the queue type used.
        return vad.output_queue.get()
if __name__ == "__main__":
    # Manual smoke test: load one audio file, normalise it to mono / 16 kHz /
    # float32, and push it through the VAD pipe once.
    #
    # Usage: run with an optional audio file path as the first command-line
    # argument; without it the original developer-machine sample is used.
    import soundfile
    import numpy as np

    default_wav_path = "/Users/chenxiang/translator/core/whisper_wrapper/bin/zh.wav"
    wav_path = sys.argv[1] if len(sys.argv) > 1 else default_wav_path

    tp = ProcessingPipes()
    audio, sr = soundfile.read(wav_path)

    # Make sure the signal is mono: average the channels if there are several.
    if len(audio.shape) > 1:
        print("不是单声道")
        audio = audio.mean(axis=1)

    # Resample to 16 kHz when the file uses a different sample rate.
    if sr != 16000:
        print("采样率不是 16000, 重新采样到 16kHz(如果需要)")
        # Imported lazily so resampy is only required when resampling happens.
        import resampy
        audio = resampy.resample(audio, sr, 16000)

    # Convert to float32 before handing the samples to the pipe.
    print(f"original audio data type = {audio.dtype}")
    audio = audio.astype(np.float32)
    print(f"original audio data size = {audio.shape}")

    result = tp.voice_detect(audio)
    # For debugging, inspect `result` here; the original debug print read
    # result.speech_status and result.segments (confirm against MetaItem).
    print("********** END *************")