File size: 2,250 Bytes
#include <cmath>
#include <exception>
#include <iomanip> // std::fixed, std::setprecision
#include <iostream>
#include <string>
#include <vector>
// Project headers
#include "wav.h" // provides wav::WavReader
#include "time_stamp.h" // provides timestamp_t
#include "vad_iterator.h" // declares the VadIterator class
int main(int argc, char* argv[]) {
if (argc < 3) {
std::cerr << "Usage: " << argv[0] << " <model_absolute_path>" << " <audio_file_absolute_path>" << std::endl;
return 1;
}
// 获取命令行传入的音频文件路径
std::string model_path = argv[1];
std::string wav_path = argv[2];
// std::string model_path = "/Users/chenxiang/translator/Translator/moyoyo_asr_models/silero-vad/silero_vad.onnx";
// std::string wav_path = "/Users/chenxiang/translator/core/whisper_wrapper/bin/zh.wav";
// Read the WAV file (expects 16000 Hz, mono, PCM).
wav::WavReader wav_reader(wav_path); // File located in the "audio" folder.
int numSamples = wav_reader.num_samples();
std::vector<float> input_wav(static_cast<size_t>(numSamples));
for (size_t i = 0; i < static_cast<size_t>(numSamples); i++) {
input_wav[i] = static_cast<float>(*(wav_reader.data() + i));
}
// Initialize the VadIterator.
VadIterator vad(model_path);
// Process the audio.
vad.process(input_wav);
// Retrieve the speech timestamps (in samples).
std::vector<timestamp_t> stamps = vad.get_speech_timestamps();
// Convert timestamps to seconds and round to one decimal place (for 16000 Hz).
const float sample_rate_float = 16000.0f;
for (size_t i = 0; i < stamps.size(); i++) {
float start_sec = std::rint((stamps[i].start / sample_rate_float) * 10.0f) / 10.0f;
float end_sec = std::rint((stamps[i].end / sample_rate_float) * 10.0f) / 10.0f;
std::cout << "Speech detected from "
<< std::fixed << std::setprecision(1) << start_sec
<< " s to "
<< std::fixed << std::setprecision(1) << end_sec
<< " s"
<< " [ " << stamps[i].start << " " << stamps[i].end <<" ]"
<< std::endl;
}
// Optionally, reset the internal state.
vad.reset();
return 0;
} |