#include #include #include #include // std::fixed, std::setprecision #include // std::ifstream #include // std::string // 自定义头文件 #include "wav.h" // 包含 wav::WavReader 定义 #include "time_stamp.h" // 包含 timestamp_t 定义 #include "vad_iterator.h" // 包含 VadIterator 类声明 int main(int argc, char* argv[]) { if (argc < 3) { std::cerr << "Usage: " << argv[0] << " " << " " << std::endl; return 1; } // 获取命令行传入的模型路径和音频列表文件路径 std::string model_path = argv[1]; std::string audio_list_path = argv[2]; // 打开 audio_list.txt 文件 std::ifstream audio_list_file(audio_list_path); if (!audio_list_file.is_open()) { std::cerr << "Error: Unable to open audio list file: " << audio_list_path << std::endl; return 1; } // 初始化 VadIterator VadIterator vad(model_path); // 逐行读取音频文件路径并处理 std::string wav_path; while (std::getline(audio_list_file, wav_path)) { if (wav_path.empty()) { continue; // 跳过空行 } std::cout << wav_path << std::endl; try { // 读取 WAV 文件 (expects 16000 Hz, mono, PCM) wav::WavReader wav_reader(wav_path); int numSamples = wav_reader.num_samples(); std::vector input_wav(static_cast(numSamples)); for (size_t i = 0; i < static_cast(numSamples); i++) { input_wav[i] = static_cast(*(wav_reader.data() + i)); } // 处理音频 vad.process(input_wav); // 获取语音时间戳 (以样本为单位) std::vector stamps = vad.get_speech_timestamps(); // 将时间戳转换为秒并输出 const float sample_rate_float = 16000.0f; for (size_t i = 0; i < stamps.size(); i++) { float start_sec = std::rint((stamps[i].start / sample_rate_float) * 10.0f) / 10.0f; float end_sec = std::rint((stamps[i].end / sample_rate_float) * 10.0f) / 10.0f; // std::cout << "Speech detected from " // << std::fixed << std::setprecision(1) << start_sec // << " s to " // << std::fixed << std::setprecision(1) << end_sec // << " s" // << " [ " << stamps[i].start << " " << stamps[i].end << " ]" // << std::endl; std::cout << stamps[i].start << ", " << stamps[i].end << std::endl; } // 重置内部状态 vad.reset(); } catch (const std::exception& e) { std::cerr << "Error processing file " << wav_path << ": " << e.what() << std::endl; } } audio_list_file.close(); return 0; }