#include <cmath>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include "wav.h"
#include "time_stamp.h"
#include "vad_iterator.h"
|
|
| int main(int argc, char* argv[]) { |
| if (argc < 3) { |
| std::cerr << "Usage: " << argv[0] << " <model_absolute_path>" << " <audio_list_absolute_path>" << std::endl; |
| return 1; |
| } |
|
|
| |
| std::string model_path = argv[1]; |
| std::string audio_list_path = argv[2]; |
|
|
| |
| std::ifstream audio_list_file(audio_list_path); |
| if (!audio_list_file.is_open()) { |
| std::cerr << "Error: Unable to open audio list file: " << audio_list_path << std::endl; |
| return 1; |
| } |
|
|
| |
| VadIterator vad(model_path); |
|
|
| |
| std::string wav_path; |
| while (std::getline(audio_list_file, wav_path)) { |
| if (wav_path.empty()) { |
| continue; |
| } |
|
|
| std::cout << wav_path << std::endl; |
|
|
| try { |
| |
| wav::WavReader wav_reader(wav_path); |
| int numSamples = wav_reader.num_samples(); |
| std::vector<float> input_wav(static_cast<size_t>(numSamples)); |
| for (size_t i = 0; i < static_cast<size_t>(numSamples); i++) { |
| input_wav[i] = static_cast<float>(*(wav_reader.data() + i)); |
| } |
|
|
| |
| vad.process(input_wav); |
| |
| std::vector<timestamp_t> stamps = vad.get_speech_timestamps(); |
|
|
| |
| const float sample_rate_float = 16000.0f; |
| for (size_t i = 0; i < stamps.size(); i++) { |
| float start_sec = std::rint((stamps[i].start / sample_rate_float) * 10.0f) / 10.0f; |
| float end_sec = std::rint((stamps[i].end / sample_rate_float) * 10.0f) / 10.0f; |
| |
| |
| |
| |
| |
| |
| |
| std::cout << stamps[i].start << ", " << stamps[i].end << std::endl; |
| } |
|
|
| |
| vad.reset(); |
| } catch (const std::exception& e) { |
| std::cerr << "Error processing file " << wav_path << ": " << e.what() << std::endl; |
| } |
| } |
|
|
| audio_list_file.close(); |
| return 0; |
| } |