#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
|
|
// Retained so that any code in this file written with unqualified
// standard-library names keeps compiling.
using namespace std;

/// Token -> numeric id table.
/// Filled by loadModel() from the "Vocabs:" section of the model file and
/// read by inferNextWord() both to encode the context and to turn the
/// predicted id back into a token.
std::unordered_map<std::string, std::uint32_t> vocab;

/// Context key -> list of target token ids recorded for that key.
/// Filled by loadModel() from the "Matchs:" section. inferNextWord() looks it
/// up with the position-weighted sum of the context's token ids.
std::unordered_map<std::uint64_t, std::vector<std::uint32_t>> hasilToOutput;
|
|
/// Tokenises every entry of `theString` in place.
///
/// Each maximal run of alphanumeric characters inside an entry becomes one
/// lower-cased token; every other character becomes its own one-character
/// token. Tokens keep the input order and a run never spans two entries.
/// Empty entries contribute nothing.
///
/// @param theString  In/out: raw words on entry, the token list on return.
void preprocess(std::vector<std::string> &theString) {
    std::vector<std::string> tokens;
    tokens.reserve(theString.size());

    for (const std::string &entry : theString) {
        std::string run;  // alphanumeric run currently being collected
        for (const char ch : entry) {
            // The <cctype> classifiers require a value representable as
            // unsigned char; handing them a negative plain char (any byte
            // >= 0x80 where char is signed) is undefined behaviour.
            const auto byte = static_cast<unsigned char>(ch);
            if (std::isalnum(byte)) {
                run += static_cast<char>(std::tolower(byte));
                continue;
            }
            // A non-alphanumeric character ends the pending run (if any) and
            // is then emitted as a token of its own.
            if (!run.empty()) {
                tokens.push_back(std::move(run));
                run.clear();
            }
            tokens.push_back(std::string(1, ch));
        }
        if (!run.empty()) {
            tokens.push_back(std::move(run));
        }
    }

    theString = std::move(tokens);
}
|
|
| |
| void loadModel(const string &filename) { |
| ifstream file(filename); |
| if (!file) { |
| cerr << "Gagal membuka file model.\n"; |
| exit(1); |
| } |
|
|
| string line; |
| bool readingVocab = false; |
| bool readingMatch = false; |
|
|
| while (getline(file, line)) { |
| if (line == "Vocabs:") { |
| readingVocab = true; |
| readingMatch = false; |
| continue; |
| } |
| if (line == "Matchs:") { |
| readingVocab = false; |
| readingMatch = true; |
| continue; |
| } |
|
|
| if (readingVocab) { |
| size_t pos = line.find(": "); |
| if (pos != string::npos) { |
| string word = line.substr(0, pos); |
| uint32_t id = stoi(line.substr(pos + 2)); |
| vocab[word] = id; |
| } |
| } else if (readingMatch) { |
| if (line.back() == ':') { |
| uint64_t key = stoull(line.substr(0, line.size() - 1)); |
| getline(file, line); |
| vector<uint32_t> targets; |
| while (getline(file, line) && line != "]") { |
| if (!line.empty()) { |
| targets.push_back(stoi(line)); |
| } |
| } |
| hasilToOutput[key] = targets; |
| } |
| } |
| } |
|
|
| file.close(); |
| } |
|
|
| |
| string inferNextWord(const vector<string> &contextWords) { |
| uint64_t total = 0; |
| for (size_t i = 0; i < contextWords.size(); ++i) { |
| const string &word = contextWords[i]; |
| if (vocab.count(word)) { |
| total += vocab[word] * (i + 1); |
| } else { |
| return "<unknown word: " + word + ">"; |
| } |
| } |
|
|
| if (hasilToOutput.count(total) == 0) { |
| |
| uint64_t closestKey = 0; |
| uint64_t minDiff = UINT64_MAX; |
|
|
| for (const auto &[key, _] : hasilToOutput) { |
| uint64_t diff = (key > total) ? key - total : total - key; |
| if (diff < minDiff) { |
| minDiff = diff; |
| closestKey = key; |
| } |
| } |
|
|
| if (minDiff == UINT64_MAX) |
| return "<no prediction>"; |
| total = closestKey; |
| } |
| const auto &candidates = hasilToOutput[total]; |
| |
| unordered_map<uint32_t, int> freq; |
| for (auto id : candidates) { |
| freq[id]++; |
| } |
|
|
| uint32_t predictedID = max_element(freq.begin(), freq.end(), |
| [](const pair<uint32_t, int> &a, |
| const pair<uint32_t, int> &b) { |
| return a.second < b.second; |
| }) |
| ->first; |
|
|
| |
| for (const auto &[word, id] : vocab) { |
| if (id == predictedID) |
| return word; |
| } |
|
|
| return "<not found>"; |
| } |
|
|
| |
| int main() { |
| srand(time(0)); |
| loadModel("model.txt"); |
|
|
| cout << "Masukkan kalimat sebagai konteks:\n"; |
| vector<string> context; |
| string word; |
| string words; |
| getline(cin, words); |
| stringstream ss(words); |
| while (ss >> word) { |
| context.push_back(word); |
| } |
| preprocess(context); |
| auto newContext = context; |
|
|
| string prediction; |
|
|
| int i = 0; |
|
|
| while (prediction != "[AKHIR]" && i < 50) { |
| prediction = inferNextWord(newContext); |
| newContext.push_back(prediction); |
| i++; |
| } |
| cout << "Prediksi kata berikutnya:"; |
| for (auto m : newContext) { |
| cout << " " << m; |
| if (m.find("<unknown word:") != string::npos) |
| break; |
| } |
| cout << endl; |
|
|
| return 0; |
| } |
|
|