File size: 6,838 Bytes
8a3f69b
 
82a2cd4
8a3f69b
82a2cd4
 
 
 
8a3f69b
 
82a2cd4
 
 
 
 
 
 
 
 
 
 
8a3f69b
 
82a2cd4
8a3f69b
82a2cd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
085f4e9
82a2cd4
 
 
 
 
 
 
085f4e9
82a2cd4
 
 
 
 
085f4e9
82a2cd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70d2a73
82a2cd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
085f4e9
 
 
 
 
82a2cd4
 
 
 
 
 
 
 
 
 
085f4e9
 
 
 
 
 
 
 
 
82a2cd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
import re

# --- Grapheme -> IPA table ---
# Keep the entry order stable: key iteration order is visible to any code
# that walks this table.
ipa_map = {
    # digraphs
    "ng": "ŋ",
    "ny": "ɲ",
    "sy": "ʃ",
    "kh": "x",
    # affricates
    "c": "tʃ",
    "j": "dʒ",
    # plosives
    "b": "b",
    "d": "d̪",
    "t": "t̪",
    "g": "ɡ",
    "k": "k",
    "p": "p",
    # nasals, liquids, fricatives, glides
    "m": "m",
    "n": "n",
    "l": "l",
    "s": "s",
    "h": "h",
    "r": "r",
    "w": "w",
    "y": "j",
    # vowels
    "a": "a",
    "i": "i",
    "u": "u",
    "o": "o",
    "e": "ə",
}

# Spoken names of the letters a-z.
letter_words = {
    "a": "a", "b": "be", "c": "ce", "d": "de", "e": "e",
    "f": "ef", "g": "ge", "h": "ha", "i": "i", "j": "je",
    "k": "ka", "l": "el", "m": "em", "n": "en", "o": "o",
    "p": "pe", "q": "ki", "r": "er", "s": "es", "t": "te",
    "u": "u", "v": "fe", "w": "we", "x": "eks", "y": "ye",
    "z": "zet",
}

# Spoken names of the single digits, keyed by the digit character.
digit_words = dict(zip(
    "0123456789",
    (
        "nol", "satu", "dua", "tiga", "empat",
        "lima", "enam", "tujuh", "delapan", "sembilan",
    ),
))

# --- Number to words (named scales up to "triliun") ---
def number_to_words(n: int) -> str:
    """Spell an integer as Indonesian cardinal words.

    Args:
        n: The value to spell. It is passed through ``int()`` first, so
            anything ``int()`` accepts (e.g. a digit string) works too.

    Returns:
        The spelled-out number, e.g. ``"seribu dua ratus"`` for 1200.
        Zero is ``"nol"``; negative values are prefixed with ``"minus "``.

    Raises:
        ValueError / TypeError: whatever ``int(n)`` raises for bad input.
    """
    n = int(n)
    if n < 0:
        # Without this guard a negative value skips every scale and ends up
        # indexing the unit table with a negative number (-1 -> "sebelas").
        return "minus " + number_to_words(-n)
    if n == 0:
        return "nol"

    units = (
        "nol", "satu", "dua", "tiga", "empat", "lima", "enam",
        "tujuh", "delapan", "sembilan", "sepuluh", "sebelas",
    )

    def _below_thousand(x):
        """Spell 1..999."""
        if x < 12:
            return units[x]
        if x < 20:
            return units[x - 10] + " belas"
        if x < 100:
            tens, ones = divmod(x, 10)
            spelled = units[tens] + " puluh"
            return spelled + " " + units[ones] if ones else spelled
        hundreds, rest = divmod(x, 100)
        # "one hundred" is the fused form "seratus", not "satu ratus".
        head = "seratus" if hundreds == 1 else units[hundreds] + " ratus"
        return head + " " + _below_thousand(rest) if rest else head

    scales = (
        (1_000_000_000_000, "triliun"),
        (1_000_000_000, "miliar"),
        (1_000_000, "juta"),
        (1000, "ribu"),
    )
    parts = []
    remaining = n
    for value, name in scales:
        if remaining < value:
            continue
        count, remaining = divmod(remaining, value)
        if value == 1000 and count == 1:
            # "one thousand" is the fused form "seribu".
            parts.append("seribu")
        else:
            # count may itself be >= 1000 (only for the largest scale), so
            # recurse through the full function rather than the helper.
            parts.append(number_to_words(count) + " " + name)
    if remaining:
        parts.append(_below_thousand(remaining))
    return " ".join(parts)

# --- Phone numbers ---
# A standalone token of "+62" or one digit followed by at least seven more
# digits, not glued to a word character on either side.
phone_pattern = re.compile(r'(?<!\w)(?:\+62|\d)\d{7,}(?!\w)')


def expand_phones(text: str) -> str:
    """Read every phone-number-like token in *text* digit by digit.

    Each match of ``phone_pattern`` is replaced by the space-separated
    spoken names of its digits (looked up in ``digit_words``); a leading
    "+" is dropped. Text that does not match is returned unchanged.

    Args:
        text: Input text.

    Returns:
        The text with phone-number tokens spelled out.
    """
    def spell_digits(match):
        # ``\d`` in a str pattern matches every Unicode decimal digit, but
        # digit_words only has the ASCII keys "0".."9". int() maps any
        # decimal digit to its value, so normalising through it avoids a
        # KeyError on e.g. Arabic-Indic or full-width digits.
        return " ".join(
            digit_words[str(int(ch))]
            for ch in match.group(0)
            if ch.isdecimal()
        )

    return phone_pattern.sub(spell_digits, text)

# --- Generic numbers ---
def expand_numbers(text: str) -> str:
    """Replace each run of digits in *text* with its spelled-out cardinal.

    Every ``\\d+`` match is converted with ``int()`` and handed to
    ``number_to_words``; everything else is left as it is.
    """
    return re.sub(
        r'\d+',
        lambda found: number_to_words(int(found.group(0))),
        text,
    )

# --- Abbreviations ---
# A standalone run of two or more ASCII capitals.
abbr_pattern = re.compile(r'(?<!\w)([A-Z]{2,})(?!\w)')


def expand_abbreviations(text: str) -> str:
    """Spell out all-caps abbreviations in *text* letter by letter.

    Each match of ``abbr_pattern`` becomes the space-separated spoken names
    of its letters, taken from ``letter_words``. The token "HP" is handled
    by a dedicated branch that returns the fixed reading "ha pe".
    """
    def spell_out(found):
        letters = found.group(1)
        if letters != "HP":
            return " ".join(letter_words[c] for c in letters.lower())
        # NOTE(review): the original comment here said not to expand a
        # standalone "HP", yet the branch returns its spelled reading.
        return "ha pe"

    return abbr_pattern.sub(spell_out, text)

# --- IPA ---
def apply_ipa_map(text: str) -> str:
    """Convert *text* to IPA using ``ipa_map`` and tidy the whitespace.

    The text is lower-cased, every key of ``ipa_map`` found in it is
    replaced by its IPA value, runs of whitespace are collapsed to a single
    space and the result is stripped. Characters with no entry in the table
    are left as they are.

    The replacement is done in one pass over the input, so the output of
    one mapping is never fed into another. Applying the rules one after the
    other re-mapped the "t" of "tʃ" (from "c") and the "d" of "dʒ"
    (from "j") through the later "t"/"d" rules.

    Args:
        text: Orthographic input text.

    Returns:
        The IPA string.
    """
    lowered = text.lower()
    # Longest keys first so a digraph such as "ng" wins over plain "n".
    keys = sorted(ipa_map, key=len, reverse=True)
    if keys:
        matcher = re.compile("|".join(re.escape(key) for key in keys))
        lowered = matcher.sub(lambda found: ipa_map[found.group(0)], lowered)
    return re.sub(r'\s+', ' ', lowered).strip()

# --- Pipeline ---
def indo_to_ipa(text: str) -> str:
    """Run the full text -> IPA pipeline on *text*.

    Stages, in order: phone numbers, generic numbers, abbreviations, IPA
    mapping. The order matters: phone numbers are read digit by digit
    before the generic number pass (which matches any digit run) can see
    them, and abbreviations are spelled out before the IPA stage
    lower-cases the text and removes the all-caps cue.
    """
    stages = (
        expand_phones,          # 1. phone numbers
        expand_numbers,         # 2. ordinary numbers
        expand_abbreviations,   # 3. abbreviations
        apply_ipa_map,          # 4. IPA mapping
    )
    result = text
    for stage in stages:
        result = stage(result)
    return result

# import re

# ipa_map = {
#     "ng": "ŋ",
#     "ny": "ɲ",
#     "sy": "ʃ",
#     "kh": "x",
#     "c": "tʃ",
#     "j": "dʒ",
#     "y": "j",
#     "r": "r",
#     "x": "ks",
#     "a": "a",
#     "i": "i",
#     "u": "u",
#     "e": "ə",
#     "o": "o",
#     "b": "b",
#     "d": "d̪",
#     "t": "t̪",
#     "g": "ɡ",
#     "k": "k",
#     "p": "p",
#     "m": "m",
#     "n": "n",
#     "l": "l",
#     "s": "s",
#     "h": "h",
#     "w": "w",
# }

# num_words = {
#     0: "nol",
#     1: "satu",
#     2: "dua",
#     3: "tiga",
#     4: "empat",
#     5: "lima",
#     6: "enam",
#     7: "tujuh",
#     8: "delapan",
#     9: "sembilan",
#     10: "sepuluh",
#     11: "sebelas"
# }

# def number_to_words(n: int) -> str:
#     """Konversi angka 0–9999 ke kata dalam bahasa Indonesia"""
#     if n < 12:
#         return num_words[n]
#     elif n < 20:
#         return number_to_words(n-10) + " belas"
#     elif n < 100:
#         puluhan, sisa = divmod(n, 10)
#         result = number_to_words(puluhan) + " puluh"
#         if sisa:
#             result += " " + number_to_words(sisa)
#         return result
#     elif n < 200:
#         return "seratus" + (" " + number_to_words(n-100) if n > 100 else "")
#     elif n < 1000:
#         ratusan, sisa = divmod(n, 100)
#         result = number_to_words(ratusan) + " ratus"
#         if sisa:
#             result += " " + number_to_words(sisa)
#         return result
#     elif n < 2000:
#         return "seribu" + (" " + number_to_words(n-1000) if n > 1000 else "")
#     elif n < 10000:
#         ribuan, sisa = divmod(n, 1000)
#         result = number_to_words(ribuan) + " ribu"
#         if sisa:
#             result += " " + number_to_words(sisa)
#         return result
#     else:
#         return str(n)  # fallback



# def expand_abbreviation(word: str) -> str:
#     """Ubah singkatan (huruf kapital) jadi ucapan Indonesia"""
#     if word.isupper() and len(word) > 1:  # contoh: KTP, DPR, RI
#         return " ".join(letter_words.get(ch.lower(), ch) for ch in word)
#     return word


# letter_words = {
#     "a": "a",
#     "b": "be",
#     "c": "ce",
#     "d": "de",
#     "e": "e",
#     "f": "ef",
#     "g": "ge",
#     "h": "ha",
#     "i": "i",
#     "j": "je",
#     "k": "ka",
#     "l": "el",
#     "m": "em",
#     "n": "en",
#     "o": "o",
#     "p": "pe",
#     "q": "ki",
#     "r": "er",
#     "s": "es",
#     "t": "te",
#     "u": "u",
#     "v": "fe",
#     "w": "we",
#     "x": "eks",
#     "y": "ye",
#     "z": "zet",
# }


# def indo_to_ipa(text: str) -> str:
#     text = text.lower()
    
#     # Tangani singkatan (huruf kapital semua)
#     words = []
#     for w in text.split():
#         if w.isupper() and len(w) > 1:
#             words.append(expand_abbreviation(w))
#         else:
#             words.append(w)
#     text = " ".join(words)
    
#     # Tangani angka → kata
#     def replace_number(match):
#         num = int(match.group())
#         return number_to_words(num)
#     text = re.sub(r"\d+", replace_number, text)
    
#     # Konversi huruf → IPA
#     for k in sorted(ipa_map.keys(), key=lambda x: -len(x)):
#         text = re.sub(k, ipa_map[k], text)
    
#     return text

# # def indo_to_ipa(text: str) -> str:
# #     text = text.lower()
    
# #     # Cari semua angka dalam teks dan ubah ke kata
# #     def replace_number(match):
# #         num = int(match.group())
# #         return number_to_words(num)
    
# #     text = re.sub(r"\d+", replace_number, text)
    
# #     # Konversi huruf → IPA
# #     for k in sorted(ipa_map.keys(), key=lambda x: -len(x)):
# #         text = re.sub(k, ipa_map[k], text)
    
# #     return text