jonathanjordan21 committed on
Commit
8a3f69b
·
verified ·
1 Parent(s): 555d90e

Create idn_phonemes.py

Browse files
Files changed (1) hide show
  1. idn_phonemes.py +92 -0
idn_phonemes.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
# Grapheme -> IPA lookup table for Indonesian spelling.
# Keys are lowercase letters or two-letter digraphs; values are the IPA
# strings substituted for them by indo_to_ipa().
ipa_map = {
    # Digraphs (two letters, one sound).
    "ng": "ŋ", "ny": "ɲ", "sy": "ʃ", "kh": "x",
    # Single letters whose IPA value is not the letter itself
    # ("r" is listed here as well and maps to itself).
    "c": "tʃ", "j": "dʒ", "y": "j", "r": "r", "x": "ks",
    # Vowels; "e" is always rendered as schwa.
    "a": "a", "i": "i", "u": "u", "e": "ə", "o": "o",
    # Remaining consonants; "d" and "t" carry the dental diacritic.
    "b": "b", "d": "d̪", "t": "t̪", "g": "ɡ", "k": "k", "p": "p",
    "m": "m", "n": "n", "l": "l", "s": "s", "h": "h", "w": "w",
}
31
+
32
# Indonesian words for the integers 0 through 11, keyed by the integer.
# The position of each word in the list is the number it names.
num_words = dict(enumerate([
    "nol",
    "satu",
    "dua",
    "tiga",
    "empat",
    "lima",
    "enam",
    "tujuh",
    "delapan",
    "sembilan",
    "sepuluh",
    "sebelas",
]))
46
+
47
def number_to_words(n: int) -> str:
    """Spell out an integer in Indonesian words.

    Values 0-11 are read directly from the module-level ``num_words``
    table; everything else is composed recursively from a leading unit
    (puluh, ratus, ribu, juta, miliar, triliun) plus the remainder.

    Args:
        n: The integer to spell out. Negative values are rendered as
            ``"minus "`` followed by the words for the absolute value.

    Returns:
        The Indonesian wording, e.g. ``15 -> "lima belas"``,
        ``100 -> "seratus"``, ``2500 -> "dua ribu lima ratus"``.
        Magnitudes of 10**15 or more are outside the supported range and
        are returned as plain decimal digits.
    """
    if n < 0:
        # Previously a negative value fell into the table lookup and
        # raised KeyError.
        return "minus " + number_to_words(-n)
    if n < 12:
        return num_words[n]
    if n < 20:
        return number_to_words(n - 10) + " belas"
    if n >= 10**15:
        return str(n)  # beyond "triliun": keep the digits as a fallback

    # (unit value, unit word, contracted form used when the count is one).
    # Only "ratus" and "ribu" contract ("seratus", "seribu"); the larger
    # units are spelled with an explicit "satu".
    units = (
        (10**12, "triliun", None),
        (10**9, "miliar", None),
        (10**6, "juta", None),
        (10**3, "ribu", "seribu"),
        (10**2, "ratus", "seratus"),
        (10, "puluh", None),
    )
    for unit, unit_word, one_form in units:
        if n < unit:
            continue
        # Largest unit that fits: spell the count, then the remainder.
        count, rest = divmod(n, unit)
        if count == 1 and one_form is not None:
            words = one_form
        else:
            words = number_to_words(count) + " " + unit_word
        if rest:
            words += " " + number_to_words(rest)
        return words

    return str(n)  # unreachable for n >= 20; kept as a defensive fallback
77
+
78
def indo_to_ipa(text: str) -> str:
    """Transcribe Indonesian text into a bracketed IPA-style string.

    The text is lowercased, every run of digits is spelled out with
    ``number_to_words``, and then each grapheme listed in ``ipa_map`` is
    replaced by its IPA value. Characters that are not keys of
    ``ipa_map`` (spaces, punctuation, unmapped letters) are left as-is.

    Args:
        text: The input text in Indonesian orthography.

    Returns:
        The transcription wrapped in square brackets, e.g. ``"[...]"``.
    """
    text = text.lower()

    # Spell out digit runs first so numbers are transcribed like words.
    text = re.sub(r"\d+", lambda m: number_to_words(int(m.group())), text)

    # Convert graphemes to IPA in ONE pass over the text. Substituting key
    # by key re-processes earlier output: "kh" -> "x" was later turned into
    # "ks" by the "x" rule, and the "t"/"d" produced for "c"/"j" picked up
    # the dental diacritic. Longest keys go first in the alternation so
    # digraphs such as "ng" win over their single letters.
    graphemes = sorted(ipa_map, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(g) for g in graphemes))
    transcribed = pattern.sub(lambda m: ipa_map[m.group()], text)

    return "[" + transcribed + "]"