Yuchan
committed on
Update Mo.py
Browse files
Mo.py
CHANGED
|
@@ -69,7 +69,7 @@ vocab_size = sp.get_piece_size()
|
|
| 69 |
print(f"✅ Vocabulary size: {vocab_size}")
|
| 70 |
|
| 71 |
max_len = 256
|
| 72 |
-
batch_size =
|
| 73 |
|
| 74 |
def text_to_ids(text):
|
| 75 |
return sp.encode(text, out_type=int)
|
|
@@ -99,7 +99,7 @@ def txt_stream(file_path):
|
|
| 99 |
)
|
| 100 |
|
| 101 |
|
| 102 |
-
LIMIT = 36757266
|
| 103 |
|
| 104 |
dataset = tf.data.Dataset.from_generator(
|
| 105 |
lambda: txt_stream(DATA_PATH),
|
|
|
|
| 69 |
print(f"✅ Vocabulary size: {vocab_size}")
|
| 70 |
|
| 71 |
max_len = 256
|
| 72 |
+
batch_size = 128
|
| 73 |
|
| 74 |
def text_to_ids(text):
|
| 75 |
return sp.encode(text, out_type=int)
|
|
|
|
| 99 |
)
|
| 100 |
|
| 101 |
|
| 102 |
+
LIMIT = 36757266 // 2
|
| 103 |
|
| 104 |
dataset = tf.data.Dataset.from_generator(
|
| 105 |
lambda: txt_stream(DATA_PATH),
|