Yuchan
committed on
Update Mo.py
Browse files
Mo.py
CHANGED
|
@@ -67,7 +67,7 @@ unk_id = sp.piece_to_id("<unk>")
|
|
| 67 |
vocab_size = sp.get_piece_size()
|
| 68 |
print(f"✅ Vocabulary size: {vocab_size}")
|
| 69 |
|
| 70 |
-
max_len =
|
| 71 |
batch_size = 96
|
| 72 |
|
| 73 |
def text_to_ids(text):
|
|
@@ -126,7 +126,7 @@ class SwiGLU(layers.Layer):
|
|
| 126 |
|
| 127 |
|
| 128 |
class MHLA(layers.Layer):
|
| 129 |
-
def __init__(self, embed_dim, num_heads=
|
| 130 |
super().__init__()
|
| 131 |
assert embed_dim % num_heads == 0, "embed_dim must be divisible by num_heads"
|
| 132 |
self.embed_dim = embed_dim
|
|
@@ -187,7 +187,7 @@ class Lo(layers.Layer):
|
|
| 187 |
class Block(layers.Layer):
|
| 188 |
def __init__(self, d_model):
|
| 189 |
super().__init__()
|
| 190 |
-
self.lou = MHLA(d_model,
|
| 191 |
self.glu = SwiGLU(d_model, 1048)
|
| 192 |
self.lo = Lo(d_model)
|
| 193 |
|
|
|
|
| 67 |
vocab_size = sp.get_piece_size()
|
| 68 |
print(f"✅ Vocabulary size: {vocab_size}")
|
| 69 |
|
| 70 |
+
max_len = 128
|
| 71 |
batch_size = 96
|
| 72 |
|
| 73 |
def text_to_ids(text):
|
|
|
|
| 126 |
|
| 127 |
|
| 128 |
class MHLA(layers.Layer):
|
| 129 |
+
def __init__(self, embed_dim, num_heads=2, dropout=0.0):
|
| 130 |
super().__init__()
|
| 131 |
assert embed_dim % num_heads == 0, "embed_dim must be divisible by num_heads"
|
| 132 |
self.embed_dim = embed_dim
|
|
|
|
| 187 |
class Block(layers.Layer):
|
| 188 |
def __init__(self, d_model):
|
| 189 |
super().__init__()
|
| 190 |
+
self.lou = MHLA(d_model, 2)
|
| 191 |
self.glu = SwiGLU(d_model, 1048)
|
| 192 |
self.lo = Lo(d_model)
|
| 193 |
|