Yuchan committed on
Commit
dd6aa0d
·
verified ·
1 Parent(s): 6fef684

Update Mo.py

Browse files
Files changed (1) hide show
  1. Mo.py +3 -3
Mo.py CHANGED
@@ -67,7 +67,7 @@ unk_id = sp.piece_to_id("<unk>")
67
  vocab_size = sp.get_piece_size()
68
  print(f"✅ Vocabulary size: {vocab_size}")
69
 
70
- max_len = 200
71
  batch_size = 96
72
 
73
  def text_to_ids(text):
@@ -126,7 +126,7 @@ class SwiGLU(layers.Layer):
126
 
127
 
128
  class MHLA(layers.Layer):
129
- def __init__(self, embed_dim, num_heads=8, dropout=0.0):
130
  super().__init__()
131
  assert embed_dim % num_heads == 0, "embed_dim must be divisible by num_heads"
132
  self.embed_dim = embed_dim
@@ -187,7 +187,7 @@ class Lo(layers.Layer):
187
  class Block(layers.Layer):
188
  def __init__(self, d_model):
189
  super().__init__()
190
- self.lou = MHLA(d_model, 8)
191
  self.glu = SwiGLU(d_model, 1048)
192
  self.lo = Lo(d_model)
193
 
 
67
  vocab_size = sp.get_piece_size()
68
  print(f"✅ Vocabulary size: {vocab_size}")
69
 
70
+ max_len = 128
71
  batch_size = 96
72
 
73
  def text_to_ids(text):
 
126
 
127
 
128
  class MHLA(layers.Layer):
129
+ def __init__(self, embed_dim, num_heads=2, dropout=0.0):
130
  super().__init__()
131
  assert embed_dim % num_heads == 0, "embed_dim must be divisible by num_heads"
132
  self.embed_dim = embed_dim
 
187
  class Block(layers.Layer):
188
  def __init__(self, d_model):
189
  super().__init__()
190
+ self.lou = MHLA(d_model, 2)
191
  self.glu = SwiGLU(d_model, 1048)
192
  self.lo = Lo(d_model)
193