YongganFu committed
Commit c50ca7b · verified · 1 Parent(s): fe5d56f

Upload NemotronFlashForCausalLM

Files changed (2):
  1. model.safetensors +2 -2
  2. modeling_nemotron_flash.py +5 -5
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b15235dc83bb411387d9a15694e02d3697051e303b067336e17c302a17b6125d
-size 1930804368
+oid sha256:9581e287ebcd6a1f13c9564d9ae4d2e03d2d464032635af3b3be0de15dbb8939
+size 1930795952
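model.safetensors is stored with Git LFS, so the diff above only swaps the pointer fields (oid and size); the weights themselves live outside the git history. After downloading the new checkpoint, both fields can be recomputed locally to confirm the file matches this commit. The sketch below is a hypothetical verification script, not part of the repository; the local filename is an assumption.

import hashlib
import os

# Hypothetical local path to the downloaded weights; adjust as needed.
path = "model.safetensors"

# Stream the file so we never hold ~1.9 GB in memory at once.
sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

# Both values should match the updated LFS pointer above.
print("oid sha256:", sha256.hexdigest())  # expect 9581e287...
print("size:", os.path.getsize(path))     # expect 1930795952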
modeling_nemotron_flash.py CHANGED
@@ -918,11 +918,12 @@ class NemotronFlashAttentionDecoderLayer(nn.Module):
 
         if self.config.intermediate_size > 0:
             self.ffn = NemotronFlashMLP(config, layer_idx=layer_idx)
+            self.pre_ffn_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
         else:
             self.ffn = None
+            self.pre_ffn_layernorm = None
 
         self.input_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
-        self.pre_ffn_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
 
     def forward(
         self,
@@ -1037,13 +1038,12 @@ class NemotronFlashMambaDecoderLayer(nn.Module):
         self.intermediate_size = config.intermediate_size
         if self.intermediate_size > 0:
             self.ffn = NemotronFlashMLP(config, layer_idx=layer_idx)
-
-        self.input_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
-
-        if self.intermediate_size > 0:
             self.pre_ffn_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
         else:
+            self.ffn = None
             self.pre_ffn_layernorm = None
+
+        self.input_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
 
 
     def forward(
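Taken together, the two hunks make both decoder-layer classes follow the same rule: pre_ffn_layernorm is created only when the layer actually has an FFN (config.intermediate_size > 0), and both attributes are explicitly set to None otherwise, so FFN-less layers register no orphaned norm weights and downstream code can test for the FFN's presence without an AttributeError. Below is a minimal, runnable sketch of that pattern under stated assumptions: SketchMLP and nn.RMSNorm (PyTorch >= 2.4) stand in for NemotronFlashMLP and NemotronFlashRMSNorm, and the forward-pass usage is inferred from the diff rather than copied from the file.

import torch
import torch.nn as nn


class SketchMLP(nn.Module):
    """Stand-in for NemotronFlashMLP (hypothetical, for illustration only)."""
    def __init__(self, hidden_size, intermediate_size):
        super().__init__()
        self.up = nn.Linear(hidden_size, intermediate_size)
        self.down = nn.Linear(intermediate_size, hidden_size)

    def forward(self, x):
        return self.down(torch.nn.functional.silu(self.up(x)))


class SketchDecoderLayer(nn.Module):
    """Stand-in for the decoder layers touched by this commit."""
    def __init__(self, hidden_size, intermediate_size, rms_norm_eps=1e-5):
        super().__init__()
        # After the commit: the FFN and its pre-norm live or die together.
        if intermediate_size > 0:
            self.ffn = SketchMLP(hidden_size, intermediate_size)
            self.pre_ffn_layernorm = nn.RMSNorm(hidden_size, eps=rms_norm_eps)
        else:
            self.ffn = None
            self.pre_ffn_layernorm = None

        # Always present, matching both hunks above.
        self.input_layernorm = nn.RMSNorm(hidden_size, eps=rms_norm_eps)

    def forward(self, hidden_states):
        # The attention/Mamba mixer sub-block is omitted from this sketch;
        # only the FFN sub-block guarded by this commit is shown. Usage is
        # an assumption inferred from the __init__ change.
        if self.ffn is not None:
            hidden_states = hidden_states + self.ffn(self.pre_ffn_layernorm(hidden_states))
        return hidden_states


# An FFN-less layer now registers no pre_ffn_layernorm parameters at all.
with_ffn = SketchDecoderLayer(hidden_size=64, intermediate_size=256)
without_ffn = SketchDecoderLayer(hidden_size=64, intermediate_size=0)
print(sum(p.numel() for p in with_ffn.parameters()))     # MLP + both norms
print(sum(p.numel() for p in without_ffn.parameters()))  # input norm only

One visible consequence: since FFN-less attention layers previously carried an unconditional pre_ffn_layernorm, dropping those weights is plausibly what makes the model.safetensors checkpoint in this same commit a few kilobytes smaller.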