Upload NemotronFlashForCausalLM
Browse files
- model.safetensors +2 -2
- modeling_nemotron_flash.py +5 -5
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9581e287ebcd6a1f13c9564d9ae4d2e03d2d464032635af3b3be0de15dbb8939
|
| 3 |
+
size 1930795952
|
modeling_nemotron_flash.py
CHANGED
|
@@ -918,11 +918,12 @@ class NemotronFlashAttentionDecoderLayer(nn.Module):
|
|
| 918 |
|
| 919 |
if self.config.intermediate_size > 0:
|
| 920 |
self.ffn = NemotronFlashMLP(config, layer_idx=layer_idx)
|
|
|
|
| 921 |
else:
|
| 922 |
self.ffn = None
|
|
|
|
| 923 |
|
| 924 |
self.input_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
|
| 925 |
-
self.pre_ffn_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
|
| 926 |
|
| 927 |
def forward(
|
| 928 |
self,
|
|
@@ -1037,13 +1038,12 @@ class NemotronFlashMambaDecoderLayer(nn.Module):
|
|
| 1037 |
self.intermediate_size = config.intermediate_size
|
| 1038 |
if self.intermediate_size > 0:
|
| 1039 |
self.ffn = NemotronFlashMLP(config, layer_idx=layer_idx)
|
| 1040 |
-
|
| 1041 |
-
self.input_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
|
| 1042 |
-
|
| 1043 |
-
if self.intermediate_size > 0:
|
| 1044 |
self.pre_ffn_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
|
| 1045 |
else:
|
|
|
|
| 1046 |
self.pre_ffn_layernorm = None
|
|
|
|
|
|
|
| 1047 |
|
| 1048 |
|
| 1049 |
def forward(
|
|
|
|
| 918 |
|
| 919 |
if self.config.intermediate_size > 0:
|
| 920 |
self.ffn = NemotronFlashMLP(config, layer_idx=layer_idx)
|
| 921 |
+
self.pre_ffn_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
|
| 922 |
else:
|
| 923 |
self.ffn = None
|
| 924 |
+
self.pre_ffn_layernorm = None
|
| 925 |
|
| 926 |
self.input_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
|
|
|
|
| 927 |
|
| 928 |
def forward(
|
| 929 |
self,
|
|
|
|
| 1038 |
self.intermediate_size = config.intermediate_size
|
| 1039 |
if self.intermediate_size > 0:
|
| 1040 |
self.ffn = NemotronFlashMLP(config, layer_idx=layer_idx)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1041 |
self.pre_ffn_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
|
| 1042 |
else:
|
| 1043 |
+
self.ffn = None
|
| 1044 |
self.pre_ffn_layernorm = None
|
| 1045 |
+
|
| 1046 |
+
self.input_layernorm = NemotronFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
|
| 1047 |
|
| 1048 |
|
| 1049 |
def forward(
|