mazesmazes committed
Commit 9c8b91f · verified · 1 Parent(s): eedf682

Training in progress - step 500

Files changed (4)
  1. asr_config.py +2 -0
  2. asr_modeling.py +1 -7
  3. config.json +1 -0
  4. model.safetensors +1 -1
asr_config.py CHANGED
@@ -26,6 +26,7 @@ class ASRConfig(transformers.PretrainedConfig):
         projector_num_layers: int = 2,  # Number of layers (for residual projector)
         projector_dropout: float = 0.05,  # Dropout rate for projector layers
         projector_input_noise: float = 0.02,  # Input noise for projector
+        label_smoothing: float = 0.0,  # Label smoothing for cross-entropy loss
         inference_diversity_penalty: float = 0.0,
         inference_warmup_tokens: int = 10,
         max_new_tokens: Optional[int] = None,
@@ -72,6 +73,7 @@ class ASRConfig(transformers.PretrainedConfig):
         self.projector_num_layers = projector_num_layers
         self.projector_dropout = projector_dropout
         self.projector_input_noise = projector_input_noise
+        self.label_smoothing = label_smoothing
         self.inference_diversity_penalty = inference_diversity_penalty
         self.inference_warmup_tokens = inference_warmup_tokens
         if "audio_config" not in kwargs:
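Note: the new field is an ordinary ASRConfig keyword argument, so it can be set at construction time or left at its 0.0 default. A minimal usage sketch, assuming the remaining constructor arguments keep the defaults shown in the hunk above:

# Usage sketch (direct construction is an assumption; other arguments use their defaults)
from asr_config import ASRConfig

config = ASRConfig(label_smoothing=0.1)   # new field; omitting it keeps the 0.0 default
print(config.label_smoothing)             # 0.1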
asr_modeling.py CHANGED
@@ -118,10 +118,6 @@ class ASRModel(PreTrainedModel):
             "low_cpu_mem_usage": True,
             "dtype": dtype,
         }
-        # Only use device_map="auto" when NOT loading from pretrained
-        # (avoids meta tensor conflicts during from_pretrained)
-        if not cls._is_loading_from_pretrained:
-            encoder_kwargs["device_map"] = "auto"
 
         if "whisper" in config.audio_model_id.lower():
             from transformers import WhisperModel
@@ -146,9 +142,6 @@ class ASRModel(PreTrainedModel):
             "low_cpu_mem_usage": True,
             "dtype": dtype,
         }
-        # Only use device_map="auto" when NOT loading from pretrained
-        if not cls._is_loading_from_pretrained:
-            decoder_kwargs["device_map"] = "auto"
 
         decoder = AutoModelForCausalLM.from_pretrained(config.text_model_id, **decoder_kwargs)
         decoder.config.use_cache = getattr(config, "use_cache", True)
@@ -393,6 +386,7 @@ class ASRModel(PreTrainedModel):
             shift_logits.view(-1, shift_logits.size(-1)),
             shift_labels.view(-1),
             ignore_index=-100,
+            label_smoothing=getattr(self.config, "label_smoothing", 0.0),
         )
 
         return CausalLMOutputWithPast(
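Note: the last hunk wires the config value into the standard torch.nn.functional.cross_entropy label_smoothing argument. A self-contained sketch of that call; tensor shapes and values are illustrative, only the keyword arguments mirror the change above:

# Minimal sketch of the smoothed loss call (shapes are illustrative)
import torch
import torch.nn.functional as F

vocab_size = 8
shift_logits = torch.randn(2, 5, vocab_size)          # (batch, seq_len, vocab)
shift_labels = torch.randint(0, vocab_size, (2, 5))   # target token ids
shift_labels[:, :2] = -100                            # masked positions (e.g. prompt/audio frames)

loss = F.cross_entropy(
    shift_logits.view(-1, shift_logits.size(-1)),     # flatten to (batch*seq_len, vocab)
    shift_labels.view(-1),
    ignore_index=-100,                                 # masked positions contribute no loss
    label_smoothing=0.1,                               # the value set in config.json below
)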
config.json CHANGED
@@ -68,6 +68,7 @@
     "encoder_dim": 1280,
     "inference_diversity_penalty": 0.0,
     "inference_warmup_tokens": 10,
+    "label_smoothing": 0.1,
     "llm_dim": 2048,
     "max_new_tokens": 128,
     "min_new_tokens": 1,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5cdacf5b89d706d6f0e5faffce196310de913bd0a577f4bdbe7b92335271e59
+oid sha256:3a25deecc1f7a0eef0322e88451ff27f6eb9ade7e853e21df8eb8afee152b736
 size 144762160