mazesmazes committed on
Commit
f096b62
·
verified ·
1 Parent(s): 4941335

Training in progress - step 500

Browse files
Files changed (2) hide show
  1. asr_modeling.py +4 -6
  2. tokenizer.json +2 -2
asr_modeling.py CHANGED
@@ -101,10 +101,6 @@ class ASRModel(PreTrainedModel):
101
  # Audio projector (trainable)
102
  self.projector = self._create_projector(config, target_dtype)
103
 
104
- # Loss function
105
- self.label_smoothing = getattr(config, "label_smoothing", 0.1)
106
- self.loss_fct = nn.CrossEntropyLoss(ignore_index=-100, label_smoothing=self.label_smoothing)
107
-
108
  # For model parallelism
109
  self._no_split_modules = getattr(self.language_model, "_no_split_modules", [])
110
 
@@ -393,8 +389,10 @@ class ASRModel(PreTrainedModel):
393
  logits = outputs.logits
394
  shift_logits = logits[..., :-1, :].contiguous()
395
  shift_labels = labels[..., 1:].contiguous()
396
- loss = self.loss_fct(
397
- shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1)
 
 
398
  )
399
 
400
  return CausalLMOutputWithPast(
 
101
  # Audio projector (trainable)
102
  self.projector = self._create_projector(config, target_dtype)
103
 
 
 
 
 
104
  # For model parallelism
105
  self._no_split_modules = getattr(self.language_model, "_no_split_modules", [])
106
 
 
389
  logits = outputs.logits
390
  shift_logits = logits[..., :-1, :].contiguous()
391
  shift_labels = labels[..., 1:].contiguous()
392
+ loss = F.cross_entropy(
393
+ shift_logits.view(-1, shift_logits.size(-1)),
394
+ shift_labels.view(-1),
395
+ ignore_index=-100,
396
  )
397
 
398
  return CausalLMOutputWithPast(
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4aeaf198f783cbf58d8cd59812baac429ffe49147bf9648f6618de20b8d4a4c
3
- size 17209003
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64999f2f5e05d34613701df1999669c5dce7e3891e1628a002518ee68a8626d1
3
+ size 17209101