| |
| |
| |
| |
| |
| |
| |
| |
|
|
| # General settings. sample_rate carries the same value that is passed as sample_rate to compute_stft, compute_fbank and compute_istft further down. |
| # use_vq, rec_loss_coef, use_mask_output and mask_th are not referenced by any other entry visible in this file; presumably they are read by the code that loads it - TODO confirm. |
| sample_rate: 16000 |
| use_vq: true |
| rec_loss_coef: 1 |
| use_mask_output: true |
| mask_th: 0.35 |
|
|
| # NOTE(review): device is fixed to cpu and is not referenced by any other visible entry - confirm how the loader consumes or overrides it. |
| device: cpu |
|
|
| |
| # n_mels has the same value as the n_mels argument of compute_fbank. |
| n_mels: 80 |
|
|
| |
| # out_n_neurons has the same value as the out_neurons argument of classifier. |
| out_n_neurons: 50 |
|
|
| |
| # Instantiates PIQ.Conv2dEncoder_v2 with dim=256; the same 256 appears as classifier input_size and psi_model dim. |
| embedding_model: !new:speechbrain.lobes.models.PIQ.Conv2dEncoder_v2 |
| dim: 256 |
|
|
| # Instantiates ECAPA_TDNN.Classifier. input_size (256) equals the dim given to embedding_model. |
| # out_neurons references the top-level out_n_neurons key so the class count is defined in one place only. |
| classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier |
| input_size: 256 |
| out_neurons: !ref <out_n_neurons> |
| lin_blocks: 1 |
|
|
| |
| # K has the same value as the K argument of psi_model. |
| K: 1024 |
|
|
| |
| # Spectral analysis settings; n_fft, hop_length and win_length carry the same values as the arguments of compute_stft / compute_fbank / compute_istft below. |
| # spec_mag_power is not referenced by any other entry visible in this file - presumably read by the loading code, TODO confirm. |
| # NOTE(review): the fractional hop_length / win_length values suggest milliseconds rather than samples - confirm against the STFT documentation. |
| n_fft: 1024 |
| spec_mag_power: 0.5 |
| hop_length: 11.6099 |
| win_length: 23.2199 |
| # Instantiates the STFT transform. Every argument references the top-level key of the same name, so the values are defined once and cannot drift apart. |
| compute_stft: !new:speechbrain.processing.features.STFT |
| n_fft: !ref <n_fft> |
| hop_length: !ref <hop_length> |
| win_length: !ref <win_length> |
| sample_rate: !ref <sample_rate> |
|
|
| # Instantiates the Filterbank transform. Arguments reference the top-level n_mels, n_fft and sample_rate keys instead of repeating their literals. |
| compute_fbank: !new:speechbrain.processing.features.Filterbank |
| n_mels: !ref <n_mels> |
| n_fft: !ref <n_fft> |
| sample_rate: !ref <sample_rate> |
|
|
| # Instantiates the ISTFT transform. Arguments reference the same top-level keys used by compute_stft so both transforms stay configured identically. |
| compute_istft: !new:speechbrain.processing.features.ISTFT |
| sample_rate: !ref <sample_rate> |
| hop_length: !ref <hop_length> |
| win_length: !ref <win_length> |
|
|
| # Instantiates a CategoricalEncoder with no arguments; it is listed in the pretrainer loadables with a label_encoder.txt path. |
| label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder |
| # Instantiates PIQ.VectorQuantizedPSI_Audio. dim (256) equals the dim given to embedding_model. |
| # K references the top-level K key instead of repeating its literal. |
| psi_model: !new:speechbrain.lobes.models.PIQ.VectorQuantizedPSI_Audio |
| dim: 256 |
| K: !ref <K> |
| shared_keys: 0 |
| activate_class_partitioning: true |
| use_adapter: true |
| adapter_reduce_dim: true |
|
|
| # Named collection of the objects instantiated above, each included by reference. Note that psi_model is exposed under the key psi. |
| modules: |
| compute_stft: !ref <compute_stft> |
| compute_fbank: !ref <compute_fbank> |
| compute_istft: !ref <compute_istft> |
| psi: !ref <psi_model> |
| embedding_model: !ref <embedding_model> |
| classifier: !ref <classifier> |
|
|
| # Instantiates a Pretrainer. loadables maps a name to each object to be restored; paths maps the same names to the file each one is restored from. |
| # embedding_model, classifier and psi point at speechbrain/PIQ-ESC50, while label_encoder points at speechbrain/cnn14-esc50. |
| pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer |
| loadables: |
| embedding_model: !ref <embedding_model> |
| classifier: !ref <classifier> |
| psi: !ref <psi_model> |
| label_encoder: !ref <label_encoder> |
| paths: |
| embedding_model: speechbrain/PIQ-ESC50/embedding_modelft.ckpt |
| classifier: speechbrain/PIQ-ESC50/classifier.ckpt |
| psi: speechbrain/PIQ-ESC50/psi_model.ckpt |
| label_encoder: speechbrain/cnn14-esc50/label_encoder.txt |