speechbrain
/

PIQ-ESC50

Sound Classification

Interpretable Sound Classification

Posthoc Interpretation

Posthoc Interpretation via Quantization

Model card Files Files and versions

PIQ-ESC50 / hyperparams.yaml

cemsubakan's picture

Update hyperparams.yaml

25df5de verified about 2 years ago

history blame contribute delete

2.13 kB

	# #################################
	# The recipe for training PIQ on the ESC50 dataset.
	#
	# Authors:
	# * Cem Subakan 2022, 2023
	# * Francesco Paissan 2022, 2023
	# (based on the SpeechBrain UrbanSound8k recipe)
	# #################################

	# Device the modules are placed on.
	device: cpu

	# Audio / feature parameters
	sample_rate: 16000
	n_mels: 80

	# Number of classes
	out_n_neurons: 50

	# PIQ options. These keys are not referenced elsewhere in this file, so
	# they are presumably read by the consuming interface/recipe code.
	# NOTE(review): confirm exact semantics against that code.
	use_vq: true
	use_mask_output: true
	# presumably the threshold applied to the predicted mask -- TODO confirm
	mask_th: 0.35
	# presumably the weight of the reconstruction loss term -- TODO confirm
	rec_loss_coef: 1

	# Embedding network, built by HyperPyYAML from the `!new:` tag.
	# Earlier variant that loaded the class from a local module:
	# embedding_model: !new:custom_models.Conv2dEncoder_v2
	embedding_model: !new:speechbrain.lobes.models.PIQ.Conv2dEncoder_v2
	  # Constructor argument: must be nested under the `!new:` tag, otherwise
	  # it becomes an unrelated top-level key and the encoder is built with
	  # its default arguments.
	  dim: 256

	# Classifier object; the nested keys are its constructor arguments.
	classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
	  # NOTE(review): presumably has to equal the embedding_model `dim` (256)
	  input_size: 256
	  # Tied to the top-level class count instead of repeating the literal 50.
	  out_neurons: !ref <out_n_neurons>
	  lin_blocks: 1

	# Interpretation hyperparams
	K: 1024

	# pre-processing
	n_fft: 1024
	spec_mag_power: 0.5
	# NOTE(review): SpeechBrain's STFT/ISTFT take hop/window lengths in
	# milliseconds -- confirm these values are meant as ms at 16 kHz.
	hop_length: 11.6099
	win_length: 23.2199

	# STFT front-end. Arguments are nested under the `!new:` tag and refer to
	# the scalars above, so each value is defined exactly once and no top-level
	# key is duplicated.
	compute_stft: !new:speechbrain.processing.features.STFT
	  n_fft: !ref <n_fft>
	  hop_length: !ref <hop_length>
	  win_length: !ref <win_length>
	  sample_rate: !ref <sample_rate>

	# Filterbank object. Arguments are nested under the `!new:` tag and
	# reference the top-level `n_mels`, `n_fft` and `sample_rate` values
	# rather than redefining them as duplicate top-level keys.
	compute_fbank: !new:speechbrain.processing.features.Filterbank
	  n_mels: !ref <n_mels>
	  n_fft: !ref <n_fft>
	  sample_rate: !ref <sample_rate>

	# Inverse STFT object. It references the same top-level `sample_rate`,
	# `hop_length` and `win_length` values used for the forward STFT, so the
	# two transforms cannot drift apart.
	compute_istft: !new:speechbrain.processing.features.ISTFT
	  sample_rate: !ref <sample_rate>
	  hop_length: !ref <hop_length>
	  win_length: !ref <win_length>

	# Label encoder, constructed with no arguments; it is listed under the
	# pretrainer's loadables further down in this file.
	label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

	# PIQ interpreter network. The keys below are its constructor arguments
	# and must be nested under the `!new:` tag.
	psi_model: !new:speechbrain.lobes.models.PIQ.VectorQuantizedPSI_Audio
	  # NOTE(review): presumably has to equal the embedding_model `dim` (256)
	  dim: 256
	  # Tied to the top-level `K` instead of repeating the literal 1024.
	  K: !ref <K>
	  shared_keys: 0
	  activate_class_partitioning: true
	  use_adapter: true
	  adapter_reduce_dim: true

	# Mapping of module names to the objects defined above. The entries must
	# be nested: at top level they would be self-referencing duplicates such as
	# `compute_stft: !ref <compute_stft>`, and `modules` itself would be null.
	modules:
	  compute_stft: !ref <compute_stft>
	  compute_fbank: !ref <compute_fbank>
	  compute_istft: !ref <compute_istft>
	  psi: !ref <psi_model>
	  embedding_model: !ref <embedding_model>
	  classifier: !ref <classifier>

	# Pretrainer: `loadables` maps a name to the object that receives the
	# parameters, `paths` maps the same name to the checkpoint location.
	# Both mappings are constructor arguments and must be nested.
	pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
	  loadables:
	    embedding_model: !ref <embedding_model>
	    classifier: !ref <classifier>
	    psi: !ref <psi_model>
	    label_encoder: !ref <label_encoder>
	  paths:
	    embedding_model: speechbrain/PIQ-ESC50/embedding_modelft.ckpt
	    classifier: speechbrain/PIQ-ESC50/classifier.ckpt
	    psi: speechbrain/PIQ-ESC50/psi_model.ckpt
	    # NOTE(review): taken from a different repository (cnn14-esc50) than
	    # the other checkpoints -- confirm this is intentional.
	    label_encoder: speechbrain/cnn14-esc50/label_encoder.txt