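# NOTE(review): this span contained only empty table-cell residue ("| |").
# If the upstream file had a header comment here, restore it from the
# original source; it is not recoverable from this copy.
---
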
# Model construction settings.
# NOTE(review): nesting was reconstructed from a copy whose indentation was
# lost; every key down to `end_sym` is assumed to belong to `model` —
# confirm against the code that reads this file.
model:
  # Locations of the pretrained components (relative paths as written).
  # NOTE(review): the key is named `llama_path`, but its value names a
  # DeepSeek-R1-Distill-Qwen-1.5B directory — confirm the loader accepts it.
  llama_path: "DeepSeek-R1-Distill-Qwen-1.5B/"
  whisper_path: "distil-whisper/distil-large-v3/"
  beats_path: "BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt"

  # Checkpoint file; presumably trained weights loaded on top of the
  # components above — verify against the loading code.
  ckpt: "tiny_all_tasks_319.pth"

  # Both encoders are marked frozen.
  freeze_whisper: true
  freeze_beats: true

  # Speech Q-Former settings. The Q-Former itself is not frozen.
  use_speech_Qformer: true
  freeze_speech_QFormer: false
  window_level_Qformer: true
  num_speech_query_token: 1
  # Window and stride are equal, so consecutive windows would not overlap —
  # assuming both values are durations in seconds (TODO confirm).
  second_per_window: 0.333333
  second_stride: 0.333333

  # Empty string: no separate projection weights file is named here.
  speech_llama_proj_model: ""
  freeze_speech_llama_proj: false

  # LoRA adapter settings.
  lora: true
  lora_rank: 8
  lora_alpha: 32
  lora_dropout: 0.1

  # Prompting. Double quotes are required on `prompt_template` so that "\n"
  # is read as a newline; `{}` is presumably filled with the task prompt.
  multi_prompt: true
  prompt_template: "USER: {}\nASSISTANT:"
  prompt_path: "prompts/train_prompt.json"
  test_prompt_path: "prompts/test_prompt.json"
  max_txt_len: 300
  end_sym: "</s>"

# Text-generation settings.
# NOTE(review): indentation was reconstructed; `generate` is assumed to be a
# top-level sibling of `model` — confirm against the code that reads it.
generate:
  max_new_tokens: 200
  # Four beams with sampling disabled.
  num_beams: 4
  do_sample: false
  min_length: 1
  # NOTE(review): if these keys are forwarded to Hugging Face `generate`,
  # `temperature` and `top_p` only take effect when `do_sample` is true,
  # so they would be inert with the settings above — confirm.
  temperature: 1.0
  top_p: 0.9
  repetition_penalty: 1.0
  length_penalty: 1.0