inspirewind
/

viralBERT-classification

viralbert_for_sequence_classification

Model card Files Files and versions

viralBERT-classification / config.json

inspirewind's picture

Upload folder using huggingface_hub

87dcdb4 verified 4 months ago

history blame contribute delete

2.86 kB

	{
	"adamw_betas_for_muon_others": [
	0.9,
	0.95
	],
	"adamw_eps_for_muon_others": 1e-10,
	"adamw_lr": 0.00064,
	"adamw_lr_for_muon_others": 0.000267,
	"adamw_max_grad_norm": 0.5,
	"adamw_weight_decay": 0.01,
	"adamw_weight_decay_for_muon_others": 0.05,
	"adjust_learning_rate_for_accumulation": true,
	"architectures": [
	"ViralBERTForSequenceClassification"
	],
	"attention_head_size": 64,
	"attention_probs_dropout_prob": 0.0,
	"batch_size": 256,
	"class_weights": null,
	"classifier_dropout_prob": 0.1,
	"cls_token_id": 1,
	"compile_backend": "inductor",
	"compile_fullgraph": true,
	"compile_mode": "default",
	"data_dir": "",
	"dtype": "float32",
	"fasta_file": "",
	"feed_forward_activation": "swiglu",
	"filter_n": false,
	"fp16": true,
	"freeze_bert_layers": 8,
	"global_attn_every_n_layers": 0,
	"global_max_grad_norm": 1.0,
	"gradient_accumulation_steps": 8,
	"hidden_dropout_prob": 0.0,
	"hidden_size": 768,
	"high_lr_multiplier": 1.0,
	"high_lr_steps_ratio": 0.0,
	"id2label": {
	"0": "bac",
	"1": "virus"
	},
	"initializer_range": 0.02,
	"intermediate_size": 2048,
	"label2id": {
	"bac": 0,
	"virus": 1
	},
	"label_smoothing_factor": 0.1,
	"layer_norm_eps": 1e-12,
	"logging_steps": 1000,
	"loss_type": "ce",
	"lr_scheduler_type": "cosine",
	"mask_token_id": 3,
	"masking_strategy": "structural",
	"max_eval_samples": 2048,
	"max_steps_for_sweep": null,
	"min_lr_ratio": 0.05,
	"mlm_probability": 0.15,
	"model_type": "viralbert_for_sequence_classification",
	"muon_lr": 0.0015,
	"muon_max_grad_norm": 1.0,
	"muon_momentum": 0.95,
	"muon_weight_decay": 0.05,
	"n_token_id": 9,
	"norm_layer_type": "rmsnorm",
	"num_attention_heads": 12,
	"num_hidden_layers": 12,
	"num_train_epochs": 3,
	"num_workers": 4,
	"optimizer_type": "muon_adamw",
	"p_codon": 0.5,
	"pad_token_id": 0,
	"pos_weight": null,
	"position_embedding_type": "rope",
	"resume_from_checkpoint": null,
	"resume_mode": null,
	"reverse_complement_prob": 0.5,
	"rope_interpolation_factor": 1.0,
	"run_name": "",
	"save_steps": 10000,
	"save_total_limit": 5,
	"scale_loss_for_accumulation": true,
	"seed": 42,
	"sep_token_id": 2,
	"seq_length": 512,
	"seq_mask_prob": 0.5,
	"seq_mask_ratio": 0.15,
	"sliding_window_size": 0,
	"stride": 256,
	"sweep_early_stopping_patience_steps": 1000,
	"sweep_early_stopping_threshold": 50.0,
	"tie_word_embeddings": false,
	"transformers_version": "4.56.1",
	"use_compile": true,
	"use_per_group_clipping": false,
	"use_qk_norm": true,
	"use_seq_augment": true,
	"use_xpos": false,
	"vocab_size": 14,
	"wandb_enabled": true,
	"wandb_group": "",
	"wandb_name": "",
	"wandb_notes": "",
	"wandb_project": "",
	"wandb_tags": [],
	"wandb_watch_freq": null,
	"wandb_watch_model": false,
	"warmup_steps": 4000,
	"warmup_steps_ratio": 0.1
	}