basic_reference_200m / latest_metadata.json
eoinf's picture
Upload folder using huggingface_hub
fde051d verified
raw
history blame contribute delete
993 Bytes
{"step": 6000, "tokens_seen": 196608000, "config": {"model_name": "basic_reference_200m", "n_layers": 2, "d_model": 512, "d_mlp": 2048, "d_head": 64, "n_heads": 8, "attn_only": false, "layer_norm_eps": 1e-05, "init_range": 0.02, "n_ctx": 1024, "d_vocab": 48262, "dataset_name": "eoinf/unprocessed-c4-code-test", "tokenizer_name": "NeelNanda/gpt-neox-tokenizer-digits", "seed": 10, "device": "cuda", "use_bfloat16_matmul": false, "batch_size_per_device": 32, "n_devices": 1, "batches_per_step": 1, "max_tokens": 200000000, "lr_hidden": 0.002, "lr_vector": 0.001, "lr_schedule": "constant_with_warmup", "warmup_tokens": 30000000, "weight_decay": 0.05, "grad_norm_clip": 1.0, "train_loss_moving_average_beta": 0.99, "log_interval": 25, "save_checkpoints": true, "checkpoint_interval": 1000, "checkpoint_interval_ratio": 1.1, "save_log_checkpoints": true, "use_wandb": true, "batch_size": 32, "tokens_per_step": 32768, "warmup_steps": 915, "max_steps": 6103}, "train_loss_ewma": 4.048539307487607}