{ "model": { "vocab_size": 8192, "context_length": 128, "d_embedding": 512, "d_intermediate": 2048, "n_heads": 16, "n_layers": 16, "qkv_bias": false }, "train": { "peak_lr": 0.001, "warmup_ratio": 0.01, "n_epochs": 2, "batch_size": 8, "weight_decay": 0.1 } }