{ "epoch": 10.0, "eval_loss": 0.5854414701461792, "eval_mem_cpu_alloc_delta": 77824, "eval_mem_cpu_peaked_delta": 0, "eval_mem_gpu_alloc_delta": 0, "eval_mem_gpu_peaked_delta": 201464832, "eval_runtime": 51.652, "eval_samples": 17653, "eval_samples_per_second": 341.768, "init_mem_cpu_alloc_delta": 2455293952, "init_mem_cpu_peaked_delta": 24326144, "init_mem_gpu_alloc_delta": 199785472, "init_mem_gpu_peaked_delta": 0, "perplexity": 1.7957835955166468, "train_mem_cpu_alloc_delta": -4308992, "train_mem_cpu_peaked_delta": 670556160, "train_mem_gpu_alloc_delta": 601210368, "train_mem_gpu_peaked_delta": 1409567744, "train_runtime": 21748.254, "train_samples": 158861, "train_samples_per_second": 9.131 }