diff --git "a/running_log.txt" "b/running_log.txt" --- "a/running_log.txt" +++ "b/running_log.txt" @@ -1,77 +1,63 @@ -[INFO|parser.py:344] 2024-07-29 19:46:30,264 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16 +07/30/2024 01:57:42 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> -07/29/2024 19:46:30 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> +07/30/2024 01:57:42 - INFO - llamafactory.hparams.parser - Process rank: 7, device: cuda:7, n_gpu: 1, distributed training: True, compute dtype: None -07/29/2024 19:46:30 - INFO - llamafactory.data.template - Add pad token: <|eot_id|> +[INFO|parser.py:344] 2024-07-30 01:57:42,511 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, compute dtype: None -07/29/2024 19:46:30 - INFO - llamafactory.hparams.parser - Process rank: 3, device: cuda:3, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16 +07/30/2024 01:57:42 - INFO - llamafactory.hparams.parser - Process rank: 2, device: cuda:2, n_gpu: 1, distributed training: True, compute dtype: None -07/29/2024 19:46:30 - INFO - llamafactory.hparams.parser - Process rank: 4, device: cuda:4, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16 +07/30/2024 01:57:42 - INFO - llamafactory.hparams.parser - Process rank: 3, device: cuda:3, n_gpu: 1, distributed training: True, compute dtype: None -07/29/2024 19:46:30 - INFO - llamafactory.hparams.parser - Process rank: 2, device: cuda:2, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16 +07/30/2024 01:57:42 - INFO - llamafactory.hparams.parser - Process rank: 6, device: cuda:6, n_gpu: 1, distributed training: True, compute dtype: None -07/29/2024 19:46:30 - INFO - llamafactory.hparams.parser - Process rank: 6, device: cuda:6, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16 +07/30/2024 01:57:42 - INFO - llamafactory.hparams.parser - Process rank: 4, device: cuda:4, n_gpu: 1, distributed training: True, compute dtype: None -07/29/2024 19:46:30 - INFO - llamafactory.hparams.parser - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16 +07/30/2024 01:57:42 - INFO - llamafactory.hparams.parser - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, compute dtype: None -07/29/2024 19:46:30 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> +07/30/2024 01:57:42 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> -07/29/2024 19:46:30 - INFO - llamafactory.data.template - Add pad token: <|eot_id|> +[INFO|tokenization_utils_base.py:2287] 2024-07-30 01:57:42,516 >> loading file tokenizer.json -[INFO|tokenization_utils_base.py:2289] 2024-07-29 19:46:30,758 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3.1-8B-Instruct/snapshots/b2a4d0f33b41fcd59a6d31662cc63b8d53367e1e/tokenizer.json +[INFO|tokenization_utils_base.py:2287] 2024-07-30 01:57:42,516 >> loading file added_tokens.json -07/29/2024 19:46:30 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> +[INFO|tokenization_utils_base.py:2287] 2024-07-30 01:57:42,516 >> loading file special_tokens_map.json -07/29/2024 19:46:30 - INFO - llamafactory.data.template - Add pad token: <|eot_id|> +[INFO|tokenization_utils_base.py:2287] 2024-07-30 01:57:42,516 >> loading file tokenizer_config.json -07/29/2024 19:46:30 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> +[INFO|tokenization_utils_base.py:2533] 2024-07-30 01:57:42,783 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. -07/29/2024 19:46:30 - INFO - llamafactory.data.template - Add pad token: <|eot_id|> +[INFO|template.py:270] 2024-07-30 01:57:42,784 >> Replace eos token: <|eot_id|> -[INFO|tokenization_utils_base.py:2289] 2024-07-29 19:46:30,758 >> loading file added_tokens.json from cache at None +[INFO|loader.py:52] 2024-07-30 01:57:42,785 >> Loading dataset convert_finetune_truth_dev.json... -[INFO|tokenization_utils_base.py:2289] 2024-07-29 19:46:30,758 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3.1-8B-Instruct/snapshots/b2a4d0f33b41fcd59a6d31662cc63b8d53367e1e/special_tokens_map.json +07/30/2024 01:57:42 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> -[INFO|tokenization_utils_base.py:2289] 2024-07-29 19:46:30,758 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3.1-8B-Instruct/snapshots/b2a4d0f33b41fcd59a6d31662cc63b8d53367e1e/tokenizer_config.json +07/30/2024 01:57:42 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> -[INFO|tokenization_utils_base.py:2533] 2024-07-29 19:46:31,050 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. +07/30/2024 01:57:42 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> -[INFO|template.py:270] 2024-07-29 19:46:31,051 >> Replace eos token: <|eot_id|> +07/30/2024 01:57:42 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> -[INFO|template.py:372] 2024-07-29 19:46:31,051 >> Add pad token: <|eot_id|> +07/30/2024 01:57:42 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> -[INFO|loader.py:52] 2024-07-29 19:46:31,051 >> Loading dataset convert_finetune_truth.json... +07/30/2024 01:57:43 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth_dev.json... -07/29/2024 19:46:31 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> +07/30/2024 01:57:43 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth_dev.json... -07/29/2024 19:46:31 - INFO - llamafactory.data.template - Add pad token: <|eot_id|> +07/30/2024 01:57:43 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth_dev.json... -07/29/2024 19:46:31 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> +07/30/2024 01:57:43 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth_dev.json... -07/29/2024 19:46:31 - INFO - llamafactory.data.template - Add pad token: <|eot_id|> +07/30/2024 01:57:43 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth_dev.json... -07/29/2024 19:46:31 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|> +07/30/2024 01:57:43 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth_dev.json... -07/29/2024 19:46:31 - INFO - llamafactory.data.template - Add pad token: <|eot_id|> +07/30/2024 01:57:43 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth_dev.json... -07/29/2024 19:46:32 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth.json... +[INFO|configuration_utils.py:731] 2024-07-30 01:57:47,547 >> loading configuration file saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/config.json -07/29/2024 19:46:32 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth.json... - -07/29/2024 19:46:32 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth.json... - -07/29/2024 19:46:32 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth.json... - -07/29/2024 19:46:32 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth.json... - -07/29/2024 19:46:32 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth.json... - -07/29/2024 19:46:32 - INFO - llamafactory.data.loader - Loading dataset convert_finetune_truth.json... - -[INFO|configuration_utils.py:733] 2024-07-29 19:46:33,166 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3.1-8B-Instruct/snapshots/b2a4d0f33b41fcd59a6d31662cc63b8d53367e1e/config.json - -[INFO|configuration_utils.py:800] 2024-07-29 19:46:33,168 >> Model config LlamaConfig { - "_name_or_path": "meta-llama/Meta-Llama-3.1-8B-Instruct", +[INFO|configuration_utils.py:800] 2024-07-30 01:57:47,549 >> Model config LlamaConfig { + "_name_or_path": "saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final", "architectures": [ "LlamaForCausalLM" ], @@ -106,16 +92,18 @@ "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.43.3", - "use_cache": true, + "use_cache": false, "vocab_size": 128256 } -[INFO|modeling_utils.py:3634] 2024-07-29 19:46:33,192 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3.1-8B-Instruct/snapshots/b2a4d0f33b41fcd59a6d31662cc63b8d53367e1e/model.safetensors.index.json +[INFO|patcher.py:81] 2024-07-30 01:57:47,549 >> Using KV cache for faster generation. + +[INFO|modeling_utils.py:3631] 2024-07-30 01:57:47,572 >> loading weights file saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/model.safetensors.index.json -[INFO|modeling_utils.py:1572] 2024-07-29 19:46:33,193 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. +[INFO|modeling_utils.py:1572] 2024-07-30 01:57:47,572 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. -[INFO|configuration_utils.py:1038] 2024-07-29 19:46:33,195 >> Generate config GenerationConfig { +[INFO|configuration_utils.py:1038] 2024-07-30 01:57:47,574 >> Generate config GenerationConfig { "bos_token_id": 128000, "eos_token_id": [ 128001, @@ -125,25 +113,29 @@ } -[INFO|modeling_utils.py:4463] 2024-07-29 19:46:37,422 >> All model checkpoint weights were used when initializing LlamaForCausalLM. +07/30/2024 01:57:47 - INFO - llamafactory.model.patcher - Using KV cache for faster generation. +07/30/2024 01:57:47 - INFO - llamafactory.model.patcher - Using KV cache for faster generation. -[INFO|modeling_utils.py:4471] 2024-07-29 19:46:37,423 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at meta-llama/Meta-Llama-3.1-8B-Instruct. -If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +07/30/2024 01:57:47 - INFO - llamafactory.model.patcher - Using KV cache for faster generation. + +07/30/2024 01:57:47 - INFO - llamafactory.model.patcher - Using KV cache for faster generation. -07/29/2024 19:46:37 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled. +07/30/2024 01:57:47 - INFO - llamafactory.model.patcher - Using KV cache for faster generation. -07/29/2024 19:46:37 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. +07/30/2024 01:57:47 - INFO - llamafactory.model.patcher - Using KV cache for faster generation. -07/29/2024 19:46:37 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32. +07/30/2024 01:57:47 - INFO - llamafactory.model.patcher - Using KV cache for faster generation. -07/29/2024 19:46:37 - INFO - llamafactory.model.adapter - Fine-tuning method: Full +[INFO|modeling_utils.py:4463] 2024-07-30 01:57:52,260 >> All model checkpoint weights were used when initializing LlamaForCausalLM. -07/29/2024 19:46:37 - INFO - llamafactory.model.loader - trainable params: 8,030,261,248 || all params: 8,030,261,248 || trainable%: 100.0000 -[INFO|configuration_utils.py:993] 2024-07-29 19:46:37,598 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3.1-8B-Instruct/snapshots/b2a4d0f33b41fcd59a6d31662cc63b8d53367e1e/generation_config.json +[INFO|modeling_utils.py:4471] 2024-07-30 01:57:52,260 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. + +[INFO|configuration_utils.py:991] 2024-07-30 01:57:52,264 >> loading configuration file saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/generation_config.json -[INFO|configuration_utils.py:1038] 2024-07-29 19:46:37,599 >> Generate config GenerationConfig { +[INFO|configuration_utils.py:1038] 2024-07-30 01:57:52,264 >> Generate config GenerationConfig { "bos_token_id": 128000, "do_sample": true, "eos_token_id": [ @@ -156,1878 +148,60 @@ If your task is similar to the task the model of the checkpoint was trained on, } -[INFO|checkpointing.py:103] 2024-07-29 19:46:37,606 >> Gradient checkpointing enabled. - -[INFO|attention.py:84] 2024-07-29 19:46:37,606 >> Using torch SDPA for faster training and inference. - -[INFO|adapter.py:302] 2024-07-29 19:46:37,606 >> Upcasting trainable params to float32. - -[INFO|adapter.py:48] 2024-07-29 19:46:37,606 >> Fine-tuning method: Full - -[INFO|loader.py:196] 2024-07-29 19:46:37,650 >> trainable params: 8,030,261,248 || all params: 8,030,261,248 || trainable%: 100.0000 - -07/29/2024 19:46:37 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled. - -07/29/2024 19:46:37 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. - -07/29/2024 19:46:37 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32. - -07/29/2024 19:46:37 - INFO - llamafactory.model.adapter - Fine-tuning method: Full - -[INFO|trainer.py:648] 2024-07-29 19:46:37,655 >> Using auto half precision backend - -07/29/2024 19:46:37 - INFO - llamafactory.model.loader - trainable params: 8,030,261,248 || all params: 8,030,261,248 || trainable%: 100.0000 - -07/29/2024 19:46:37 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled. - -07/29/2024 19:46:37 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. - -07/29/2024 19:46:37 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32. - -07/29/2024 19:46:37 - INFO - llamafactory.model.adapter - Fine-tuning method: Full - -07/29/2024 19:46:38 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled. - -07/29/2024 19:46:38 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. - -07/29/2024 19:46:38 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32. - -07/29/2024 19:46:38 - INFO - llamafactory.model.adapter - Fine-tuning method: Full - -07/29/2024 19:46:38 - INFO - llamafactory.model.loader - trainable params: 8,030,261,248 || all params: 8,030,261,248 || trainable%: 100.0000 - -07/29/2024 19:46:38 - INFO - llamafactory.model.loader - trainable params: 8,030,261,248 || all params: 8,030,261,248 || trainable%: 100.0000 - -07/29/2024 19:46:38 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled. - -07/29/2024 19:46:38 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. - -07/29/2024 19:46:38 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32. - -07/29/2024 19:46:38 - INFO - llamafactory.model.adapter - Fine-tuning method: Full - -07/29/2024 19:46:38 - INFO - llamafactory.model.loader - trainable params: 8,030,261,248 || all params: 8,030,261,248 || trainable%: 100.0000 - -07/29/2024 19:46:38 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled. - -07/29/2024 19:46:38 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. - -07/29/2024 19:46:38 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32. - -07/29/2024 19:46:38 - INFO - llamafactory.model.adapter - Fine-tuning method: Full - -07/29/2024 19:46:38 - INFO - llamafactory.model.loader - trainable params: 8,030,261,248 || all params: 8,030,261,248 || trainable%: 100.0000 - -07/29/2024 19:46:38 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled. - -07/29/2024 19:46:38 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. - -07/29/2024 19:46:38 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32. - -07/29/2024 19:46:38 - INFO - llamafactory.model.adapter - Fine-tuning method: Full - -07/29/2024 19:46:38 - INFO - llamafactory.model.loader - trainable params: 8,030,261,248 || all params: 8,030,261,248 || trainable%: 100.0000 - -[INFO|trainer.py:2134] 2024-07-29 19:47:08,063 >> ***** Running training ***** - -[INFO|trainer.py:2135] 2024-07-29 19:47:08,063 >> Num examples = 22,434 - -[INFO|trainer.py:2136] 2024-07-29 19:47:08,063 >> Num Epochs = 5 - -[INFO|trainer.py:2137] 2024-07-29 19:47:08,063 >> Instantaneous batch size per device = 2 - -[INFO|trainer.py:2140] 2024-07-29 19:47:08,063 >> Total train batch size (w. parallel, distributed & accumulation) = 128 - -[INFO|trainer.py:2141] 2024-07-29 19:47:08,063 >> Gradient Accumulation steps = 8 - -[INFO|trainer.py:2142] 2024-07-29 19:47:08,063 >> Total optimization steps = 875 - -[INFO|trainer.py:2143] 2024-07-29 19:47:08,064 >> Number of trainable parameters = 8,030,261,248 - -[INFO|callbacks.py:310] 2024-07-29 19:47:21,884 >> {'loss': 12.0862, 'learning_rate': 5.0000e-07, 'epoch': 0.01, 'throughput': 460.83} - -[INFO|callbacks.py:310] 2024-07-29 19:47:35,061 >> {'loss': 12.1914, 'learning_rate': 1.0000e-06, 'epoch': 0.01, 'throughput': 458.74} - -[INFO|callbacks.py:310] 2024-07-29 19:47:48,230 >> {'loss': 11.9748, 'learning_rate': 1.5000e-06, 'epoch': 0.02, 'throughput': 463.69} - -[INFO|callbacks.py:310] 2024-07-29 19:48:01,399 >> {'loss': 11.5009, 'learning_rate': 2.0000e-06, 'epoch': 0.02, 'throughput': 477.90} - -[INFO|callbacks.py:310] 2024-07-29 19:48:14,543 >> {'loss': 8.7827, 'learning_rate': 2.5000e-06, 'epoch': 0.03, 'throughput': 486.91} - -[INFO|callbacks.py:310] 2024-07-29 19:48:27,700 >> {'loss': 6.5054, 'learning_rate': 3.0000e-06, 'epoch': 0.03, 'throughput': 489.84} - -[INFO|callbacks.py:310] 2024-07-29 19:48:40,859 >> {'loss': 5.2127, 'learning_rate': 3.5000e-06, 'epoch': 0.04, 'throughput': 495.55} - -[INFO|callbacks.py:310] 2024-07-29 19:48:54,034 >> {'loss': 2.7490, 'learning_rate': 4.0000e-06, 'epoch': 0.05, 'throughput': 496.90} - -[INFO|callbacks.py:310] 2024-07-29 19:49:07,197 >> {'loss': 1.0344, 'learning_rate': 4.5000e-06, 'epoch': 0.05, 'throughput': 498.68} - -[INFO|callbacks.py:310] 2024-07-29 19:49:20,359 >> {'loss': 0.6484, 'learning_rate': 5.0000e-06, 'epoch': 0.06, 'throughput': 499.62} - -[INFO|callbacks.py:310] 2024-07-29 19:49:33,537 >> {'loss': 1.8744, 'learning_rate': 5.0000e-06, 'epoch': 0.06, 'throughput': 499.01} - -[INFO|callbacks.py:310] 2024-07-29 19:49:46,697 >> {'loss': 0.4507, 'learning_rate': 4.9999e-06, 'epoch': 0.07, 'throughput': 500.88} - -[INFO|callbacks.py:310] 2024-07-29 19:49:59,863 >> {'loss': 1.6065, 'learning_rate': 4.9999e-06, 'epoch': 0.07, 'throughput': 500.68} - -[INFO|callbacks.py:310] 2024-07-29 19:50:13,022 >> {'loss': 1.6470, 'learning_rate': 4.9997e-06, 'epoch': 0.08, 'throughput': 499.23} - -[INFO|callbacks.py:310] 2024-07-29 19:50:26,202 >> {'loss': 0.4799, 'learning_rate': 4.9996e-06, 'epoch': 0.09, 'throughput': 500.26} - -[INFO|callbacks.py:310] 2024-07-29 19:50:39,387 >> {'loss': 0.5683, 'learning_rate': 4.9994e-06, 'epoch': 0.09, 'throughput': 500.24} - -[INFO|callbacks.py:310] 2024-07-29 19:50:52,548 >> {'loss': 1.1372, 'learning_rate': 4.9992e-06, 'epoch': 0.10, 'throughput': 501.06} - -[INFO|callbacks.py:310] 2024-07-29 19:51:05,708 >> {'loss': 0.3574, 'learning_rate': 4.9989e-06, 'epoch': 0.10, 'throughput': 501.32} - -[INFO|callbacks.py:310] 2024-07-29 19:51:18,889 >> {'loss': 0.3492, 'learning_rate': 4.9987e-06, 'epoch': 0.11, 'throughput': 500.88} - -[INFO|callbacks.py:310] 2024-07-29 19:51:32,077 >> {'loss': 1.0766, 'learning_rate': 4.9984e-06, 'epoch': 0.11, 'throughput': 500.77} - -[INFO|callbacks.py:310] 2024-07-29 19:51:45,255 >> {'loss': 0.3635, 'learning_rate': 4.9980e-06, 'epoch': 0.12, 'throughput': 500.11} - -[INFO|callbacks.py:310] 2024-07-29 19:51:58,424 >> {'loss': 1.5452, 'learning_rate': 4.9976e-06, 'epoch': 0.13, 'throughput': 499.58} - -[INFO|callbacks.py:310] 2024-07-29 19:52:11,622 >> {'loss': 0.9203, 'learning_rate': 4.9972e-06, 'epoch': 0.13, 'throughput': 499.73} - -[INFO|callbacks.py:310] 2024-07-29 19:52:24,794 >> {'loss': 0.3603, 'learning_rate': 4.9968e-06, 'epoch': 0.14, 'throughput': 499.10} - -[INFO|callbacks.py:310] 2024-07-29 19:52:37,953 >> {'loss': 0.7598, 'learning_rate': 4.9963e-06, 'epoch': 0.14, 'throughput': 498.84} - -[INFO|callbacks.py:310] 2024-07-29 19:52:51,122 >> {'loss': 0.5039, 'learning_rate': 4.9958e-06, 'epoch': 0.15, 'throughput': 499.32} - -[INFO|callbacks.py:310] 2024-07-29 19:53:04,307 >> {'loss': 0.4744, 'learning_rate': 4.9952e-06, 'epoch': 0.15, 'throughput': 498.81} - -[INFO|callbacks.py:310] 2024-07-29 19:53:17,484 >> {'loss': 0.3387, 'learning_rate': 4.9947e-06, 'epoch': 0.16, 'throughput': 499.12} - -[INFO|callbacks.py:310] 2024-07-29 19:53:30,632 >> {'loss': 0.3363, 'learning_rate': 4.9941e-06, 'epoch': 0.17, 'throughput': 498.57} - -[INFO|callbacks.py:310] 2024-07-29 19:53:43,811 >> {'loss': 0.2842, 'learning_rate': 4.9934e-06, 'epoch': 0.17, 'throughput': 498.62} - -[INFO|callbacks.py:310] 2024-07-29 19:53:57,003 >> {'loss': 0.2779, 'learning_rate': 4.9927e-06, 'epoch': 0.18, 'throughput': 499.09} - -[INFO|callbacks.py:310] 2024-07-29 19:54:10,169 >> {'loss': 0.2420, 'learning_rate': 4.9920e-06, 'epoch': 0.18, 'throughput': 498.53} - -[INFO|callbacks.py:310] 2024-07-29 19:54:23,326 >> {'loss': 0.2279, 'learning_rate': 4.9913e-06, 'epoch': 0.19, 'throughput': 498.72} - -[INFO|callbacks.py:310] 2024-07-29 19:54:36,512 >> {'loss': 0.1862, 'learning_rate': 4.9905e-06, 'epoch': 0.19, 'throughput': 498.79} - -[INFO|callbacks.py:310] 2024-07-29 19:54:49,697 >> {'loss': 0.2974, 'learning_rate': 4.9897e-06, 'epoch': 0.20, 'throughput': 498.82} - -[INFO|callbacks.py:310] 2024-07-29 19:55:02,890 >> {'loss': 0.1820, 'learning_rate': 4.9889e-06, 'epoch': 0.21, 'throughput': 498.95} - -[INFO|callbacks.py:310] 2024-07-29 19:55:16,062 >> {'loss': 0.1516, 'learning_rate': 4.9880e-06, 'epoch': 0.21, 'throughput': 498.82} - -[INFO|callbacks.py:310] 2024-07-29 19:55:29,260 >> {'loss': 0.2325, 'learning_rate': 4.9871e-06, 'epoch': 0.22, 'throughput': 499.38} - -[INFO|callbacks.py:310] 2024-07-29 19:55:42,436 >> {'loss': 0.1389, 'learning_rate': 4.9861e-06, 'epoch': 0.22, 'throughput': 498.94} - -[INFO|callbacks.py:310] 2024-07-29 19:55:55,621 >> {'loss': 0.2076, 'learning_rate': 4.9852e-06, 'epoch': 0.23, 'throughput': 499.21} - -[INFO|callbacks.py:310] 2024-07-29 19:56:08,816 >> {'loss': 0.1476, 'learning_rate': 4.9842e-06, 'epoch': 0.23, 'throughput': 499.87} - -[INFO|callbacks.py:310] 2024-07-29 19:56:21,994 >> {'loss': 0.1590, 'learning_rate': 4.9831e-06, 'epoch': 0.24, 'throughput': 499.62} - -[INFO|callbacks.py:310] 2024-07-29 19:56:35,182 >> {'loss': 0.1761, 'learning_rate': 4.9821e-06, 'epoch': 0.25, 'throughput': 499.54} - -[INFO|callbacks.py:310] 2024-07-29 19:56:48,345 >> {'loss': 0.1581, 'learning_rate': 4.9810e-06, 'epoch': 0.25, 'throughput': 499.57} - -[INFO|callbacks.py:310] 2024-07-29 19:57:01,510 >> {'loss': 0.1454, 'learning_rate': 4.9798e-06, 'epoch': 0.26, 'throughput': 499.67} - -[INFO|callbacks.py:310] 2024-07-29 19:57:14,692 >> {'loss': 0.1823, 'learning_rate': 4.9787e-06, 'epoch': 0.26, 'throughput': 499.84} - -[INFO|callbacks.py:310] 2024-07-29 19:57:27,858 >> {'loss': 0.2675, 'learning_rate': 4.9775e-06, 'epoch': 0.27, 'throughput': 500.12} - -[INFO|callbacks.py:310] 2024-07-29 19:57:41,048 >> {'loss': 0.1482, 'learning_rate': 4.9762e-06, 'epoch': 0.27, 'throughput': 500.03} - -[INFO|callbacks.py:310] 2024-07-29 19:57:54,204 >> {'loss': 0.1258, 'learning_rate': 4.9750e-06, 'epoch': 0.28, 'throughput': 499.91} - -[INFO|callbacks.py:310] 2024-07-29 19:58:07,364 >> {'loss': 0.1921, 'learning_rate': 4.9737e-06, 'epoch': 0.29, 'throughput': 500.36} - -[INFO|callbacks.py:310] 2024-07-29 19:58:20,512 >> {'loss': 0.1491, 'learning_rate': 4.9723e-06, 'epoch': 0.29, 'throughput': 500.38} - -[INFO|callbacks.py:310] 2024-07-29 19:58:33,683 >> {'loss': 0.1186, 'learning_rate': 4.9710e-06, 'epoch': 0.30, 'throughput': 500.27} - -[INFO|callbacks.py:310] 2024-07-29 19:58:46,843 >> {'loss': 0.2011, 'learning_rate': 4.9696e-06, 'epoch': 0.30, 'throughput': 500.55} - -[INFO|callbacks.py:310] 2024-07-29 19:58:59,996 >> {'loss': 0.1301, 'learning_rate': 4.9681e-06, 'epoch': 0.31, 'throughput': 500.66} - -[INFO|callbacks.py:310] 2024-07-29 19:59:13,165 >> {'loss': 0.1563, 'learning_rate': 4.9667e-06, 'epoch': 0.31, 'throughput': 500.06} - -[INFO|callbacks.py:310] 2024-07-29 19:59:26,331 >> {'loss': 0.1667, 'learning_rate': 4.9652e-06, 'epoch': 0.32, 'throughput': 499.87} - -[INFO|callbacks.py:310] 2024-07-29 19:59:39,487 >> {'loss': 0.1340, 'learning_rate': 4.9637e-06, 'epoch': 0.33, 'throughput': 499.79} - -[INFO|callbacks.py:310] 2024-07-29 19:59:52,655 >> {'loss': 0.1753, 'learning_rate': 4.9621e-06, 'epoch': 0.33, 'throughput': 500.01} - -[INFO|callbacks.py:310] 2024-07-29 20:00:05,816 >> {'loss': 0.1381, 'learning_rate': 4.9605e-06, 'epoch': 0.34, 'throughput': 500.11} - -[INFO|callbacks.py:310] 2024-07-29 20:00:18,995 >> {'loss': 0.0927, 'learning_rate': 4.9589e-06, 'epoch': 0.34, 'throughput': 500.54} - -[INFO|callbacks.py:310] 2024-07-29 20:00:32,160 >> {'loss': 0.1608, 'learning_rate': 4.9572e-06, 'epoch': 0.35, 'throughput': 500.48} - -[INFO|callbacks.py:310] 2024-07-29 20:00:45,321 >> {'loss': 0.1219, 'learning_rate': 4.9555e-06, 'epoch': 0.35, 'throughput': 500.25} - -[INFO|callbacks.py:310] 2024-07-29 20:00:58,482 >> {'loss': 0.1674, 'learning_rate': 4.9538e-06, 'epoch': 0.36, 'throughput': 500.03} - -[INFO|callbacks.py:310] 2024-07-29 20:01:11,648 >> {'loss': 0.1413, 'learning_rate': 4.9521e-06, 'epoch': 0.36, 'throughput': 499.70} - -[INFO|callbacks.py:310] 2024-07-29 20:01:24,798 >> {'loss': 0.1515, 'learning_rate': 4.9503e-06, 'epoch': 0.37, 'throughput': 499.78} - -[INFO|callbacks.py:310] 2024-07-29 20:01:37,941 >> {'loss': 0.1904, 'learning_rate': 4.9485e-06, 'epoch': 0.38, 'throughput': 499.60} - -[INFO|callbacks.py:310] 2024-07-29 20:01:51,099 >> {'loss': 0.1283, 'learning_rate': 4.9466e-06, 'epoch': 0.38, 'throughput': 499.32} - -[INFO|callbacks.py:310] 2024-07-29 20:02:04,263 >> {'loss': 0.1686, 'learning_rate': 4.9447e-06, 'epoch': 0.39, 'throughput': 499.03} - -[INFO|callbacks.py:310] 2024-07-29 20:02:17,421 >> {'loss': 0.1451, 'learning_rate': 4.9428e-06, 'epoch': 0.39, 'throughput': 499.17} - -[INFO|callbacks.py:310] 2024-07-29 20:02:30,594 >> {'loss': 0.1635, 'learning_rate': 4.9409e-06, 'epoch': 0.40, 'throughput': 498.98} - -[INFO|callbacks.py:310] 2024-07-29 20:02:43,767 >> {'loss': 0.1182, 'learning_rate': 4.9389e-06, 'epoch': 0.40, 'throughput': 498.89} - -[INFO|callbacks.py:310] 2024-07-29 20:02:56,928 >> {'loss': 0.1328, 'learning_rate': 4.9369e-06, 'epoch': 0.41, 'throughput': 499.01} - -[INFO|callbacks.py:310] 2024-07-29 20:03:10,075 >> {'loss': 0.1312, 'learning_rate': 4.9348e-06, 'epoch': 0.42, 'throughput': 499.57} - -[INFO|callbacks.py:310] 2024-07-29 20:03:23,225 >> {'loss': 0.1439, 'learning_rate': 4.9328e-06, 'epoch': 0.42, 'throughput': 500.00} - -[INFO|callbacks.py:310] 2024-07-29 20:03:36,398 >> {'loss': 0.1534, 'learning_rate': 4.9307e-06, 'epoch': 0.43, 'throughput': 500.24} - -[INFO|callbacks.py:310] 2024-07-29 20:03:49,571 >> {'loss': 0.1159, 'learning_rate': 4.9285e-06, 'epoch': 0.43, 'throughput': 499.87} - -[INFO|callbacks.py:310] 2024-07-29 20:04:02,726 >> {'loss': 0.1291, 'learning_rate': 4.9263e-06, 'epoch': 0.44, 'throughput': 499.84} - -[INFO|callbacks.py:310] 2024-07-29 20:04:15,881 >> {'loss': 0.1450, 'learning_rate': 4.9241e-06, 'epoch': 0.44, 'throughput': 500.06} - -[INFO|callbacks.py:310] 2024-07-29 20:04:29,044 >> {'loss': 0.1155, 'learning_rate': 4.9219e-06, 'epoch': 0.45, 'throughput': 499.95} - -[INFO|callbacks.py:310] 2024-07-29 20:04:42,213 >> {'loss': 0.1153, 'learning_rate': 4.9196e-06, 'epoch': 0.46, 'throughput': 500.30} - -[INFO|callbacks.py:310] 2024-07-29 20:04:55,382 >> {'loss': 0.1673, 'learning_rate': 4.9173e-06, 'epoch': 0.46, 'throughput': 500.05} - -[INFO|callbacks.py:310] 2024-07-29 20:05:08,544 >> {'loss': 0.2122, 'learning_rate': 4.9150e-06, 'epoch': 0.47, 'throughput': 500.28} - -[INFO|callbacks.py:310] 2024-07-29 20:05:21,698 >> {'loss': 0.1092, 'learning_rate': 4.9126e-06, 'epoch': 0.47, 'throughput': 500.01} - -[INFO|callbacks.py:310] 2024-07-29 20:05:34,858 >> {'loss': 0.1480, 'learning_rate': 4.9103e-06, 'epoch': 0.48, 'throughput': 500.34} - -[INFO|callbacks.py:310] 2024-07-29 20:05:48,012 >> {'loss': 0.1237, 'learning_rate': 4.9078e-06, 'epoch': 0.48, 'throughput': 500.21} - -[INFO|callbacks.py:310] 2024-07-29 20:06:01,169 >> {'loss': 0.1060, 'learning_rate': 4.9054e-06, 'epoch': 0.49, 'throughput': 500.12} - -[INFO|callbacks.py:310] 2024-07-29 20:06:14,345 >> {'loss': 0.1131, 'learning_rate': 4.9029e-06, 'epoch': 0.50, 'throughput': 500.04} - -[INFO|callbacks.py:310] 2024-07-29 20:06:27,511 >> {'loss': 0.1343, 'learning_rate': 4.9004e-06, 'epoch': 0.50, 'throughput': 500.17} - -[INFO|callbacks.py:310] 2024-07-29 20:06:40,689 >> {'loss': 0.0873, 'learning_rate': 4.8978e-06, 'epoch': 0.51, 'throughput': 500.47} - -[INFO|callbacks.py:310] 2024-07-29 20:06:53,859 >> {'loss': 0.1412, 'learning_rate': 4.8952e-06, 'epoch': 0.51, 'throughput': 500.42} - -[INFO|callbacks.py:310] 2024-07-29 20:07:07,006 >> {'loss': 0.0983, 'learning_rate': 4.8926e-06, 'epoch': 0.52, 'throughput': 500.30} - -[INFO|callbacks.py:310] 2024-07-29 20:07:20,173 >> {'loss': 0.1322, 'learning_rate': 4.8899e-06, 'epoch': 0.52, 'throughput': 500.27} - -[INFO|callbacks.py:310] 2024-07-29 20:07:33,342 >> {'loss': 0.1345, 'learning_rate': 4.8873e-06, 'epoch': 0.53, 'throughput': 500.63} - -[INFO|callbacks.py:310] 2024-07-29 20:07:46,487 >> {'loss': 0.1367, 'learning_rate': 4.8846e-06, 'epoch': 0.54, 'throughput': 500.72} - -[INFO|callbacks.py:310] 2024-07-29 20:07:59,646 >> {'loss': 0.1228, 'learning_rate': 4.8818e-06, 'epoch': 0.54, 'throughput': 500.49} - -[INFO|callbacks.py:310] 2024-07-29 20:08:12,829 >> {'loss': 0.2031, 'learning_rate': 4.8790e-06, 'epoch': 0.55, 'throughput': 500.56} - -[INFO|callbacks.py:310] 2024-07-29 20:08:25,986 >> {'loss': 0.1235, 'learning_rate': 4.8762e-06, 'epoch': 0.55, 'throughput': 500.49} - -[INFO|callbacks.py:310] 2024-07-29 20:08:39,134 >> {'loss': 0.1136, 'learning_rate': 4.8734e-06, 'epoch': 0.56, 'throughput': 500.48} - -[INFO|callbacks.py:310] 2024-07-29 20:08:52,293 >> {'loss': 0.1065, 'learning_rate': 4.8705e-06, 'epoch': 0.56, 'throughput': 500.67} - -[INFO|callbacks.py:310] 2024-07-29 20:09:05,460 >> {'loss': 0.1489, 'learning_rate': 4.8676e-06, 'epoch': 0.57, 'throughput': 500.95} - -[INFO|callbacks.py:310] 2024-07-29 20:09:18,617 >> {'loss': 0.1057, 'learning_rate': 4.8647e-06, 'epoch': 0.58, 'throughput': 500.84} - -[INFO|callbacks.py:310] 2024-07-29 20:09:31,771 >> {'loss': 0.1382, 'learning_rate': 4.8617e-06, 'epoch': 0.58, 'throughput': 500.86} - -[INFO|callbacks.py:310] 2024-07-29 20:09:44,946 >> {'loss': 0.1190, 'learning_rate': 4.8587e-06, 'epoch': 0.59, 'throughput': 500.91} - -[INFO|callbacks.py:310] 2024-07-29 20:09:58,110 >> {'loss': 0.1058, 'learning_rate': 4.8557e-06, 'epoch': 0.59, 'throughput': 500.69} - -[INFO|callbacks.py:310] 2024-07-29 20:10:11,248 >> {'loss': 0.0622, 'learning_rate': 4.8527e-06, 'epoch': 0.60, 'throughput': 500.65} - -[INFO|callbacks.py:310] 2024-07-29 20:10:24,406 >> {'loss': 0.0800, 'learning_rate': 4.8496e-06, 'epoch': 0.60, 'throughput': 500.62} - -[INFO|callbacks.py:310] 2024-07-29 20:10:37,560 >> {'loss': 0.1147, 'learning_rate': 4.8465e-06, 'epoch': 0.61, 'throughput': 500.74} - -[INFO|callbacks.py:310] 2024-07-29 20:10:50,722 >> {'loss': 0.1084, 'learning_rate': 4.8433e-06, 'epoch': 0.62, 'throughput': 501.11} - -[INFO|callbacks.py:310] 2024-07-29 20:11:03,872 >> {'loss': 0.1195, 'learning_rate': 4.8401e-06, 'epoch': 0.62, 'throughput': 501.10} - -[INFO|callbacks.py:310] 2024-07-29 20:11:17,030 >> {'loss': 0.0877, 'learning_rate': 4.8369e-06, 'epoch': 0.63, 'throughput': 500.96} - -[INFO|callbacks.py:310] 2024-07-29 20:11:30,191 >> {'loss': 0.1405, 'learning_rate': 4.8337e-06, 'epoch': 0.63, 'throughput': 500.87} - -[INFO|callbacks.py:310] 2024-07-29 20:11:43,349 >> {'loss': 0.1014, 'learning_rate': 4.8304e-06, 'epoch': 0.64, 'throughput': 501.06} - -[INFO|callbacks.py:310] 2024-07-29 20:11:56,502 >> {'loss': 0.1094, 'learning_rate': 4.8271e-06, 'epoch': 0.64, 'throughput': 500.91} - -[INFO|callbacks.py:310] 2024-07-29 20:12:09,657 >> {'loss': 0.1301, 'learning_rate': 4.8238e-06, 'epoch': 0.65, 'throughput': 500.72} - -[INFO|callbacks.py:310] 2024-07-29 20:12:22,817 >> {'loss': 0.0939, 'learning_rate': 4.8204e-06, 'epoch': 0.66, 'throughput': 500.93} - -[INFO|callbacks.py:310] 2024-07-29 20:12:35,965 >> {'loss': 0.1220, 'learning_rate': 4.8170e-06, 'epoch': 0.66, 'throughput': 501.05} - -[INFO|callbacks.py:310] 2024-07-29 20:12:49,126 >> {'loss': 0.1296, 'learning_rate': 4.8136e-06, 'epoch': 0.67, 'throughput': 501.02} - -[INFO|callbacks.py:310] 2024-07-29 20:13:02,279 >> {'loss': 0.1080, 'learning_rate': 4.8101e-06, 'epoch': 0.67, 'throughput': 500.90} - -[INFO|callbacks.py:310] 2024-07-29 20:13:15,427 >> {'loss': 0.0779, 'learning_rate': 4.8066e-06, 'epoch': 0.68, 'throughput': 500.84} - -[INFO|callbacks.py:310] 2024-07-29 20:13:28,581 >> {'loss': 0.1358, 'learning_rate': 4.8031e-06, 'epoch': 0.68, 'throughput': 500.72} - -[INFO|callbacks.py:310] 2024-07-29 20:13:41,726 >> {'loss': 0.1485, 'learning_rate': 4.7996e-06, 'epoch': 0.69, 'throughput': 500.93} - -[INFO|callbacks.py:310] 2024-07-29 20:13:54,879 >> {'loss': 0.0900, 'learning_rate': 4.7960e-06, 'epoch': 0.70, 'throughput': 500.93} - -[INFO|callbacks.py:310] 2024-07-29 20:14:08,026 >> {'loss': 0.1080, 'learning_rate': 4.7924e-06, 'epoch': 0.70, 'throughput': 500.94} - -[INFO|callbacks.py:310] 2024-07-29 20:14:21,169 >> {'loss': 0.1557, 'learning_rate': 4.7888e-06, 'epoch': 0.71, 'throughput': 500.99} - -[INFO|callbacks.py:310] 2024-07-29 20:14:34,334 >> {'loss': 0.1206, 'learning_rate': 4.7851e-06, 'epoch': 0.71, 'throughput': 500.69} - -[INFO|callbacks.py:310] 2024-07-29 20:14:47,493 >> {'loss': 0.0977, 'learning_rate': 4.7814e-06, 'epoch': 0.72, 'throughput': 500.57} - -[INFO|callbacks.py:310] 2024-07-29 20:15:00,658 >> {'loss': 0.1223, 'learning_rate': 4.7777e-06, 'epoch': 0.72, 'throughput': 500.83} - -[INFO|callbacks.py:310] 2024-07-29 20:15:13,811 >> {'loss': 0.1751, 'learning_rate': 4.7739e-06, 'epoch': 0.73, 'throughput': 501.04} - -[INFO|callbacks.py:310] 2024-07-29 20:15:26,967 >> {'loss': 0.1016, 'learning_rate': 4.7701e-06, 'epoch': 0.74, 'throughput': 501.10} - -[INFO|callbacks.py:310] 2024-07-29 20:15:40,129 >> {'loss': 0.1351, 'learning_rate': 4.7663e-06, 'epoch': 0.74, 'throughput': 501.30} - -[INFO|callbacks.py:310] 2024-07-29 20:15:53,281 >> {'loss': 0.0891, 'learning_rate': 4.7625e-06, 'epoch': 0.75, 'throughput': 501.34} - -[INFO|callbacks.py:310] 2024-07-29 20:16:06,422 >> {'loss': 0.0700, 'learning_rate': 4.7586e-06, 'epoch': 0.75, 'throughput': 501.37} - -[INFO|callbacks.py:310] 2024-07-29 20:16:19,589 >> {'loss': 0.1325, 'learning_rate': 4.7547e-06, 'epoch': 0.76, 'throughput': 501.42} - -[INFO|callbacks.py:310] 2024-07-29 20:16:32,733 >> {'loss': 0.1074, 'learning_rate': 4.7507e-06, 'epoch': 0.76, 'throughput': 501.63} - -[INFO|callbacks.py:310] 2024-07-29 20:16:45,889 >> {'loss': 0.0949, 'learning_rate': 4.7468e-06, 'epoch': 0.77, 'throughput': 501.49} - -[INFO|callbacks.py:310] 2024-07-29 20:16:59,055 >> {'loss': 0.0813, 'learning_rate': 4.7428e-06, 'epoch': 0.78, 'throughput': 501.64} - -[INFO|callbacks.py:310] 2024-07-29 20:17:12,211 >> {'loss': 0.0920, 'learning_rate': 4.7387e-06, 'epoch': 0.78, 'throughput': 501.72} - -[INFO|callbacks.py:310] 2024-07-29 20:17:25,358 >> {'loss': 0.0679, 'learning_rate': 4.7347e-06, 'epoch': 0.79, 'throughput': 501.77} - -[INFO|callbacks.py:310] 2024-07-29 20:17:38,513 >> {'loss': 0.1234, 'learning_rate': 4.7306e-06, 'epoch': 0.79, 'throughput': 501.69} - -[INFO|callbacks.py:310] 2024-07-29 20:17:51,669 >> {'loss': 0.1281, 'learning_rate': 4.7265e-06, 'epoch': 0.80, 'throughput': 502.03} - -[INFO|callbacks.py:310] 2024-07-29 20:18:04,817 >> {'loss': 0.0834, 'learning_rate': 4.7223e-06, 'epoch': 0.80, 'throughput': 501.99} - -[INFO|callbacks.py:310] 2024-07-29 20:18:17,974 >> {'loss': 0.0863, 'learning_rate': 4.7182e-06, 'epoch': 0.81, 'throughput': 502.00} - -[INFO|callbacks.py:310] 2024-07-29 20:18:31,127 >> {'loss': 0.0503, 'learning_rate': 4.7140e-06, 'epoch': 0.82, 'throughput': 502.08} - -[INFO|callbacks.py:310] 2024-07-29 20:18:44,274 >> {'loss': 0.1619, 'learning_rate': 4.7097e-06, 'epoch': 0.82, 'throughput': 501.99} - -[INFO|callbacks.py:310] 2024-07-29 20:18:57,437 >> {'loss': 0.1112, 'learning_rate': 4.7055e-06, 'epoch': 0.83, 'throughput': 501.90} - -[INFO|callbacks.py:310] 2024-07-29 20:19:10,575 >> {'loss': 0.0939, 'learning_rate': 4.7012e-06, 'epoch': 0.83, 'throughput': 501.95} - -[INFO|callbacks.py:310] 2024-07-29 20:19:23,731 >> {'loss': 0.0549, 'learning_rate': 4.6969e-06, 'epoch': 0.84, 'throughput': 501.90} - -[INFO|callbacks.py:310] 2024-07-29 20:19:36,881 >> {'loss': 0.1128, 'learning_rate': 4.6925e-06, 'epoch': 0.84, 'throughput': 502.11} - -[INFO|callbacks.py:310] 2024-07-29 20:19:50,033 >> {'loss': 0.0862, 'learning_rate': 4.6881e-06, 'epoch': 0.85, 'throughput': 502.18} - -[INFO|callbacks.py:310] 2024-07-29 20:20:03,197 >> {'loss': 0.0769, 'learning_rate': 4.6837e-06, 'epoch': 0.86, 'throughput': 502.02} - -[INFO|callbacks.py:310] 2024-07-29 20:20:16,346 >> {'loss': 0.1199, 'learning_rate': 4.6793e-06, 'epoch': 0.86, 'throughput': 501.97} - -[INFO|callbacks.py:310] 2024-07-29 20:20:29,497 >> {'loss': 0.0930, 'learning_rate': 4.6748e-06, 'epoch': 0.87, 'throughput': 502.07} - -[INFO|callbacks.py:310] 2024-07-29 20:20:42,642 >> {'loss': 0.0903, 'learning_rate': 4.6703e-06, 'epoch': 0.87, 'throughput': 502.12} - -[INFO|callbacks.py:310] 2024-07-29 20:20:55,809 >> {'loss': 0.0877, 'learning_rate': 4.6658e-06, 'epoch': 0.88, 'throughput': 502.19} - -[INFO|callbacks.py:310] 2024-07-29 20:21:08,973 >> {'loss': 0.1049, 'learning_rate': 4.6613e-06, 'epoch': 0.88, 'throughput': 502.05} - -[INFO|callbacks.py:310] 2024-07-29 20:21:22,120 >> {'loss': 0.1052, 'learning_rate': 4.6567e-06, 'epoch': 0.89, 'throughput': 502.09} - -[INFO|callbacks.py:310] 2024-07-29 20:21:35,256 >> {'loss': 0.1196, 'learning_rate': 4.6521e-06, 'epoch': 0.90, 'throughput': 502.10} - -[INFO|callbacks.py:310] 2024-07-29 20:21:48,403 >> {'loss': 0.0427, 'learning_rate': 4.6475e-06, 'epoch': 0.90, 'throughput': 502.08} - -[INFO|callbacks.py:310] 2024-07-29 20:22:01,543 >> {'loss': 0.0764, 'learning_rate': 4.6428e-06, 'epoch': 0.91, 'throughput': 502.25} - -[INFO|callbacks.py:310] 2024-07-29 20:22:14,699 >> {'loss': 0.0812, 'learning_rate': 4.6381e-06, 'epoch': 0.91, 'throughput': 502.20} - -[INFO|callbacks.py:310] 2024-07-29 20:22:27,845 >> {'loss': 0.0807, 'learning_rate': 4.6334e-06, 'epoch': 0.92, 'throughput': 502.42} - -[INFO|callbacks.py:310] 2024-07-29 20:22:41,002 >> {'loss': 0.1475, 'learning_rate': 4.6286e-06, 'epoch': 0.92, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 20:22:54,177 >> {'loss': 0.0507, 'learning_rate': 4.6239e-06, 'epoch': 0.93, 'throughput': 502.42} - -[INFO|callbacks.py:310] 2024-07-29 20:23:07,321 >> {'loss': 0.1015, 'learning_rate': 4.6191e-06, 'epoch': 0.94, 'throughput': 502.37} - -[INFO|callbacks.py:310] 2024-07-29 20:23:20,472 >> {'loss': 0.0957, 'learning_rate': 4.6142e-06, 'epoch': 0.94, 'throughput': 502.38} - -[INFO|callbacks.py:310] 2024-07-29 20:23:33,634 >> {'loss': 0.1327, 'learning_rate': 4.6094e-06, 'epoch': 0.95, 'throughput': 502.47} - -[INFO|callbacks.py:310] 2024-07-29 20:23:46,783 >> {'loss': 0.0782, 'learning_rate': 4.6045e-06, 'epoch': 0.95, 'throughput': 502.50} - -[INFO|callbacks.py:310] 2024-07-29 20:23:59,922 >> {'loss': 0.0768, 'learning_rate': 4.5996e-06, 'epoch': 0.96, 'throughput': 502.35} - -[INFO|callbacks.py:310] 2024-07-29 20:24:13,088 >> {'loss': 0.0330, 'learning_rate': 4.5946e-06, 'epoch': 0.96, 'throughput': 502.22} - -[INFO|callbacks.py:310] 2024-07-29 20:24:26,258 >> {'loss': 0.1295, 'learning_rate': 4.5896e-06, 'epoch': 0.97, 'throughput': 502.18} - -[INFO|callbacks.py:310] 2024-07-29 20:24:39,401 >> {'loss': 0.1216, 'learning_rate': 4.5846e-06, 'epoch': 0.98, 'throughput': 502.22} - -[INFO|callbacks.py:310] 2024-07-29 20:24:52,560 >> {'loss': 0.0896, 'learning_rate': 4.5796e-06, 'epoch': 0.98, 'throughput': 502.14} - -[INFO|callbacks.py:310] 2024-07-29 20:25:05,710 >> {'loss': 0.1010, 'learning_rate': 4.5746e-06, 'epoch': 0.99, 'throughput': 502.30} - -[INFO|callbacks.py:310] 2024-07-29 20:25:18,874 >> {'loss': 0.0787, 'learning_rate': 4.5695e-06, 'epoch': 0.99, 'throughput': 502.25} - -[INFO|callbacks.py:310] 2024-07-29 20:25:32,034 >> {'loss': 0.1118, 'learning_rate': 4.5644e-06, 'epoch': 1.00, 'throughput': 502.26} - -[INFO|callbacks.py:310] 2024-07-29 20:25:45,208 >> {'loss': 0.0671, 'learning_rate': 4.5592e-06, 'epoch': 1.00, 'throughput': 502.22} - -[INFO|callbacks.py:310] 2024-07-29 20:25:58,373 >> {'loss': 0.0862, 'learning_rate': 4.5541e-06, 'epoch': 1.01, 'throughput': 502.29} - -[INFO|callbacks.py:310] 2024-07-29 20:26:11,523 >> {'loss': 0.0385, 'learning_rate': 4.5489e-06, 'epoch': 1.01, 'throughput': 502.15} - -[INFO|callbacks.py:310] 2024-07-29 20:26:24,667 >> {'loss': 0.0557, 'learning_rate': 4.5437e-06, 'epoch': 1.02, 'throughput': 502.21} - -[INFO|callbacks.py:310] 2024-07-29 20:26:37,817 >> {'loss': 0.0692, 'learning_rate': 4.5384e-06, 'epoch': 1.03, 'throughput': 502.15} - -[INFO|callbacks.py:310] 2024-07-29 20:26:50,973 >> {'loss': 0.1001, 'learning_rate': 4.5332e-06, 'epoch': 1.03, 'throughput': 502.09} - -[INFO|callbacks.py:310] 2024-07-29 20:27:04,133 >> {'loss': 0.0782, 'learning_rate': 4.5279e-06, 'epoch': 1.04, 'throughput': 502.08} - -[INFO|callbacks.py:310] 2024-07-29 20:27:17,278 >> {'loss': 0.0324, 'learning_rate': 4.5225e-06, 'epoch': 1.04, 'throughput': 502.13} - -[INFO|callbacks.py:310] 2024-07-29 20:27:30,423 >> {'loss': 0.0671, 'learning_rate': 4.5172e-06, 'epoch': 1.05, 'throughput': 502.02} - -[INFO|callbacks.py:310] 2024-07-29 20:27:43,587 >> {'loss': 0.0510, 'learning_rate': 4.5118e-06, 'epoch': 1.05, 'throughput': 502.29} - -[INFO|callbacks.py:310] 2024-07-29 20:27:56,738 >> {'loss': 0.0565, 'learning_rate': 4.5064e-06, 'epoch': 1.06, 'throughput': 502.39} - -[INFO|callbacks.py:310] 2024-07-29 20:28:09,920 >> {'loss': 0.0393, 'learning_rate': 4.5010e-06, 'epoch': 1.07, 'throughput': 502.46} - -[INFO|callbacks.py:310] 2024-07-29 20:28:23,075 >> {'loss': 0.0922, 'learning_rate': 4.4955e-06, 'epoch': 1.07, 'throughput': 502.53} - -[INFO|callbacks.py:310] 2024-07-29 20:28:36,245 >> {'loss': 0.0518, 'learning_rate': 4.4900e-06, 'epoch': 1.08, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 20:28:49,385 >> {'loss': 0.0894, 'learning_rate': 4.4845e-06, 'epoch': 1.08, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 20:29:02,547 >> {'loss': 0.0631, 'learning_rate': 4.4790e-06, 'epoch': 1.09, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 20:29:15,700 >> {'loss': 0.0464, 'learning_rate': 4.4734e-06, 'epoch': 1.09, 'throughput': 502.69} - -[INFO|callbacks.py:310] 2024-07-29 20:29:28,858 >> {'loss': 0.0652, 'learning_rate': 4.4679e-06, 'epoch': 1.10, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 20:29:42,016 >> {'loss': 0.0317, 'learning_rate': 4.4622e-06, 'epoch': 1.11, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 20:29:55,169 >> {'loss': 0.0485, 'learning_rate': 4.4566e-06, 'epoch': 1.11, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 20:30:08,313 >> {'loss': 0.0325, 'learning_rate': 4.4509e-06, 'epoch': 1.12, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 20:30:21,465 >> {'loss': 0.0753, 'learning_rate': 4.4452e-06, 'epoch': 1.12, 'throughput': 502.76} - -[INFO|callbacks.py:310] 2024-07-29 20:30:34,620 >> {'loss': 0.0159, 'learning_rate': 4.4395e-06, 'epoch': 1.13, 'throughput': 502.73} - -[INFO|callbacks.py:310] 2024-07-29 20:30:47,775 >> {'loss': 0.0718, 'learning_rate': 4.4338e-06, 'epoch': 1.13, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 20:31:00,922 >> {'loss': 0.0491, 'learning_rate': 4.4280e-06, 'epoch': 1.14, 'throughput': 502.69} - -[INFO|callbacks.py:310] 2024-07-29 20:31:14,069 >> {'loss': 0.0608, 'learning_rate': 4.4222e-06, 'epoch': 1.15, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 20:31:27,222 >> {'loss': 0.0631, 'learning_rate': 4.4164e-06, 'epoch': 1.15, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 20:31:40,370 >> {'loss': 0.0240, 'learning_rate': 4.4106e-06, 'epoch': 1.16, 'throughput': 502.69} - -[INFO|callbacks.py:310] 2024-07-29 20:31:53,514 >> {'loss': 0.0453, 'learning_rate': 4.4047e-06, 'epoch': 1.16, 'throughput': 502.76} - -[INFO|callbacks.py:310] 2024-07-29 20:32:06,665 >> {'loss': 0.0345, 'learning_rate': 4.3988e-06, 'epoch': 1.17, 'throughput': 502.73} - -[INFO|callbacks.py:310] 2024-07-29 20:32:19,817 >> {'loss': 0.0150, 'learning_rate': 4.3929e-06, 'epoch': 1.17, 'throughput': 502.70} - -[INFO|callbacks.py:310] 2024-07-29 20:32:32,980 >> {'loss': 0.0731, 'learning_rate': 4.3869e-06, 'epoch': 1.18, 'throughput': 502.55} - -[INFO|callbacks.py:310] 2024-07-29 20:32:46,135 >> {'loss': 0.0560, 'learning_rate': 4.3810e-06, 'epoch': 1.19, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 20:32:59,286 >> {'loss': 0.1013, 'learning_rate': 4.3750e-06, 'epoch': 1.19, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 20:33:12,440 >> {'loss': 0.0254, 'learning_rate': 4.3690e-06, 'epoch': 1.20, 'throughput': 502.76} - -[INFO|callbacks.py:310] 2024-07-29 20:33:25,610 >> {'loss': 0.0526, 'learning_rate': 4.3629e-06, 'epoch': 1.20, 'throughput': 502.76} - -[INFO|callbacks.py:310] 2024-07-29 20:33:38,756 >> {'loss': 0.0370, 'learning_rate': 4.3568e-06, 'epoch': 1.21, 'throughput': 502.73} - -[INFO|callbacks.py:310] 2024-07-29 20:33:51,913 >> {'loss': 0.0438, 'learning_rate': 4.3508e-06, 'epoch': 1.21, 'throughput': 502.75} - -[INFO|callbacks.py:310] 2024-07-29 20:34:05,058 >> {'loss': 0.0496, 'learning_rate': 4.3446e-06, 'epoch': 1.22, 'throughput': 502.78} - -[INFO|callbacks.py:310] 2024-07-29 20:34:18,213 >> {'loss': 0.0453, 'learning_rate': 4.3385e-06, 'epoch': 1.23, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 20:34:31,373 >> {'loss': 0.0428, 'learning_rate': 4.3323e-06, 'epoch': 1.23, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 20:34:44,538 >> {'loss': 0.1130, 'learning_rate': 4.3261e-06, 'epoch': 1.24, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 20:34:57,689 >> {'loss': 0.0934, 'learning_rate': 4.3199e-06, 'epoch': 1.24, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 20:35:10,824 >> {'loss': 0.0495, 'learning_rate': 4.3137e-06, 'epoch': 1.25, 'throughput': 502.81} - -[INFO|callbacks.py:310] 2024-07-29 20:35:23,989 >> {'loss': 0.0962, 'learning_rate': 4.3074e-06, 'epoch': 1.25, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 20:35:37,141 >> {'loss': 0.0546, 'learning_rate': 4.3012e-06, 'epoch': 1.26, 'throughput': 502.73} - -[INFO|callbacks.py:310] 2024-07-29 20:35:50,290 >> {'loss': 0.0321, 'learning_rate': 4.2948e-06, 'epoch': 1.27, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 20:36:03,448 >> {'loss': 0.0420, 'learning_rate': 4.2885e-06, 'epoch': 1.27, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 20:36:16,590 >> {'loss': 0.0554, 'learning_rate': 4.2822e-06, 'epoch': 1.28, 'throughput': 502.74} - -[INFO|callbacks.py:310] 2024-07-29 20:36:29,737 >> {'loss': 0.0263, 'learning_rate': 4.2758e-06, 'epoch': 1.28, 'throughput': 502.92} - -[INFO|callbacks.py:310] 2024-07-29 20:36:42,887 >> {'loss': 0.0433, 'learning_rate': 4.2694e-06, 'epoch': 1.29, 'throughput': 502.99} - -[INFO|callbacks.py:310] 2024-07-29 20:36:56,037 >> {'loss': 0.0569, 'learning_rate': 4.2629e-06, 'epoch': 1.29, 'throughput': 502.98} - -[INFO|callbacks.py:310] 2024-07-29 20:37:09,211 >> {'loss': 0.0532, 'learning_rate': 4.2565e-06, 'epoch': 1.30, 'throughput': 503.22} - -[INFO|callbacks.py:310] 2024-07-29 20:37:22,372 >> {'loss': 0.0155, 'learning_rate': 4.2500e-06, 'epoch': 1.31, 'throughput': 503.14} - -[INFO|callbacks.py:310] 2024-07-29 20:37:35,532 >> {'loss': 0.0277, 'learning_rate': 4.2435e-06, 'epoch': 1.31, 'throughput': 503.09} - -[INFO|callbacks.py:310] 2024-07-29 20:37:48,690 >> {'loss': 0.0740, 'learning_rate': 4.2370e-06, 'epoch': 1.32, 'throughput': 503.17} - -[INFO|callbacks.py:310] 2024-07-29 20:38:01,842 >> {'loss': 0.0861, 'learning_rate': 4.2305e-06, 'epoch': 1.32, 'throughput': 503.09} - -[INFO|callbacks.py:310] 2024-07-29 20:38:15,008 >> {'loss': 0.0669, 'learning_rate': 4.2239e-06, 'epoch': 1.33, 'throughput': 503.13} - -[INFO|callbacks.py:310] 2024-07-29 20:38:28,144 >> {'loss': 0.0703, 'learning_rate': 4.2173e-06, 'epoch': 1.33, 'throughput': 503.12} - -[INFO|callbacks.py:310] 2024-07-29 20:38:41,301 >> {'loss': 0.0172, 'learning_rate': 4.2107e-06, 'epoch': 1.34, 'throughput': 503.16} - -[INFO|callbacks.py:310] 2024-07-29 20:38:54,454 >> {'loss': 0.1336, 'learning_rate': 4.2041e-06, 'epoch': 1.35, 'throughput': 503.23} - -[INFO|callbacks.py:310] 2024-07-29 20:39:07,599 >> {'loss': 0.0207, 'learning_rate': 4.1974e-06, 'epoch': 1.35, 'throughput': 503.17} - -[INFO|callbacks.py:310] 2024-07-29 20:39:20,738 >> {'loss': 0.0815, 'learning_rate': 4.1907e-06, 'epoch': 1.36, 'throughput': 503.23} - -[INFO|callbacks.py:310] 2024-07-29 20:39:33,884 >> {'loss': 0.0453, 'learning_rate': 4.1840e-06, 'epoch': 1.36, 'throughput': 503.15} - -[INFO|callbacks.py:310] 2024-07-29 20:39:47,056 >> {'loss': 0.0791, 'learning_rate': 4.1773e-06, 'epoch': 1.37, 'throughput': 503.12} - -[INFO|callbacks.py:310] 2024-07-29 20:40:00,200 >> {'loss': 0.0344, 'learning_rate': 4.1706e-06, 'epoch': 1.37, 'throughput': 503.10} - -[INFO|callbacks.py:310] 2024-07-29 20:40:13,359 >> {'loss': 0.0532, 'learning_rate': 4.1638e-06, 'epoch': 1.38, 'throughput': 503.17} - -[INFO|callbacks.py:310] 2024-07-29 20:40:26,522 >> {'loss': 0.0574, 'learning_rate': 4.1570e-06, 'epoch': 1.39, 'throughput': 503.20} - -[INFO|callbacks.py:310] 2024-07-29 20:40:39,672 >> {'loss': 0.1002, 'learning_rate': 4.1502e-06, 'epoch': 1.39, 'throughput': 503.19} - -[INFO|callbacks.py:310] 2024-07-29 20:40:52,827 >> {'loss': 0.0314, 'learning_rate': 4.1434e-06, 'epoch': 1.40, 'throughput': 503.23} - -[INFO|callbacks.py:310] 2024-07-29 20:41:05,991 >> {'loss': 0.0689, 'learning_rate': 4.1365e-06, 'epoch': 1.40, 'throughput': 503.18} - -[INFO|callbacks.py:310] 2024-07-29 20:41:19,153 >> {'loss': 0.0587, 'learning_rate': 4.1297e-06, 'epoch': 1.41, 'throughput': 503.08} - -[INFO|callbacks.py:310] 2024-07-29 20:41:32,303 >> {'loss': 0.0433, 'learning_rate': 4.1228e-06, 'epoch': 1.41, 'throughput': 503.09} - -[INFO|callbacks.py:310] 2024-07-29 20:41:45,446 >> {'loss': 0.0306, 'learning_rate': 4.1158e-06, 'epoch': 1.42, 'throughput': 503.06} - -[INFO|callbacks.py:310] 2024-07-29 20:41:58,595 >> {'loss': 0.0454, 'learning_rate': 4.1089e-06, 'epoch': 1.43, 'throughput': 503.05} - -[INFO|callbacks.py:310] 2024-07-29 20:42:11,751 >> {'loss': 0.0464, 'learning_rate': 4.1019e-06, 'epoch': 1.43, 'throughput': 503.16} - -[INFO|callbacks.py:310] 2024-07-29 20:42:24,905 >> {'loss': 0.0578, 'learning_rate': 4.0950e-06, 'epoch': 1.44, 'throughput': 503.24} - -[INFO|callbacks.py:310] 2024-07-29 20:42:38,051 >> {'loss': 0.0369, 'learning_rate': 4.0880e-06, 'epoch': 1.44, 'throughput': 503.14} - -[INFO|callbacks.py:310] 2024-07-29 20:42:51,203 >> {'loss': 0.0686, 'learning_rate': 4.0809e-06, 'epoch': 1.45, 'throughput': 503.14} - -[INFO|callbacks.py:310] 2024-07-29 20:43:04,367 >> {'loss': 0.0873, 'learning_rate': 4.0739e-06, 'epoch': 1.45, 'throughput': 503.20} - -[INFO|callbacks.py:310] 2024-07-29 20:43:17,517 >> {'loss': 0.0466, 'learning_rate': 4.0668e-06, 'epoch': 1.46, 'throughput': 503.27} - -[INFO|callbacks.py:310] 2024-07-29 20:43:30,682 >> {'loss': 0.0349, 'learning_rate': 4.0597e-06, 'epoch': 1.47, 'throughput': 503.26} - -[INFO|callbacks.py:310] 2024-07-29 20:43:43,848 >> {'loss': 0.0610, 'learning_rate': 4.0526e-06, 'epoch': 1.47, 'throughput': 503.31} - -[INFO|callbacks.py:310] 2024-07-29 20:43:57,002 >> {'loss': 0.0801, 'learning_rate': 4.0455e-06, 'epoch': 1.48, 'throughput': 503.38} - -[INFO|callbacks.py:310] 2024-07-29 20:44:10,159 >> {'loss': 0.0406, 'learning_rate': 4.0384e-06, 'epoch': 1.48, 'throughput': 503.22} - -[INFO|callbacks.py:310] 2024-07-29 20:44:23,330 >> {'loss': 0.0417, 'learning_rate': 4.0312e-06, 'epoch': 1.49, 'throughput': 503.30} - -[INFO|callbacks.py:310] 2024-07-29 20:44:36,492 >> {'loss': 0.0178, 'learning_rate': 4.0240e-06, 'epoch': 1.49, 'throughput': 503.25} - -[INFO|callbacks.py:310] 2024-07-29 20:44:49,641 >> {'loss': 0.0238, 'learning_rate': 4.0168e-06, 'epoch': 1.50, 'throughput': 503.25} - -[INFO|callbacks.py:310] 2024-07-29 20:45:02,803 >> {'loss': 0.0842, 'learning_rate': 4.0096e-06, 'epoch': 1.51, 'throughput': 503.30} - -[INFO|callbacks.py:310] 2024-07-29 20:45:15,957 >> {'loss': 0.0354, 'learning_rate': 4.0023e-06, 'epoch': 1.51, 'throughput': 503.34} - -[INFO|callbacks.py:310] 2024-07-29 20:45:29,115 >> {'loss': 0.0720, 'learning_rate': 3.9951e-06, 'epoch': 1.52, 'throughput': 503.41} - -[INFO|callbacks.py:310] 2024-07-29 20:45:42,269 >> {'loss': 0.0297, 'learning_rate': 3.9878e-06, 'epoch': 1.52, 'throughput': 503.39} - -[INFO|callbacks.py:310] 2024-07-29 20:45:55,417 >> {'loss': 0.0355, 'learning_rate': 3.9805e-06, 'epoch': 1.53, 'throughput': 503.44} - -[INFO|callbacks.py:310] 2024-07-29 20:46:08,575 >> {'loss': 0.0669, 'learning_rate': 3.9731e-06, 'epoch': 1.53, 'throughput': 503.43} - -[INFO|callbacks.py:310] 2024-07-29 20:46:21,711 >> {'loss': 0.0622, 'learning_rate': 3.9658e-06, 'epoch': 1.54, 'throughput': 503.42} - -[INFO|callbacks.py:310] 2024-07-29 20:46:34,866 >> {'loss': 0.0436, 'learning_rate': 3.9584e-06, 'epoch': 1.55, 'throughput': 503.42} - -[INFO|callbacks.py:310] 2024-07-29 20:46:48,020 >> {'loss': 0.0265, 'learning_rate': 3.9510e-06, 'epoch': 1.55, 'throughput': 503.40} - -[INFO|callbacks.py:310] 2024-07-29 20:47:01,179 >> {'loss': 0.0165, 'learning_rate': 3.9436e-06, 'epoch': 1.56, 'throughput': 503.37} - -[INFO|callbacks.py:310] 2024-07-29 20:47:14,348 >> {'loss': 0.0246, 'learning_rate': 3.9362e-06, 'epoch': 1.56, 'throughput': 503.39} - -[INFO|callbacks.py:310] 2024-07-29 20:47:27,508 >> {'loss': 0.0929, 'learning_rate': 3.9288e-06, 'epoch': 1.57, 'throughput': 503.41} - -[INFO|callbacks.py:310] 2024-07-29 20:47:40,656 >> {'loss': 0.0591, 'learning_rate': 3.9213e-06, 'epoch': 1.57, 'throughput': 503.34} - -[INFO|callbacks.py:310] 2024-07-29 20:47:53,821 >> {'loss': 0.0479, 'learning_rate': 3.9138e-06, 'epoch': 1.58, 'throughput': 503.33} - -[INFO|callbacks.py:310] 2024-07-29 20:48:06,984 >> {'loss': 0.0414, 'learning_rate': 3.9063e-06, 'epoch': 1.59, 'throughput': 503.30} - -[INFO|callbacks.py:310] 2024-07-29 20:48:20,134 >> {'loss': 0.0591, 'learning_rate': 3.8988e-06, 'epoch': 1.59, 'throughput': 503.28} - -[INFO|callbacks.py:310] 2024-07-29 20:48:33,301 >> {'loss': 0.0797, 'learning_rate': 3.8913e-06, 'epoch': 1.60, 'throughput': 503.26} - -[INFO|callbacks.py:310] 2024-07-29 20:48:46,454 >> {'loss': 0.0338, 'learning_rate': 3.8837e-06, 'epoch': 1.60, 'throughput': 503.24} - -[INFO|callbacks.py:310] 2024-07-29 20:48:59,609 >> {'loss': 0.0764, 'learning_rate': 3.8762e-06, 'epoch': 1.61, 'throughput': 503.26} - -[INFO|callbacks.py:310] 2024-07-29 20:49:12,741 >> {'loss': 0.0835, 'learning_rate': 3.8686e-06, 'epoch': 1.61, 'throughput': 503.34} - -[INFO|callbacks.py:310] 2024-07-29 20:49:25,905 >> {'loss': 0.0644, 'learning_rate': 3.8610e-06, 'epoch': 1.62, 'throughput': 503.36} - -[INFO|callbacks.py:310] 2024-07-29 20:49:39,047 >> {'loss': 0.0265, 'learning_rate': 3.8533e-06, 'epoch': 1.63, 'throughput': 503.34} - -[INFO|callbacks.py:310] 2024-07-29 20:49:52,203 >> {'loss': 0.0934, 'learning_rate': 3.8457e-06, 'epoch': 1.63, 'throughput': 503.44} - -[INFO|callbacks.py:310] 2024-07-29 20:50:05,357 >> {'loss': 0.0436, 'learning_rate': 3.8380e-06, 'epoch': 1.64, 'throughput': 503.37} - -[INFO|callbacks.py:310] 2024-07-29 20:50:18,524 >> {'loss': 0.0461, 'learning_rate': 3.8304e-06, 'epoch': 1.64, 'throughput': 503.35} - -[INFO|callbacks.py:310] 2024-07-29 20:50:31,674 >> {'loss': 0.0521, 'learning_rate': 3.8227e-06, 'epoch': 1.65, 'throughput': 503.37} - -[INFO|callbacks.py:310] 2024-07-29 20:50:44,824 >> {'loss': 0.0282, 'learning_rate': 3.8149e-06, 'epoch': 1.65, 'throughput': 503.27} - -[INFO|callbacks.py:310] 2024-07-29 20:50:57,972 >> {'loss': 0.0408, 'learning_rate': 3.8072e-06, 'epoch': 1.66, 'throughput': 503.30} - -[INFO|callbacks.py:310] 2024-07-29 20:51:11,121 >> {'loss': 0.0435, 'learning_rate': 3.7995e-06, 'epoch': 1.67, 'throughput': 503.27} - -[INFO|callbacks.py:310] 2024-07-29 20:51:24,261 >> {'loss': 0.0459, 'learning_rate': 3.7917e-06, 'epoch': 1.67, 'throughput': 503.30} - -[INFO|callbacks.py:310] 2024-07-29 20:51:37,426 >> {'loss': 0.0460, 'learning_rate': 3.7839e-06, 'epoch': 1.68, 'throughput': 503.29} - -[INFO|callbacks.py:310] 2024-07-29 20:51:50,576 >> {'loss': 0.0338, 'learning_rate': 3.7761e-06, 'epoch': 1.68, 'throughput': 503.35} - -[INFO|callbacks.py:310] 2024-07-29 20:52:03,733 >> {'loss': 0.0304, 'learning_rate': 3.7683e-06, 'epoch': 1.69, 'throughput': 503.29} - -[INFO|callbacks.py:310] 2024-07-29 20:52:16,890 >> {'loss': 0.0652, 'learning_rate': 3.7605e-06, 'epoch': 1.69, 'throughput': 503.33} - -[INFO|callbacks.py:310] 2024-07-29 20:52:30,044 >> {'loss': 0.0233, 'learning_rate': 3.7526e-06, 'epoch': 1.70, 'throughput': 503.31} - -[INFO|callbacks.py:310] 2024-07-29 20:52:43,195 >> {'loss': 0.0447, 'learning_rate': 3.7448e-06, 'epoch': 1.70, 'throughput': 503.29} - -[INFO|callbacks.py:310] 2024-07-29 20:52:56,353 >> {'loss': 0.0599, 'learning_rate': 3.7369e-06, 'epoch': 1.71, 'throughput': 503.30} - -[INFO|callbacks.py:310] 2024-07-29 20:53:09,524 >> {'loss': 0.0607, 'learning_rate': 3.7290e-06, 'epoch': 1.72, 'throughput': 503.34} - -[INFO|callbacks.py:310] 2024-07-29 20:53:22,688 >> {'loss': 0.0449, 'learning_rate': 3.7211e-06, 'epoch': 1.72, 'throughput': 503.35} - -[INFO|callbacks.py:310] 2024-07-29 20:53:35,826 >> {'loss': 0.0294, 'learning_rate': 3.7131e-06, 'epoch': 1.73, 'throughput': 503.24} - -[INFO|callbacks.py:310] 2024-07-29 20:53:48,970 >> {'loss': 0.0580, 'learning_rate': 3.7052e-06, 'epoch': 1.73, 'throughput': 503.21} - -[INFO|callbacks.py:310] 2024-07-29 20:54:02,117 >> {'loss': 0.0774, 'learning_rate': 3.6972e-06, 'epoch': 1.74, 'throughput': 503.23} - -[INFO|callbacks.py:310] 2024-07-29 20:54:15,263 >> {'loss': 0.0718, 'learning_rate': 3.6892e-06, 'epoch': 1.74, 'throughput': 503.23} - -[INFO|callbacks.py:310] 2024-07-29 20:54:28,411 >> {'loss': 0.0642, 'learning_rate': 3.6812e-06, 'epoch': 1.75, 'throughput': 503.21} - -[INFO|callbacks.py:310] 2024-07-29 20:54:41,573 >> {'loss': 0.0992, 'learning_rate': 3.6732e-06, 'epoch': 1.76, 'throughput': 503.19} - -[INFO|callbacks.py:310] 2024-07-29 20:54:54,731 >> {'loss': 0.0325, 'learning_rate': 3.6652e-06, 'epoch': 1.76, 'throughput': 503.14} - -[INFO|callbacks.py:310] 2024-07-29 20:55:07,903 >> {'loss': 0.0403, 'learning_rate': 3.6572e-06, 'epoch': 1.77, 'throughput': 503.07} - -[INFO|callbacks.py:310] 2024-07-29 20:55:21,047 >> {'loss': 0.0619, 'learning_rate': 3.6491e-06, 'epoch': 1.77, 'throughput': 503.02} - -[INFO|callbacks.py:310] 2024-07-29 20:55:34,198 >> {'loss': 0.0609, 'learning_rate': 3.6410e-06, 'epoch': 1.78, 'throughput': 502.99} - -[INFO|callbacks.py:310] 2024-07-29 20:55:47,354 >> {'loss': 0.0523, 'learning_rate': 3.6330e-06, 'epoch': 1.78, 'throughput': 502.93} - -[INFO|callbacks.py:310] 2024-07-29 20:56:00,520 >> {'loss': 0.0481, 'learning_rate': 3.6249e-06, 'epoch': 1.79, 'throughput': 503.03} - -[INFO|callbacks.py:310] 2024-07-29 20:56:13,676 >> {'loss': 0.0353, 'learning_rate': 3.6167e-06, 'epoch': 1.80, 'throughput': 502.96} - -[INFO|callbacks.py:310] 2024-07-29 20:56:26,825 >> {'loss': 0.0408, 'learning_rate': 3.6086e-06, 'epoch': 1.80, 'throughput': 502.92} - -[INFO|callbacks.py:310] 2024-07-29 20:56:39,981 >> {'loss': 0.0368, 'learning_rate': 3.6005e-06, 'epoch': 1.81, 'throughput': 502.88} - -[INFO|callbacks.py:310] 2024-07-29 20:56:53,139 >> {'loss': 0.0614, 'learning_rate': 3.5923e-06, 'epoch': 1.81, 'throughput': 502.87} - -[INFO|callbacks.py:310] 2024-07-29 20:57:06,297 >> {'loss': 0.0328, 'learning_rate': 3.5841e-06, 'epoch': 1.82, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 20:57:19,454 >> {'loss': 0.0658, 'learning_rate': 3.5759e-06, 'epoch': 1.82, 'throughput': 502.69} - -[INFO|callbacks.py:310] 2024-07-29 20:57:32,614 >> {'loss': 0.0491, 'learning_rate': 3.5677e-06, 'epoch': 1.83, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 20:57:45,759 >> {'loss': 0.0411, 'learning_rate': 3.5595e-06, 'epoch': 1.84, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 20:57:58,918 >> {'loss': 0.0281, 'learning_rate': 3.5513e-06, 'epoch': 1.84, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 20:58:12,080 >> {'loss': 0.0319, 'learning_rate': 3.5430e-06, 'epoch': 1.85, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 20:58:25,228 >> {'loss': 0.0919, 'learning_rate': 3.5348e-06, 'epoch': 1.85, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 20:58:38,378 >> {'loss': 0.0690, 'learning_rate': 3.5265e-06, 'epoch': 1.86, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 20:58:51,528 >> {'loss': 0.0615, 'learning_rate': 3.5182e-06, 'epoch': 1.86, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 20:59:04,693 >> {'loss': 0.0460, 'learning_rate': 3.5099e-06, 'epoch': 1.87, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 20:59:17,837 >> {'loss': 0.0317, 'learning_rate': 3.5016e-06, 'epoch': 1.88, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 20:59:30,998 >> {'loss': 0.0614, 'learning_rate': 3.4933e-06, 'epoch': 1.88, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 20:59:44,164 >> {'loss': 0.0651, 'learning_rate': 3.4849e-06, 'epoch': 1.89, 'throughput': 502.70} - -[INFO|callbacks.py:310] 2024-07-29 20:59:57,310 >> {'loss': 0.0283, 'learning_rate': 3.4766e-06, 'epoch': 1.89, 'throughput': 502.70} - -[INFO|callbacks.py:310] 2024-07-29 21:00:10,464 >> {'loss': 0.0406, 'learning_rate': 3.4682e-06, 'epoch': 1.90, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 21:00:23,624 >> {'loss': 0.0290, 'learning_rate': 3.4599e-06, 'epoch': 1.90, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 21:00:36,783 >> {'loss': 0.0202, 'learning_rate': 3.4515e-06, 'epoch': 1.91, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 21:00:49,917 >> {'loss': 0.0246, 'learning_rate': 3.4431e-06, 'epoch': 1.92, 'throughput': 502.59} - -[INFO|callbacks.py:310] 2024-07-29 21:01:03,061 >> {'loss': 0.0755, 'learning_rate': 3.4346e-06, 'epoch': 1.92, 'throughput': 502.56} - -[INFO|callbacks.py:310] 2024-07-29 21:01:16,220 >> {'loss': 0.0162, 'learning_rate': 3.4262e-06, 'epoch': 1.93, 'throughput': 502.53} - -[INFO|callbacks.py:310] 2024-07-29 21:01:29,385 >> {'loss': 0.0127, 'learning_rate': 3.4178e-06, 'epoch': 1.93, 'throughput': 502.49} - -[INFO|callbacks.py:310] 2024-07-29 21:01:42,525 >> {'loss': 0.0225, 'learning_rate': 3.4093e-06, 'epoch': 1.94, 'throughput': 502.40} - -[INFO|callbacks.py:310] 2024-07-29 21:01:55,676 >> {'loss': 0.0556, 'learning_rate': 3.4009e-06, 'epoch': 1.94, 'throughput': 502.39} - -[INFO|callbacks.py:310] 2024-07-29 21:02:08,818 >> {'loss': 0.0282, 'learning_rate': 3.3924e-06, 'epoch': 1.95, 'throughput': 502.34} - -[INFO|callbacks.py:310] 2024-07-29 21:02:21,970 >> {'loss': 0.0222, 'learning_rate': 3.3839e-06, 'epoch': 1.96, 'throughput': 502.45} - -[INFO|callbacks.py:310] 2024-07-29 21:02:35,128 >> {'loss': 0.0387, 'learning_rate': 3.3754e-06, 'epoch': 1.96, 'throughput': 502.44} - -[INFO|callbacks.py:310] 2024-07-29 21:02:48,290 >> {'loss': 0.0589, 'learning_rate': 3.3669e-06, 'epoch': 1.97, 'throughput': 502.45} - -[INFO|callbacks.py:310] 2024-07-29 21:03:01,437 >> {'loss': 0.1223, 'learning_rate': 3.3584e-06, 'epoch': 1.97, 'throughput': 502.41} - -[INFO|callbacks.py:310] 2024-07-29 21:03:14,590 >> {'loss': 0.0546, 'learning_rate': 3.3498e-06, 'epoch': 1.98, 'throughput': 502.48} - -[INFO|callbacks.py:310] 2024-07-29 21:03:27,741 >> {'loss': 0.0686, 'learning_rate': 3.3413e-06, 'epoch': 1.98, 'throughput': 502.46} - -[INFO|callbacks.py:310] 2024-07-29 21:03:40,899 >> {'loss': 0.0316, 'learning_rate': 3.3327e-06, 'epoch': 1.99, 'throughput': 502.42} - -[INFO|callbacks.py:310] 2024-07-29 21:03:54,054 >> {'loss': 0.0360, 'learning_rate': 3.3242e-06, 'epoch': 2.00, 'throughput': 502.40} - -[INFO|callbacks.py:310] 2024-07-29 21:04:07,195 >> {'loss': 0.0681, 'learning_rate': 3.3156e-06, 'epoch': 2.00, 'throughput': 502.46} - -[INFO|callbacks.py:310] 2024-07-29 21:04:20,351 >> {'loss': 0.0133, 'learning_rate': 3.3070e-06, 'epoch': 2.01, 'throughput': 502.45} - -[INFO|callbacks.py:310] 2024-07-29 21:04:33,501 >> {'loss': 0.0086, 'learning_rate': 3.2984e-06, 'epoch': 2.01, 'throughput': 502.52} - -[INFO|callbacks.py:310] 2024-07-29 21:04:46,663 >> {'loss': 0.0066, 'learning_rate': 3.2898e-06, 'epoch': 2.02, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 21:04:59,818 >> {'loss': 0.0137, 'learning_rate': 3.2812e-06, 'epoch': 2.02, 'throughput': 502.53} - -[INFO|callbacks.py:310] 2024-07-29 21:05:12,964 >> {'loss': 0.0104, 'learning_rate': 3.2725e-06, 'epoch': 2.03, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 21:05:26,135 >> {'loss': 0.0290, 'learning_rate': 3.2639e-06, 'epoch': 2.04, 'throughput': 502.53} - -[INFO|callbacks.py:310] 2024-07-29 21:05:39,290 >> {'loss': 0.0628, 'learning_rate': 3.2553e-06, 'epoch': 2.04, 'throughput': 502.53} - -[INFO|callbacks.py:310] 2024-07-29 21:05:52,458 >> {'loss': 0.0492, 'learning_rate': 3.2466e-06, 'epoch': 2.05, 'throughput': 502.47} - -[INFO|callbacks.py:310] 2024-07-29 21:06:05,607 >> {'loss': 0.0217, 'learning_rate': 3.2379e-06, 'epoch': 2.05, 'throughput': 502.48} - -[INFO|callbacks.py:310] 2024-07-29 21:06:18,753 >> {'loss': 0.0100, 'learning_rate': 3.2292e-06, 'epoch': 2.06, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 21:06:31,895 >> {'loss': 0.0193, 'learning_rate': 3.2206e-06, 'epoch': 2.06, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 21:06:45,042 >> {'loss': 0.0160, 'learning_rate': 3.2119e-06, 'epoch': 2.07, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 21:06:58,200 >> {'loss': 0.0025, 'learning_rate': 3.2031e-06, 'epoch': 2.08, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 21:07:11,346 >> {'loss': 0.0020, 'learning_rate': 3.1944e-06, 'epoch': 2.08, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 21:07:24,484 >> {'loss': 0.0113, 'learning_rate': 3.1857e-06, 'epoch': 2.09, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 21:07:37,639 >> {'loss': 0.0047, 'learning_rate': 3.1770e-06, 'epoch': 2.09, 'throughput': 502.59} - -[INFO|callbacks.py:310] 2024-07-29 21:07:50,784 >> {'loss': 0.0186, 'learning_rate': 3.1682e-06, 'epoch': 2.10, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 21:08:03,924 >> {'loss': 0.0033, 'learning_rate': 3.1595e-06, 'epoch': 2.10, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 21:08:17,083 >> {'loss': 0.0344, 'learning_rate': 3.1507e-06, 'epoch': 2.11, 'throughput': 502.55} - -[INFO|callbacks.py:310] 2024-07-29 21:08:30,228 >> {'loss': 0.0362, 'learning_rate': 3.1419e-06, 'epoch': 2.12, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 21:08:43,402 >> {'loss': 0.0009, 'learning_rate': 3.1332e-06, 'epoch': 2.12, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 21:08:56,563 >> {'loss': 0.0317, 'learning_rate': 3.1244e-06, 'epoch': 2.13, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 21:09:09,721 >> {'loss': 0.0158, 'learning_rate': 3.1156e-06, 'epoch': 2.13, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 21:09:22,871 >> {'loss': 0.0154, 'learning_rate': 3.1068e-06, 'epoch': 2.14, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 21:09:36,027 >> {'loss': 0.0178, 'learning_rate': 3.0980e-06, 'epoch': 2.14, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 21:09:49,174 >> {'loss': 0.0686, 'learning_rate': 3.0891e-06, 'epoch': 2.15, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 21:10:02,322 >> {'loss': 0.0289, 'learning_rate': 3.0803e-06, 'epoch': 2.16, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 21:10:15,474 >> {'loss': 0.0122, 'learning_rate': 3.0715e-06, 'epoch': 2.16, 'throughput': 502.74} - -[INFO|callbacks.py:310] 2024-07-29 21:10:28,631 >> {'loss': 0.0258, 'learning_rate': 3.0626e-06, 'epoch': 2.17, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 21:10:41,794 >> {'loss': 0.0033, 'learning_rate': 3.0538e-06, 'epoch': 2.17, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 21:10:54,950 >> {'loss': 0.0100, 'learning_rate': 3.0449e-06, 'epoch': 2.18, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 21:11:08,105 >> {'loss': 0.0185, 'learning_rate': 3.0361e-06, 'epoch': 2.18, 'throughput': 502.53} - -[INFO|callbacks.py:310] 2024-07-29 21:11:21,255 >> {'loss': 0.0171, 'learning_rate': 3.0272e-06, 'epoch': 2.19, 'throughput': 502.55} - -[INFO|callbacks.py:310] 2024-07-29 21:11:34,407 >> {'loss': 0.0049, 'learning_rate': 3.0183e-06, 'epoch': 2.20, 'throughput': 502.56} - -[INFO|callbacks.py:310] 2024-07-29 21:11:47,553 >> {'loss': 0.0044, 'learning_rate': 3.0094e-06, 'epoch': 2.20, 'throughput': 502.56} - -[INFO|callbacks.py:310] 2024-07-29 21:12:00,698 >> {'loss': 0.0218, 'learning_rate': 3.0005e-06, 'epoch': 2.21, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 21:12:13,849 >> {'loss': 0.0260, 'learning_rate': 2.9916e-06, 'epoch': 2.21, 'throughput': 502.59} - -[INFO|callbacks.py:310] 2024-07-29 21:12:27,015 >> {'loss': 0.0037, 'learning_rate': 2.9827e-06, 'epoch': 2.22, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 21:12:40,165 >> {'loss': 0.0031, 'learning_rate': 2.9738e-06, 'epoch': 2.22, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 21:12:53,308 >> {'loss': 0.0188, 'learning_rate': 2.9649e-06, 'epoch': 2.23, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 21:13:06,471 >> {'loss': 0.0302, 'learning_rate': 2.9560e-06, 'epoch': 2.24, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 21:13:19,625 >> {'loss': 0.0205, 'learning_rate': 2.9470e-06, 'epoch': 2.24, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 21:13:32,784 >> {'loss': 0.0347, 'learning_rate': 2.9381e-06, 'epoch': 2.25, 'throughput': 502.77} - -[INFO|callbacks.py:310] 2024-07-29 21:13:45,932 >> {'loss': 0.0650, 'learning_rate': 2.9292e-06, 'epoch': 2.25, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 21:13:59,094 >> {'loss': 0.0662, 'learning_rate': 2.9202e-06, 'epoch': 2.26, 'throughput': 502.75} - -[INFO|callbacks.py:310] 2024-07-29 21:14:12,257 >> {'loss': 0.0348, 'learning_rate': 2.9113e-06, 'epoch': 2.26, 'throughput': 502.74} - -[INFO|callbacks.py:310] 2024-07-29 21:14:25,404 >> {'loss': 0.0105, 'learning_rate': 2.9023e-06, 'epoch': 2.27, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 21:14:38,559 >> {'loss': 0.0478, 'learning_rate': 2.8933e-06, 'epoch': 2.28, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 21:14:51,716 >> {'loss': 0.0048, 'learning_rate': 2.8844e-06, 'epoch': 2.28, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 21:15:04,868 >> {'loss': 0.0416, 'learning_rate': 2.8754e-06, 'epoch': 2.29, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 21:15:18,013 >> {'loss': 0.0090, 'learning_rate': 2.8664e-06, 'epoch': 2.29, 'throughput': 502.76} - -[INFO|callbacks.py:310] 2024-07-29 21:15:31,167 >> {'loss': 0.0145, 'learning_rate': 2.8574e-06, 'epoch': 2.30, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 21:15:44,333 >> {'loss': 0.0239, 'learning_rate': 2.8484e-06, 'epoch': 2.30, 'throughput': 502.87} - -[INFO|callbacks.py:310] 2024-07-29 21:15:57,489 >> {'loss': 0.0151, 'learning_rate': 2.8394e-06, 'epoch': 2.31, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 21:16:10,643 >> {'loss': 0.0597, 'learning_rate': 2.8304e-06, 'epoch': 2.32, 'throughput': 502.87} - -[INFO|callbacks.py:310] 2024-07-29 21:16:23,782 >> {'loss': 0.0213, 'learning_rate': 2.8214e-06, 'epoch': 2.32, 'throughput': 502.96} - -[INFO|callbacks.py:310] 2024-07-29 21:16:36,930 >> {'loss': 0.0197, 'learning_rate': 2.8124e-06, 'epoch': 2.33, 'throughput': 502.94} - -[INFO|callbacks.py:310] 2024-07-29 21:16:50,066 >> {'loss': 0.0435, 'learning_rate': 2.8034e-06, 'epoch': 2.33, 'throughput': 502.90} - -[INFO|callbacks.py:310] 2024-07-29 21:17:03,229 >> {'loss': 0.0127, 'learning_rate': 2.7944e-06, 'epoch': 2.34, 'throughput': 502.85} - -[INFO|callbacks.py:310] 2024-07-29 21:17:16,382 >> {'loss': 0.0123, 'learning_rate': 2.7854e-06, 'epoch': 2.34, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 21:17:29,536 >> {'loss': 0.0331, 'learning_rate': 2.7764e-06, 'epoch': 2.35, 'throughput': 502.94} - -[INFO|callbacks.py:310] 2024-07-29 21:17:42,706 >> {'loss': 0.0507, 'learning_rate': 2.7673e-06, 'epoch': 2.35, 'throughput': 502.98} - -[INFO|callbacks.py:310] 2024-07-29 21:17:55,865 >> {'loss': 0.0447, 'learning_rate': 2.7583e-06, 'epoch': 2.36, 'throughput': 502.99} - -[INFO|callbacks.py:310] 2024-07-29 21:18:09,015 >> {'loss': 0.0252, 'learning_rate': 2.7493e-06, 'epoch': 2.37, 'throughput': 502.95} - -[INFO|callbacks.py:310] 2024-07-29 21:18:22,177 >> {'loss': 0.0117, 'learning_rate': 2.7402e-06, 'epoch': 2.37, 'throughput': 502.85} - -[INFO|callbacks.py:310] 2024-07-29 21:18:35,328 >> {'loss': 0.0149, 'learning_rate': 2.7312e-06, 'epoch': 2.38, 'throughput': 502.83} - -[INFO|callbacks.py:310] 2024-07-29 21:18:48,489 >> {'loss': 0.0038, 'learning_rate': 2.7222e-06, 'epoch': 2.38, 'throughput': 502.81} - -[INFO|callbacks.py:310] 2024-07-29 21:19:01,653 >> {'loss': 0.0374, 'learning_rate': 2.7131e-06, 'epoch': 2.39, 'throughput': 502.85} - -[INFO|callbacks.py:310] 2024-07-29 21:19:14,809 >> {'loss': 0.0246, 'learning_rate': 2.7041e-06, 'epoch': 2.39, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 21:19:27,968 >> {'loss': 0.0363, 'learning_rate': 2.6950e-06, 'epoch': 2.40, 'throughput': 502.81} - -[INFO|callbacks.py:310] 2024-07-29 21:19:41,117 >> {'loss': 0.0197, 'learning_rate': 2.6860e-06, 'epoch': 2.41, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 21:19:54,258 >> {'loss': 0.0113, 'learning_rate': 2.6769e-06, 'epoch': 2.41, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 21:20:07,398 >> {'loss': 0.0255, 'learning_rate': 2.6678e-06, 'epoch': 2.42, 'throughput': 502.81} - -[INFO|callbacks.py:310] 2024-07-29 21:20:20,558 >> {'loss': 0.0184, 'learning_rate': 2.6588e-06, 'epoch': 2.42, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 21:20:33,726 >> {'loss': 0.0397, 'learning_rate': 2.6497e-06, 'epoch': 2.43, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 21:20:46,873 >> {'loss': 0.0242, 'learning_rate': 2.6407e-06, 'epoch': 2.43, 'throughput': 502.89} - -[INFO|callbacks.py:310] 2024-07-29 21:21:00,039 >> {'loss': 0.0168, 'learning_rate': 2.6316e-06, 'epoch': 2.44, 'throughput': 502.91} - -[INFO|callbacks.py:310] 2024-07-29 21:21:13,190 >> {'loss': 0.0034, 'learning_rate': 2.6225e-06, 'epoch': 2.45, 'throughput': 502.94} - -[INFO|callbacks.py:310] 2024-07-29 21:21:26,345 >> {'loss': 0.0063, 'learning_rate': 2.6135e-06, 'epoch': 2.45, 'throughput': 502.95} - -[INFO|callbacks.py:310] 2024-07-29 21:21:39,486 >> {'loss': 0.0229, 'learning_rate': 2.6044e-06, 'epoch': 2.46, 'throughput': 503.00} - -[INFO|callbacks.py:310] 2024-07-29 21:21:52,653 >> {'loss': 0.0047, 'learning_rate': 2.5953e-06, 'epoch': 2.46, 'throughput': 503.02} - -[INFO|callbacks.py:310] 2024-07-29 21:22:05,807 >> {'loss': 0.0271, 'learning_rate': 2.5862e-06, 'epoch': 2.47, 'throughput': 503.01} - -[INFO|callbacks.py:310] 2024-07-29 21:22:18,965 >> {'loss': 0.0041, 'learning_rate': 2.5772e-06, 'epoch': 2.47, 'throughput': 502.98} - -[INFO|callbacks.py:310] 2024-07-29 21:22:32,118 >> {'loss': 0.0150, 'learning_rate': 2.5681e-06, 'epoch': 2.48, 'throughput': 502.98} - -[INFO|callbacks.py:310] 2024-07-29 21:22:45,270 >> {'loss': 0.0420, 'learning_rate': 2.5590e-06, 'epoch': 2.49, 'throughput': 502.93} - -[INFO|callbacks.py:310] 2024-07-29 21:22:58,439 >> {'loss': 0.0099, 'learning_rate': 2.5499e-06, 'epoch': 2.49, 'throughput': 502.91} - -[INFO|callbacks.py:310] 2024-07-29 21:23:11,587 >> {'loss': 0.0215, 'learning_rate': 2.5409e-06, 'epoch': 2.50, 'throughput': 502.87} - -[INFO|callbacks.py:310] 2024-07-29 21:23:24,746 >> {'loss': 0.0013, 'learning_rate': 2.5318e-06, 'epoch': 2.50, 'throughput': 502.92} - -[INFO|callbacks.py:310] 2024-07-29 21:23:37,899 >> {'loss': 0.0248, 'learning_rate': 2.5227e-06, 'epoch': 2.51, 'throughput': 502.89} - -[INFO|callbacks.py:310] 2024-07-29 21:23:51,045 >> {'loss': 0.0340, 'learning_rate': 2.5136e-06, 'epoch': 2.51, 'throughput': 502.99} - -[INFO|callbacks.py:310] 2024-07-29 21:24:04,190 >> {'loss': 0.0484, 'learning_rate': 2.5045e-06, 'epoch': 2.52, 'throughput': 503.02} - -[INFO|callbacks.py:310] 2024-07-29 21:24:17,341 >> {'loss': 0.0519, 'learning_rate': 2.4955e-06, 'epoch': 2.53, 'throughput': 503.04} - -[INFO|callbacks.py:310] 2024-07-29 21:24:30,488 >> {'loss': 0.0088, 'learning_rate': 2.4864e-06, 'epoch': 2.53, 'throughput': 503.00} - -[INFO|callbacks.py:310] 2024-07-29 21:24:43,652 >> {'loss': 0.0249, 'learning_rate': 2.4773e-06, 'epoch': 2.54, 'throughput': 502.99} - -[INFO|callbacks.py:310] 2024-07-29 21:24:56,796 >> {'loss': 0.0041, 'learning_rate': 2.4682e-06, 'epoch': 2.54, 'throughput': 503.01} - -[INFO|callbacks.py:310] 2024-07-29 21:25:09,954 >> {'loss': 0.0058, 'learning_rate': 2.4591e-06, 'epoch': 2.55, 'throughput': 503.04} - -[INFO|callbacks.py:310] 2024-07-29 21:25:23,114 >> {'loss': 0.0053, 'learning_rate': 2.4501e-06, 'epoch': 2.55, 'throughput': 503.01} - -[INFO|callbacks.py:310] 2024-07-29 21:25:36,266 >> {'loss': 0.0212, 'learning_rate': 2.4410e-06, 'epoch': 2.56, 'throughput': 503.02} - -[INFO|callbacks.py:310] 2024-07-29 21:25:49,426 >> {'loss': 0.0074, 'learning_rate': 2.4319e-06, 'epoch': 2.57, 'throughput': 502.99} - -[INFO|callbacks.py:310] 2024-07-29 21:26:02,579 >> {'loss': 0.0075, 'learning_rate': 2.4228e-06, 'epoch': 2.57, 'throughput': 503.02} - -[INFO|callbacks.py:310] 2024-07-29 21:26:15,723 >> {'loss': 0.0119, 'learning_rate': 2.4138e-06, 'epoch': 2.58, 'throughput': 503.01} - -[INFO|callbacks.py:310] 2024-07-29 21:26:28,867 >> {'loss': 0.0231, 'learning_rate': 2.4047e-06, 'epoch': 2.58, 'throughput': 503.01} - -[INFO|callbacks.py:310] 2024-07-29 21:26:42,018 >> {'loss': 0.0046, 'learning_rate': 2.3956e-06, 'epoch': 2.59, 'throughput': 503.03} - -[INFO|callbacks.py:310] 2024-07-29 21:26:55,159 >> {'loss': 0.0071, 'learning_rate': 2.3865e-06, 'epoch': 2.59, 'throughput': 503.03} - -[INFO|callbacks.py:310] 2024-07-29 21:27:08,308 >> {'loss': 0.0148, 'learning_rate': 2.3775e-06, 'epoch': 2.60, 'throughput': 503.05} - -[INFO|callbacks.py:310] 2024-07-29 21:27:21,457 >> {'loss': 0.0011, 'learning_rate': 2.3684e-06, 'epoch': 2.61, 'throughput': 503.05} - -[INFO|callbacks.py:310] 2024-07-29 21:27:34,611 >> {'loss': 0.0050, 'learning_rate': 2.3593e-06, 'epoch': 2.61, 'throughput': 503.02} - -[INFO|callbacks.py:310] 2024-07-29 21:27:47,765 >> {'loss': 0.0348, 'learning_rate': 2.3503e-06, 'epoch': 2.62, 'throughput': 503.03} - -[INFO|callbacks.py:310] 2024-07-29 21:28:00,920 >> {'loss': 0.0024, 'learning_rate': 2.3412e-06, 'epoch': 2.62, 'throughput': 503.00} - -[INFO|callbacks.py:310] 2024-07-29 21:28:14,070 >> {'loss': 0.0013, 'learning_rate': 2.3322e-06, 'epoch': 2.63, 'throughput': 502.96} - -[INFO|callbacks.py:310] 2024-07-29 21:28:27,227 >> {'loss': 0.0408, 'learning_rate': 2.3231e-06, 'epoch': 2.63, 'throughput': 502.96} - -[INFO|callbacks.py:310] 2024-07-29 21:28:40,388 >> {'loss': 0.0069, 'learning_rate': 2.3140e-06, 'epoch': 2.64, 'throughput': 502.98} - -[INFO|callbacks.py:310] 2024-07-29 21:28:53,546 >> {'loss': 0.0382, 'learning_rate': 2.3050e-06, 'epoch': 2.65, 'throughput': 502.96} - -[INFO|callbacks.py:310] 2024-07-29 21:29:06,705 >> {'loss': 0.0179, 'learning_rate': 2.2959e-06, 'epoch': 2.65, 'throughput': 502.89} - -[INFO|callbacks.py:310] 2024-07-29 21:29:19,861 >> {'loss': 0.0073, 'learning_rate': 2.2869e-06, 'epoch': 2.66, 'throughput': 502.89} - -[INFO|callbacks.py:310] 2024-07-29 21:29:33,032 >> {'loss': 0.0600, 'learning_rate': 2.2778e-06, 'epoch': 2.66, 'throughput': 502.87} - -[INFO|callbacks.py:310] 2024-07-29 21:29:46,171 >> {'loss': 0.0443, 'learning_rate': 2.2688e-06, 'epoch': 2.67, 'throughput': 502.88} - -[INFO|callbacks.py:310] 2024-07-29 21:29:59,328 >> {'loss': 0.0082, 'learning_rate': 2.2598e-06, 'epoch': 2.67, 'throughput': 502.83} - -[INFO|callbacks.py:310] 2024-07-29 21:30:12,480 >> {'loss': 0.0036, 'learning_rate': 2.2507e-06, 'epoch': 2.68, 'throughput': 502.81} - -[INFO|callbacks.py:310] 2024-07-29 21:30:25,614 >> {'loss': 0.0129, 'learning_rate': 2.2417e-06, 'epoch': 2.69, 'throughput': 502.85} - -[INFO|callbacks.py:310] 2024-07-29 21:30:38,772 >> {'loss': 0.0122, 'learning_rate': 2.2327e-06, 'epoch': 2.69, 'throughput': 502.88} - -[INFO|callbacks.py:310] 2024-07-29 21:30:51,924 >> {'loss': 0.0285, 'learning_rate': 2.2236e-06, 'epoch': 2.70, 'throughput': 502.85} - -[INFO|callbacks.py:310] 2024-07-29 21:31:05,083 >> {'loss': 0.0132, 'learning_rate': 2.2146e-06, 'epoch': 2.70, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 21:31:18,251 >> {'loss': 0.0359, 'learning_rate': 2.2056e-06, 'epoch': 2.71, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 21:31:31,431 >> {'loss': 0.0039, 'learning_rate': 2.1966e-06, 'epoch': 2.71, 'throughput': 502.80} - -[INFO|callbacks.py:310] 2024-07-29 21:31:44,597 >> {'loss': 0.0151, 'learning_rate': 2.1876e-06, 'epoch': 2.72, 'throughput': 502.80} - -[INFO|callbacks.py:310] 2024-07-29 21:31:57,755 >> {'loss': 0.0406, 'learning_rate': 2.1786e-06, 'epoch': 2.73, 'throughput': 502.76} - -[INFO|callbacks.py:310] 2024-07-29 21:32:10,910 >> {'loss': 0.0055, 'learning_rate': 2.1696e-06, 'epoch': 2.73, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 21:32:24,071 >> {'loss': 0.0414, 'learning_rate': 2.1606e-06, 'epoch': 2.74, 'throughput': 502.74} - -[INFO|callbacks.py:310] 2024-07-29 21:32:37,221 >> {'loss': 0.0199, 'learning_rate': 2.1516e-06, 'epoch': 2.74, 'throughput': 502.77} - -[INFO|callbacks.py:310] 2024-07-29 21:32:50,374 >> {'loss': 0.0141, 'learning_rate': 2.1426e-06, 'epoch': 2.75, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 21:33:03,527 >> {'loss': 0.0281, 'learning_rate': 2.1336e-06, 'epoch': 2.75, 'throughput': 502.76} - -[INFO|callbacks.py:310] 2024-07-29 21:33:16,697 >> {'loss': 0.0273, 'learning_rate': 2.1246e-06, 'epoch': 2.76, 'throughput': 502.75} - -[INFO|callbacks.py:310] 2024-07-29 21:33:29,835 >> {'loss': 0.0048, 'learning_rate': 2.1156e-06, 'epoch': 2.77, 'throughput': 502.69} - -[INFO|callbacks.py:310] 2024-07-29 21:33:42,989 >> {'loss': 0.0312, 'learning_rate': 2.1067e-06, 'epoch': 2.77, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 21:33:56,153 >> {'loss': 0.0110, 'learning_rate': 2.0977e-06, 'epoch': 2.78, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 21:34:09,303 >> {'loss': 0.0109, 'learning_rate': 2.0887e-06, 'epoch': 2.78, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 21:34:22,464 >> {'loss': 0.0051, 'learning_rate': 2.0798e-06, 'epoch': 2.79, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 21:34:35,616 >> {'loss': 0.0033, 'learning_rate': 2.0708e-06, 'epoch': 2.79, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 21:34:48,760 >> {'loss': 0.0228, 'learning_rate': 2.0619e-06, 'epoch': 2.80, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 21:35:01,913 >> {'loss': 0.0192, 'learning_rate': 2.0530e-06, 'epoch': 2.81, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 21:35:15,068 >> {'loss': 0.0067, 'learning_rate': 2.0440e-06, 'epoch': 2.81, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 21:35:28,243 >> {'loss': 0.0558, 'learning_rate': 2.0351e-06, 'epoch': 2.82, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 21:35:41,380 >> {'loss': 0.0247, 'learning_rate': 2.0262e-06, 'epoch': 2.82, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 21:35:54,540 >> {'loss': 0.0501, 'learning_rate': 2.0173e-06, 'epoch': 2.83, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 21:36:07,671 >> {'loss': 0.0268, 'learning_rate': 2.0084e-06, 'epoch': 2.83, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 21:36:20,837 >> {'loss': 0.0323, 'learning_rate': 1.9995e-06, 'epoch': 2.84, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 21:36:34,003 >> {'loss': 0.0469, 'learning_rate': 1.9906e-06, 'epoch': 2.85, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 21:36:47,146 >> {'loss': 0.0600, 'learning_rate': 1.9817e-06, 'epoch': 2.85, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 21:37:00,312 >> {'loss': 0.0153, 'learning_rate': 1.9728e-06, 'epoch': 2.86, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 21:37:13,469 >> {'loss': 0.0019, 'learning_rate': 1.9639e-06, 'epoch': 2.86, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 21:37:26,618 >> {'loss': 0.0037, 'learning_rate': 1.9551e-06, 'epoch': 2.87, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 21:37:39,781 >> {'loss': 0.0091, 'learning_rate': 1.9462e-06, 'epoch': 2.87, 'throughput': 502.56} - -[INFO|callbacks.py:310] 2024-07-29 21:37:52,924 >> {'loss': 0.0038, 'learning_rate': 1.9374e-06, 'epoch': 2.88, 'throughput': 502.53} - -[INFO|callbacks.py:310] 2024-07-29 21:38:06,077 >> {'loss': 0.0026, 'learning_rate': 1.9285e-06, 'epoch': 2.89, 'throughput': 502.47} - -[INFO|callbacks.py:310] 2024-07-29 21:38:19,240 >> {'loss': 0.0198, 'learning_rate': 1.9197e-06, 'epoch': 2.89, 'throughput': 502.49} - -[INFO|callbacks.py:310] 2024-07-29 21:38:32,392 >> {'loss': 0.0372, 'learning_rate': 1.9109e-06, 'epoch': 2.90, 'throughput': 502.48} - -[INFO|callbacks.py:310] 2024-07-29 21:38:45,545 >> {'loss': 0.0063, 'learning_rate': 1.9020e-06, 'epoch': 2.90, 'throughput': 502.50} - -[INFO|callbacks.py:310] 2024-07-29 21:38:58,687 >> {'loss': 0.0034, 'learning_rate': 1.8932e-06, 'epoch': 2.91, 'throughput': 502.45} - -[INFO|callbacks.py:310] 2024-07-29 21:39:11,844 >> {'loss': 0.0049, 'learning_rate': 1.8844e-06, 'epoch': 2.91, 'throughput': 502.49} - -[INFO|callbacks.py:310] 2024-07-29 21:39:24,984 >> {'loss': 0.0236, 'learning_rate': 1.8756e-06, 'epoch': 2.92, 'throughput': 502.50} - -[INFO|callbacks.py:310] 2024-07-29 21:39:38,133 >> {'loss': 0.0326, 'learning_rate': 1.8668e-06, 'epoch': 2.93, 'throughput': 502.49} - -[INFO|callbacks.py:310] 2024-07-29 21:39:51,287 >> {'loss': 0.0349, 'learning_rate': 1.8581e-06, 'epoch': 2.93, 'throughput': 502.50} - -[INFO|callbacks.py:310] 2024-07-29 21:40:04,446 >> {'loss': 0.0034, 'learning_rate': 1.8493e-06, 'epoch': 2.94, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 21:40:17,596 >> {'loss': 0.0309, 'learning_rate': 1.8405e-06, 'epoch': 2.94, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 21:40:30,746 >> {'loss': 0.0185, 'learning_rate': 1.8318e-06, 'epoch': 2.95, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 21:40:43,901 >> {'loss': 0.0054, 'learning_rate': 1.8230e-06, 'epoch': 2.95, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 21:40:57,040 >> {'loss': 0.0076, 'learning_rate': 1.8143e-06, 'epoch': 2.96, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 21:41:10,203 >> {'loss': 0.0191, 'learning_rate': 1.8056e-06, 'epoch': 2.97, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 21:41:23,365 >> {'loss': 0.0348, 'learning_rate': 1.7969e-06, 'epoch': 2.97, 'throughput': 502.59} - -[INFO|callbacks.py:310] 2024-07-29 21:41:36,513 >> {'loss': 0.0166, 'learning_rate': 1.7881e-06, 'epoch': 2.98, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 21:41:49,654 >> {'loss': 0.0322, 'learning_rate': 1.7794e-06, 'epoch': 2.98, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 21:42:02,807 >> {'loss': 0.0063, 'learning_rate': 1.7708e-06, 'epoch': 2.99, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 21:42:15,959 >> {'loss': 0.0021, 'learning_rate': 1.7621e-06, 'epoch': 2.99, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 21:42:29,104 >> {'loss': 0.0116, 'learning_rate': 1.7534e-06, 'epoch': 3.00, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 21:42:42,259 >> {'loss': 0.0027, 'learning_rate': 1.7447e-06, 'epoch': 3.00, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 21:42:55,427 >> {'loss': 0.0076, 'learning_rate': 1.7361e-06, 'epoch': 3.01, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 21:43:08,570 >> {'loss': 0.0060, 'learning_rate': 1.7275e-06, 'epoch': 3.02, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 21:43:21,730 >> {'loss': 0.0008, 'learning_rate': 1.7188e-06, 'epoch': 3.02, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 21:43:34,884 >> {'loss': 0.0139, 'learning_rate': 1.7102e-06, 'epoch': 3.03, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 21:43:48,053 >> {'loss': 0.0123, 'learning_rate': 1.7016e-06, 'epoch': 3.03, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 21:44:01,202 >> {'loss': 0.0004, 'learning_rate': 1.6930e-06, 'epoch': 3.04, 'throughput': 502.69} - -[INFO|callbacks.py:310] 2024-07-29 21:44:14,363 >> {'loss': 0.0005, 'learning_rate': 1.6844e-06, 'epoch': 3.04, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 21:44:27,534 >> {'loss': 0.0025, 'learning_rate': 1.6758e-06, 'epoch': 3.05, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 21:44:40,697 >> {'loss': 0.0008, 'learning_rate': 1.6673e-06, 'epoch': 3.06, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 21:44:53,848 >> {'loss': 0.0004, 'learning_rate': 1.6587e-06, 'epoch': 3.06, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 21:45:06,992 >> {'loss': 0.0003, 'learning_rate': 1.6502e-06, 'epoch': 3.07, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 21:45:20,158 >> {'loss': 0.0092, 'learning_rate': 1.6416e-06, 'epoch': 3.07, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 21:45:33,308 >> {'loss': 0.0090, 'learning_rate': 1.6331e-06, 'epoch': 3.08, 'throughput': 502.69} - -[INFO|callbacks.py:310] 2024-07-29 21:45:46,458 >> {'loss': 0.0356, 'learning_rate': 1.6246e-06, 'epoch': 3.08, 'throughput': 502.73} - -[INFO|callbacks.py:310] 2024-07-29 21:45:59,616 >> {'loss': 0.0009, 'learning_rate': 1.6161e-06, 'epoch': 3.09, 'throughput': 502.78} - -[INFO|callbacks.py:310] 2024-07-29 21:46:12,773 >> {'loss': 0.0006, 'learning_rate': 1.6076e-06, 'epoch': 3.10, 'throughput': 502.81} - -[INFO|callbacks.py:310] 2024-07-29 21:46:25,906 >> {'loss': 0.0007, 'learning_rate': 1.5991e-06, 'epoch': 3.10, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 21:46:39,065 >> {'loss': 0.0006, 'learning_rate': 1.5907e-06, 'epoch': 3.11, 'throughput': 502.81} - -[INFO|callbacks.py:310] 2024-07-29 21:46:52,223 >> {'loss': 0.0005, 'learning_rate': 1.5822e-06, 'epoch': 3.11, 'throughput': 502.81} - -[INFO|callbacks.py:310] 2024-07-29 21:47:05,373 >> {'loss': 0.0002, 'learning_rate': 1.5738e-06, 'epoch': 3.12, 'throughput': 502.83} - -[INFO|callbacks.py:310] 2024-07-29 21:47:18,518 >> {'loss': 0.0004, 'learning_rate': 1.5654e-06, 'epoch': 3.12, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 21:47:31,666 >> {'loss': 0.0002, 'learning_rate': 1.5569e-06, 'epoch': 3.13, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 21:47:44,830 >> {'loss': 0.0085, 'learning_rate': 1.5485e-06, 'epoch': 3.14, 'throughput': 502.80} - -[INFO|callbacks.py:310] 2024-07-29 21:47:57,983 >> {'loss': 0.0007, 'learning_rate': 1.5401e-06, 'epoch': 3.14, 'throughput': 502.80} - -[INFO|callbacks.py:310] 2024-07-29 21:48:11,136 >> {'loss': 0.0020, 'learning_rate': 1.5318e-06, 'epoch': 3.15, 'throughput': 502.78} - -[INFO|callbacks.py:310] 2024-07-29 21:48:24,300 >> {'loss': 0.0005, 'learning_rate': 1.5234e-06, 'epoch': 3.15, 'throughput': 502.78} - -[INFO|callbacks.py:310] 2024-07-29 21:48:37,450 >> {'loss': 0.0002, 'learning_rate': 1.5151e-06, 'epoch': 3.16, 'throughput': 502.78} - -[INFO|callbacks.py:310] 2024-07-29 21:48:50,613 >> {'loss': 0.0020, 'learning_rate': 1.5067e-06, 'epoch': 3.16, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 21:49:03,763 >> {'loss': 0.0001, 'learning_rate': 1.4984e-06, 'epoch': 3.17, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 21:49:16,927 >> {'loss': 0.0002, 'learning_rate': 1.4901e-06, 'epoch': 3.18, 'throughput': 502.80} - -[INFO|callbacks.py:310] 2024-07-29 21:49:30,079 >> {'loss': 0.0003, 'learning_rate': 1.4818e-06, 'epoch': 3.18, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 21:49:43,227 >> {'loss': 0.0312, 'learning_rate': 1.4735e-06, 'epoch': 3.19, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 21:49:56,369 >> {'loss': 0.0003, 'learning_rate': 1.4652e-06, 'epoch': 3.19, 'throughput': 502.87} - -[INFO|callbacks.py:310] 2024-07-29 21:50:09,511 >> {'loss': 0.0001, 'learning_rate': 1.4570e-06, 'epoch': 3.20, 'throughput': 502.88} - -[INFO|callbacks.py:310] 2024-07-29 21:50:22,647 >> {'loss': 0.0475, 'learning_rate': 1.4487e-06, 'epoch': 3.20, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 21:50:35,786 >> {'loss': 0.0016, 'learning_rate': 1.4405e-06, 'epoch': 3.21, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 21:50:48,947 >> {'loss': 0.0002, 'learning_rate': 1.4323e-06, 'epoch': 3.22, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 21:51:02,093 >> {'loss': 0.0002, 'learning_rate': 1.4241e-06, 'epoch': 3.22, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 21:51:15,250 >> {'loss': 0.0140, 'learning_rate': 1.4159e-06, 'epoch': 3.23, 'throughput': 502.77} - -[INFO|callbacks.py:310] 2024-07-29 21:51:28,397 >> {'loss': 0.0430, 'learning_rate': 1.4077e-06, 'epoch': 3.23, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 21:51:41,550 >> {'loss': 0.0283, 'learning_rate': 1.3995e-06, 'epoch': 3.24, 'throughput': 502.83} - -[INFO|callbacks.py:310] 2024-07-29 21:51:54,701 >> {'loss': 0.0137, 'learning_rate': 1.3914e-06, 'epoch': 3.24, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 21:52:07,849 >> {'loss': 0.0249, 'learning_rate': 1.3833e-06, 'epoch': 3.25, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 21:52:21,007 >> {'loss': 0.0014, 'learning_rate': 1.3751e-06, 'epoch': 3.26, 'throughput': 502.78} - -[INFO|callbacks.py:310] 2024-07-29 21:52:34,172 >> {'loss': 0.0034, 'learning_rate': 1.3670e-06, 'epoch': 3.26, 'throughput': 502.78} - -[INFO|callbacks.py:310] 2024-07-29 21:52:47,312 >> {'loss': 0.0073, 'learning_rate': 1.3590e-06, 'epoch': 3.27, 'throughput': 502.76} - -[INFO|callbacks.py:310] 2024-07-29 21:53:00,457 >> {'loss': 0.0147, 'learning_rate': 1.3509e-06, 'epoch': 3.27, 'throughput': 502.75} - -[INFO|callbacks.py:310] 2024-07-29 21:53:13,610 >> {'loss': 0.0019, 'learning_rate': 1.3428e-06, 'epoch': 3.28, 'throughput': 502.74} - -[INFO|callbacks.py:310] 2024-07-29 21:53:26,763 >> {'loss': 0.0025, 'learning_rate': 1.3348e-06, 'epoch': 3.28, 'throughput': 502.77} - -[INFO|callbacks.py:310] 2024-07-29 21:53:39,920 >> {'loss': 0.0018, 'learning_rate': 1.3268e-06, 'epoch': 3.29, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 21:53:53,059 >> {'loss': 0.0023, 'learning_rate': 1.3188e-06, 'epoch': 3.30, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 21:54:06,220 >> {'loss': 0.0077, 'learning_rate': 1.3108e-06, 'epoch': 3.30, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 21:54:19,365 >> {'loss': 0.0014, 'learning_rate': 1.3028e-06, 'epoch': 3.31, 'throughput': 502.89} - -[INFO|callbacks.py:310] 2024-07-29 21:54:32,514 >> {'loss': 0.0037, 'learning_rate': 1.2948e-06, 'epoch': 3.31, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 21:54:45,672 >> {'loss': 0.0160, 'learning_rate': 1.2869e-06, 'epoch': 3.32, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 21:54:58,828 >> {'loss': 0.0014, 'learning_rate': 1.2789e-06, 'epoch': 3.32, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 21:55:11,974 >> {'loss': 0.0307, 'learning_rate': 1.2710e-06, 'epoch': 3.33, 'throughput': 502.83} - -[INFO|callbacks.py:310] 2024-07-29 21:55:25,119 >> {'loss': 0.0011, 'learning_rate': 1.2631e-06, 'epoch': 3.34, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 21:55:38,284 >> {'loss': 0.0187, 'learning_rate': 1.2552e-06, 'epoch': 3.34, 'throughput': 502.87} - -[INFO|callbacks.py:310] 2024-07-29 21:55:51,446 >> {'loss': 0.0092, 'learning_rate': 1.2474e-06, 'epoch': 3.35, 'throughput': 502.88} - -[INFO|callbacks.py:310] 2024-07-29 21:56:04,597 >> {'loss': 0.0016, 'learning_rate': 1.2395e-06, 'epoch': 3.35, 'throughput': 502.87} - -[INFO|callbacks.py:310] 2024-07-29 21:56:17,750 >> {'loss': 0.0118, 'learning_rate': 1.2317e-06, 'epoch': 3.36, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 21:56:30,909 >> {'loss': 0.0227, 'learning_rate': 1.2239e-06, 'epoch': 3.36, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 21:56:44,055 >> {'loss': 0.0084, 'learning_rate': 1.2161e-06, 'epoch': 3.37, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 21:56:57,213 >> {'loss': 0.0190, 'learning_rate': 1.2083e-06, 'epoch': 3.38, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 21:57:10,356 >> {'loss': 0.0073, 'learning_rate': 1.2005e-06, 'epoch': 3.38, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 21:57:23,519 >> {'loss': 0.0009, 'learning_rate': 1.1928e-06, 'epoch': 3.39, 'throughput': 502.81} - -[INFO|callbacks.py:310] 2024-07-29 21:57:36,680 >> {'loss': 0.0117, 'learning_rate': 1.1851e-06, 'epoch': 3.39, 'throughput': 502.77} - -[INFO|callbacks.py:310] 2024-07-29 21:57:49,837 >> {'loss': 0.0170, 'learning_rate': 1.1773e-06, 'epoch': 3.40, 'throughput': 502.75} - -[INFO|callbacks.py:310] 2024-07-29 21:58:02,994 >> {'loss': 0.0054, 'learning_rate': 1.1696e-06, 'epoch': 3.40, 'throughput': 502.73} - -[INFO|callbacks.py:310] 2024-07-29 21:58:16,144 >> {'loss': 0.0041, 'learning_rate': 1.1620e-06, 'epoch': 3.41, 'throughput': 502.70} - -[INFO|callbacks.py:310] 2024-07-29 21:58:29,299 >> {'loss': 0.0020, 'learning_rate': 1.1543e-06, 'epoch': 3.42, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 21:58:42,451 >> {'loss': 0.0022, 'learning_rate': 1.1467e-06, 'epoch': 3.42, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 21:58:55,610 >> {'loss': 0.0004, 'learning_rate': 1.1390e-06, 'epoch': 3.43, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 21:59:08,770 >> {'loss': 0.0148, 'learning_rate': 1.1314e-06, 'epoch': 3.43, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 21:59:21,923 >> {'loss': 0.0490, 'learning_rate': 1.1238e-06, 'epoch': 3.44, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 21:59:35,086 >> {'loss': 0.0027, 'learning_rate': 1.1163e-06, 'epoch': 3.44, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 21:59:48,237 >> {'loss': 0.0007, 'learning_rate': 1.1087e-06, 'epoch': 3.45, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 22:00:01,383 >> {'loss': 0.0060, 'learning_rate': 1.1012e-06, 'epoch': 3.46, 'throughput': 502.69} - -[INFO|callbacks.py:310] 2024-07-29 22:00:14,543 >> {'loss': 0.0066, 'learning_rate': 1.0937e-06, 'epoch': 3.46, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 22:00:27,700 >> {'loss': 0.0255, 'learning_rate': 1.0862e-06, 'epoch': 3.47, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 22:00:40,870 >> {'loss': 0.0075, 'learning_rate': 1.0787e-06, 'epoch': 3.47, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 22:00:54,002 >> {'loss': 0.0068, 'learning_rate': 1.0712e-06, 'epoch': 3.48, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 22:01:07,145 >> {'loss': 0.0008, 'learning_rate': 1.0638e-06, 'epoch': 3.48, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 22:01:20,290 >> {'loss': 0.0132, 'learning_rate': 1.0564e-06, 'epoch': 3.49, 'throughput': 502.75} - -[INFO|callbacks.py:310] 2024-07-29 22:01:33,444 >> {'loss': 0.0009, 'learning_rate': 1.0490e-06, 'epoch': 3.50, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 22:01:46,588 >> {'loss': 0.0009, 'learning_rate': 1.0416e-06, 'epoch': 3.50, 'throughput': 502.80} - -[INFO|callbacks.py:310] 2024-07-29 22:01:59,740 >> {'loss': 0.0006, 'learning_rate': 1.0342e-06, 'epoch': 3.51, 'throughput': 502.81} - -[INFO|callbacks.py:310] 2024-07-29 22:02:12,909 >> {'loss': 0.0010, 'learning_rate': 1.0269e-06, 'epoch': 3.51, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 22:02:26,061 >> {'loss': 0.0219, 'learning_rate': 1.0195e-06, 'epoch': 3.52, 'throughput': 502.81} - -[INFO|callbacks.py:310] 2024-07-29 22:02:39,208 >> {'loss': 0.0006, 'learning_rate': 1.0122e-06, 'epoch': 3.52, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 22:02:52,361 >> {'loss': 0.0360, 'learning_rate': 1.0049e-06, 'epoch': 3.53, 'throughput': 502.83} - -[INFO|callbacks.py:310] 2024-07-29 22:03:05,507 >> {'loss': 0.0034, 'learning_rate': 9.9768e-07, 'epoch': 3.54, 'throughput': 502.87} - -[INFO|callbacks.py:310] 2024-07-29 22:03:18,665 >> {'loss': 0.0039, 'learning_rate': 9.9043e-07, 'epoch': 3.54, 'throughput': 502.83} - -[INFO|callbacks.py:310] 2024-07-29 22:03:31,814 >> {'loss': 0.0011, 'learning_rate': 9.8320e-07, 'epoch': 3.55, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 22:03:44,978 >> {'loss': 0.0017, 'learning_rate': 9.7600e-07, 'epoch': 3.55, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 22:03:58,123 >> {'loss': 0.0057, 'learning_rate': 9.6881e-07, 'epoch': 3.56, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 22:04:11,260 >> {'loss': 0.0199, 'learning_rate': 9.6164e-07, 'epoch': 3.56, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 22:04:24,420 >> {'loss': 0.0294, 'learning_rate': 9.5449e-07, 'epoch': 3.57, 'throughput': 502.83} - -[INFO|callbacks.py:310] 2024-07-29 22:04:37,593 >> {'loss': 0.0124, 'learning_rate': 9.4737e-07, 'epoch': 3.58, 'throughput': 502.83} - -[INFO|callbacks.py:310] 2024-07-29 22:04:50,748 >> {'loss': 0.0058, 'learning_rate': 9.4026e-07, 'epoch': 3.58, 'throughput': 502.85} - -[INFO|callbacks.py:310] 2024-07-29 22:05:03,881 >> {'loss': 0.0113, 'learning_rate': 9.3318e-07, 'epoch': 3.59, 'throughput': 502.94} - -[INFO|callbacks.py:310] 2024-07-29 22:05:17,029 >> {'loss': 0.0058, 'learning_rate': 9.2611e-07, 'epoch': 3.59, 'throughput': 502.96} - -[INFO|callbacks.py:310] 2024-07-29 22:05:30,185 >> {'loss': 0.0019, 'learning_rate': 9.1907e-07, 'epoch': 3.60, 'throughput': 502.94} - -[INFO|callbacks.py:310] 2024-07-29 22:05:43,332 >> {'loss': 0.0443, 'learning_rate': 9.1204e-07, 'epoch': 3.60, 'throughput': 502.96} - -[INFO|callbacks.py:310] 2024-07-29 22:05:56,486 >> {'loss': 0.0246, 'learning_rate': 9.0504e-07, 'epoch': 3.61, 'throughput': 502.92} - -[INFO|callbacks.py:310] 2024-07-29 22:06:09,643 >> {'loss': 0.0039, 'learning_rate': 8.9806e-07, 'epoch': 3.62, 'throughput': 502.90} - -[INFO|callbacks.py:310] 2024-07-29 22:06:22,796 >> {'loss': 0.0302, 'learning_rate': 8.9110e-07, 'epoch': 3.62, 'throughput': 502.90} - -[INFO|callbacks.py:310] 2024-07-29 22:06:35,943 >> {'loss': 0.0039, 'learning_rate': 8.8416e-07, 'epoch': 3.63, 'throughput': 502.89} - -[INFO|callbacks.py:310] 2024-07-29 22:06:49,097 >> {'loss': 0.0096, 'learning_rate': 8.7724e-07, 'epoch': 3.63, 'throughput': 502.89} - -[INFO|callbacks.py:310] 2024-07-29 22:07:02,267 >> {'loss': 0.0314, 'learning_rate': 8.7035e-07, 'epoch': 3.64, 'throughput': 502.88} - -[INFO|callbacks.py:310] 2024-07-29 22:07:15,419 >> {'loss': 0.0301, 'learning_rate': 8.6347e-07, 'epoch': 3.64, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 22:07:28,551 >> {'loss': 0.0012, 'learning_rate': 8.5662e-07, 'epoch': 3.65, 'throughput': 502.85} - -[INFO|callbacks.py:310] 2024-07-29 22:07:41,714 >> {'loss': 0.0010, 'learning_rate': 8.4979e-07, 'epoch': 3.66, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 22:07:54,859 >> {'loss': 0.0053, 'learning_rate': 8.4298e-07, 'epoch': 3.66, 'throughput': 502.82} - -[INFO|callbacks.py:310] 2024-07-29 22:08:08,005 >> {'loss': 0.0107, 'learning_rate': 8.3619e-07, 'epoch': 3.67, 'throughput': 502.88} - -[INFO|callbacks.py:310] 2024-07-29 22:08:21,165 >> {'loss': 0.0016, 'learning_rate': 8.2942e-07, 'epoch': 3.67, 'throughput': 502.88} - -[INFO|callbacks.py:310] 2024-07-29 22:08:34,318 >> {'loss': 0.0109, 'learning_rate': 8.2268e-07, 'epoch': 3.68, 'throughput': 502.89} - -[INFO|callbacks.py:310] 2024-07-29 22:08:47,474 >> {'loss': 0.0107, 'learning_rate': 8.1596e-07, 'epoch': 3.68, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 22:09:00,632 >> {'loss': 0.0013, 'learning_rate': 8.0926e-07, 'epoch': 3.69, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 22:09:13,780 >> {'loss': 0.0097, 'learning_rate': 8.0258e-07, 'epoch': 3.69, 'throughput': 502.91} - -[INFO|callbacks.py:310] 2024-07-29 22:09:26,939 >> {'loss': 0.0082, 'learning_rate': 7.9593e-07, 'epoch': 3.70, 'throughput': 502.94} - -[INFO|callbacks.py:310] 2024-07-29 22:09:40,117 >> {'loss': 0.0209, 'learning_rate': 7.8929e-07, 'epoch': 3.71, 'throughput': 502.91} - -[INFO|callbacks.py:310] 2024-07-29 22:09:53,261 >> {'loss': 0.0014, 'learning_rate': 7.8268e-07, 'epoch': 3.71, 'throughput': 502.94} - -[INFO|callbacks.py:310] 2024-07-29 22:10:06,419 >> {'loss': 0.0190, 'learning_rate': 7.7610e-07, 'epoch': 3.72, 'throughput': 502.93} - -[INFO|callbacks.py:310] 2024-07-29 22:10:19,570 >> {'loss': 0.0187, 'learning_rate': 7.6953e-07, 'epoch': 3.72, 'throughput': 502.90} - -[INFO|callbacks.py:310] 2024-07-29 22:10:32,719 >> {'loss': 0.0118, 'learning_rate': 7.6299e-07, 'epoch': 3.73, 'throughput': 502.87} - -[INFO|callbacks.py:310] 2024-07-29 22:10:45,865 >> {'loss': 0.0027, 'learning_rate': 7.5647e-07, 'epoch': 3.73, 'throughput': 502.88} - -[INFO|callbacks.py:310] 2024-07-29 22:10:59,011 >> {'loss': 0.0088, 'learning_rate': 7.4998e-07, 'epoch': 3.74, 'throughput': 502.88} - -[INFO|callbacks.py:310] 2024-07-29 22:11:12,176 >> {'loss': 0.0252, 'learning_rate': 7.4350e-07, 'epoch': 3.75, 'throughput': 502.85} - -[INFO|callbacks.py:310] 2024-07-29 22:11:25,331 >> {'loss': 0.0030, 'learning_rate': 7.3705e-07, 'epoch': 3.75, 'throughput': 502.87} - -[INFO|callbacks.py:310] 2024-07-29 22:11:38,492 >> {'loss': 0.0149, 'learning_rate': 7.3063e-07, 'epoch': 3.76, 'throughput': 502.85} - -[INFO|callbacks.py:310] 2024-07-29 22:11:51,658 >> {'loss': 0.0009, 'learning_rate': 7.2423e-07, 'epoch': 3.76, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 22:12:04,819 >> {'loss': 0.0023, 'learning_rate': 7.1785e-07, 'epoch': 3.77, 'throughput': 502.85} - -[INFO|callbacks.py:310] 2024-07-29 22:12:17,968 >> {'loss': 0.0062, 'learning_rate': 7.1149e-07, 'epoch': 3.77, 'throughput': 502.81} - -[INFO|callbacks.py:310] 2024-07-29 22:12:31,130 >> {'loss': 0.0077, 'learning_rate': 7.0516e-07, 'epoch': 3.78, 'throughput': 502.84} - -[INFO|callbacks.py:310] 2024-07-29 22:12:44,299 >> {'loss': 0.0091, 'learning_rate': 6.9885e-07, 'epoch': 3.79, 'throughput': 502.85} - -[INFO|callbacks.py:310] 2024-07-29 22:12:57,448 >> {'loss': 0.0027, 'learning_rate': 6.9256e-07, 'epoch': 3.79, 'throughput': 502.86} - -[INFO|callbacks.py:310] 2024-07-29 22:13:10,612 >> {'loss': 0.0048, 'learning_rate': 6.8630e-07, 'epoch': 3.80, 'throughput': 502.89} - -[INFO|callbacks.py:310] 2024-07-29 22:13:23,767 >> {'loss': 0.0084, 'learning_rate': 6.8007e-07, 'epoch': 3.80, 'throughput': 502.89} - -[INFO|callbacks.py:310] 2024-07-29 22:13:36,910 >> {'loss': 0.0037, 'learning_rate': 6.7385e-07, 'epoch': 3.81, 'throughput': 502.88} - -[INFO|callbacks.py:310] 2024-07-29 22:13:50,057 >> {'loss': 0.0013, 'learning_rate': 6.6766e-07, 'epoch': 3.81, 'throughput': 502.88} - -[INFO|callbacks.py:310] 2024-07-29 22:14:03,212 >> {'loss': 0.0099, 'learning_rate': 6.6150e-07, 'epoch': 3.82, 'throughput': 502.83} - -[INFO|callbacks.py:310] 2024-07-29 22:14:16,362 >> {'loss': 0.0004, 'learning_rate': 6.5536e-07, 'epoch': 3.83, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 22:14:29,513 >> {'loss': 0.0196, 'learning_rate': 6.4924e-07, 'epoch': 3.83, 'throughput': 502.77} - -[INFO|callbacks.py:310] 2024-07-29 22:14:42,665 >> {'loss': 0.0013, 'learning_rate': 6.4315e-07, 'epoch': 3.84, 'throughput': 502.75} - -[INFO|callbacks.py:310] 2024-07-29 22:14:55,835 >> {'loss': 0.0008, 'learning_rate': 6.3708e-07, 'epoch': 3.84, 'throughput': 502.70} - -[INFO|callbacks.py:310] 2024-07-29 22:15:08,998 >> {'loss': 0.0177, 'learning_rate': 6.3104e-07, 'epoch': 3.85, 'throughput': 502.70} - -[INFO|callbacks.py:310] 2024-07-29 22:15:22,149 >> {'loss': 0.0148, 'learning_rate': 6.2502e-07, 'epoch': 3.85, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 22:15:35,308 >> {'loss': 0.0012, 'learning_rate': 6.1903e-07, 'epoch': 3.86, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 22:15:48,467 >> {'loss': 0.0031, 'learning_rate': 6.1306e-07, 'epoch': 3.87, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 22:16:01,615 >> {'loss': 0.0024, 'learning_rate': 6.0712e-07, 'epoch': 3.87, 'throughput': 502.69} - -[INFO|callbacks.py:310] 2024-07-29 22:16:14,754 >> {'loss': 0.0008, 'learning_rate': 6.0120e-07, 'epoch': 3.88, 'throughput': 502.70} - -[INFO|callbacks.py:310] 2024-07-29 22:16:27,924 >> {'loss': 0.0042, 'learning_rate': 5.9530e-07, 'epoch': 3.88, 'throughput': 502.69} - -[INFO|callbacks.py:310] 2024-07-29 22:16:41,081 >> {'loss': 0.0235, 'learning_rate': 5.8944e-07, 'epoch': 3.89, 'throughput': 502.74} - -[INFO|callbacks.py:310] 2024-07-29 22:16:54,235 >> {'loss': 0.0029, 'learning_rate': 5.8359e-07, 'epoch': 3.89, 'throughput': 502.79} - -[INFO|callbacks.py:310] 2024-07-29 22:17:07,391 >> {'loss': 0.0037, 'learning_rate': 5.7777e-07, 'epoch': 3.90, 'throughput': 502.77} - -[INFO|callbacks.py:310] 2024-07-29 22:17:20,543 >> {'loss': 0.0145, 'learning_rate': 5.7198e-07, 'epoch': 3.91, 'throughput': 502.77} - -[INFO|callbacks.py:310] 2024-07-29 22:17:33,694 >> {'loss': 0.0093, 'learning_rate': 5.6621e-07, 'epoch': 3.91, 'throughput': 502.77} - -[INFO|callbacks.py:310] 2024-07-29 22:17:46,855 >> {'loss': 0.0066, 'learning_rate': 5.6047e-07, 'epoch': 3.92, 'throughput': 502.75} - -[INFO|callbacks.py:310] 2024-07-29 22:18:00,015 >> {'loss': 0.0010, 'learning_rate': 5.5476e-07, 'epoch': 3.92, 'throughput': 502.74} - -[INFO|callbacks.py:310] 2024-07-29 22:18:13,171 >> {'loss': 0.0029, 'learning_rate': 5.4907e-07, 'epoch': 3.93, 'throughput': 502.70} - -[INFO|callbacks.py:310] 2024-07-29 22:18:26,322 >> {'loss': 0.0232, 'learning_rate': 5.4340e-07, 'epoch': 3.93, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 22:18:39,480 >> {'loss': 0.0004, 'learning_rate': 5.3776e-07, 'epoch': 3.94, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 22:18:52,638 >> {'loss': 0.0003, 'learning_rate': 5.3215e-07, 'epoch': 3.95, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 22:19:05,793 >> {'loss': 0.0011, 'learning_rate': 5.2656e-07, 'epoch': 3.95, 'throughput': 502.73} - -[INFO|callbacks.py:310] 2024-07-29 22:19:18,954 >> {'loss': 0.0009, 'learning_rate': 5.2100e-07, 'epoch': 3.96, 'throughput': 502.74} - -[INFO|callbacks.py:310] 2024-07-29 22:19:32,095 >> {'loss': 0.0009, 'learning_rate': 5.1547e-07, 'epoch': 3.96, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 22:19:45,250 >> {'loss': 0.0039, 'learning_rate': 5.0996e-07, 'epoch': 3.97, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 22:19:58,406 >> {'loss': 0.0109, 'learning_rate': 5.0447e-07, 'epoch': 3.97, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 22:20:11,579 >> {'loss': 0.0455, 'learning_rate': 4.9902e-07, 'epoch': 3.98, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 22:20:24,723 >> {'loss': 0.0003, 'learning_rate': 4.9359e-07, 'epoch': 3.99, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 22:20:37,864 >> {'loss': 0.0004, 'learning_rate': 4.8818e-07, 'epoch': 3.99, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:20:51,025 >> {'loss': 0.0003, 'learning_rate': 4.8281e-07, 'epoch': 4.00, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 22:21:04,177 >> {'loss': 0.0006, 'learning_rate': 4.7746e-07, 'epoch': 4.00, 'throughput': 502.56} - -[INFO|callbacks.py:310] 2024-07-29 22:21:17,341 >> {'loss': 0.0041, 'learning_rate': 4.7213e-07, 'epoch': 4.01, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:21:30,503 >> {'loss': 0.0017, 'learning_rate': 4.6684e-07, 'epoch': 4.01, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:21:43,639 >> {'loss': 0.0003, 'learning_rate': 4.6157e-07, 'epoch': 4.02, 'throughput': 502.59} - -[INFO|callbacks.py:310] 2024-07-29 22:21:56,770 >> {'loss': 0.0101, 'learning_rate': 4.5632e-07, 'epoch': 4.03, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:22:09,931 >> {'loss': 0.0206, 'learning_rate': 4.5111e-07, 'epoch': 4.03, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 22:22:23,089 >> {'loss': 0.0058, 'learning_rate': 4.4592e-07, 'epoch': 4.04, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:22:36,237 >> {'loss': 0.0007, 'learning_rate': 4.4076e-07, 'epoch': 4.04, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 22:22:49,388 >> {'loss': 0.0035, 'learning_rate': 4.3562e-07, 'epoch': 4.05, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 22:23:02,532 >> {'loss': 0.0074, 'learning_rate': 4.3051e-07, 'epoch': 4.05, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 22:23:15,687 >> {'loss': 0.0002, 'learning_rate': 4.2543e-07, 'epoch': 4.06, 'throughput': 502.56} - -[INFO|callbacks.py:310] 2024-07-29 22:23:28,835 >> {'loss': 0.0010, 'learning_rate': 4.2038e-07, 'epoch': 4.07, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 22:23:41,992 >> {'loss': 0.0004, 'learning_rate': 4.1536e-07, 'epoch': 4.07, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 22:23:55,130 >> {'loss': 0.0044, 'learning_rate': 4.1036e-07, 'epoch': 4.08, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 22:24:08,299 >> {'loss': 0.0065, 'learning_rate': 4.0539e-07, 'epoch': 4.08, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 22:24:21,449 >> {'loss': 0.0004, 'learning_rate': 4.0044e-07, 'epoch': 4.09, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 22:24:34,612 >> {'loss': 0.0044, 'learning_rate': 3.9553e-07, 'epoch': 4.09, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 22:24:47,758 >> {'loss': 0.0111, 'learning_rate': 3.9064e-07, 'epoch': 4.10, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 22:25:00,899 >> {'loss': 0.0123, 'learning_rate': 3.8578e-07, 'epoch': 4.11, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:25:14,037 >> {'loss': 0.0011, 'learning_rate': 3.8095e-07, 'epoch': 4.11, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 22:25:27,209 >> {'loss': 0.0002, 'learning_rate': 3.7615e-07, 'epoch': 4.12, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 22:25:40,356 >> {'loss': 0.0020, 'learning_rate': 3.7137e-07, 'epoch': 4.12, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 22:25:53,499 >> {'loss': 0.0040, 'learning_rate': 3.6662e-07, 'epoch': 4.13, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 22:26:06,672 >> {'loss': 0.0010, 'learning_rate': 3.6190e-07, 'epoch': 4.13, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 22:26:19,819 >> {'loss': 0.0013, 'learning_rate': 3.5721e-07, 'epoch': 4.14, 'throughput': 502.51} - -[INFO|callbacks.py:310] 2024-07-29 22:26:32,963 >> {'loss': 0.0002, 'learning_rate': 3.5255e-07, 'epoch': 4.15, 'throughput': 502.53} - -[INFO|callbacks.py:310] 2024-07-29 22:26:46,112 >> {'loss': 0.0006, 'learning_rate': 3.4791e-07, 'epoch': 4.15, 'throughput': 502.55} - -[INFO|callbacks.py:310] 2024-07-29 22:26:59,258 >> {'loss': 0.0011, 'learning_rate': 3.4331e-07, 'epoch': 4.16, 'throughput': 502.53} - -[INFO|callbacks.py:310] 2024-07-29 22:27:12,413 >> {'loss': 0.0046, 'learning_rate': 3.3873e-07, 'epoch': 4.16, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 22:27:25,573 >> {'loss': 0.0026, 'learning_rate': 3.3418e-07, 'epoch': 4.17, 'throughput': 502.59} - -[INFO|callbacks.py:310] 2024-07-29 22:27:38,731 >> {'loss': 0.0045, 'learning_rate': 3.2966e-07, 'epoch': 4.17, 'throughput': 502.56} - -[INFO|callbacks.py:310] 2024-07-29 22:27:51,891 >> {'loss': 0.0154, 'learning_rate': 3.2517e-07, 'epoch': 4.18, 'throughput': 502.55} - -[INFO|callbacks.py:310] 2024-07-29 22:28:05,032 >> {'loss': 0.0009, 'learning_rate': 3.2070e-07, 'epoch': 4.19, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 22:28:18,170 >> {'loss': 0.0002, 'learning_rate': 3.1627e-07, 'epoch': 4.19, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 22:28:31,337 >> {'loss': 0.0005, 'learning_rate': 3.1186e-07, 'epoch': 4.20, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 22:28:44,491 >> {'loss': 0.0021, 'learning_rate': 3.0749e-07, 'epoch': 4.20, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 22:28:57,633 >> {'loss': 0.0006, 'learning_rate': 3.0314e-07, 'epoch': 4.21, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 22:29:10,783 >> {'loss': 0.0359, 'learning_rate': 2.9882e-07, 'epoch': 4.21, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:29:23,947 >> {'loss': 0.0002, 'learning_rate': 2.9453e-07, 'epoch': 4.22, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:29:37,105 >> {'loss': 0.0093, 'learning_rate': 2.9027e-07, 'epoch': 4.23, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:29:50,263 >> {'loss': 0.0011, 'learning_rate': 2.8604e-07, 'epoch': 4.23, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 22:30:03,403 >> {'loss': 0.0014, 'learning_rate': 2.8183e-07, 'epoch': 4.24, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 22:30:16,570 >> {'loss': 0.0041, 'learning_rate': 2.7766e-07, 'epoch': 4.24, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:30:29,715 >> {'loss': 0.0077, 'learning_rate': 2.7352e-07, 'epoch': 4.25, 'throughput': 502.59} - -[INFO|callbacks.py:310] 2024-07-29 22:30:42,879 >> {'loss': 0.0005, 'learning_rate': 2.6940e-07, 'epoch': 4.25, 'throughput': 502.55} - -[INFO|callbacks.py:310] 2024-07-29 22:30:56,035 >> {'loss': 0.0015, 'learning_rate': 2.6532e-07, 'epoch': 4.26, 'throughput': 502.56} - -[INFO|callbacks.py:310] 2024-07-29 22:31:09,186 >> {'loss': 0.0072, 'learning_rate': 2.6126e-07, 'epoch': 4.27, 'throughput': 502.56} - -[INFO|callbacks.py:310] 2024-07-29 22:31:22,341 >> {'loss': 0.0010, 'learning_rate': 2.5723e-07, 'epoch': 4.27, 'throughput': 502.55} - -[INFO|callbacks.py:310] 2024-07-29 22:31:35,487 >> {'loss': 0.0043, 'learning_rate': 2.5324e-07, 'epoch': 4.28, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 22:31:48,639 >> {'loss': 0.0030, 'learning_rate': 2.4927e-07, 'epoch': 4.28, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 22:32:01,781 >> {'loss': 0.0047, 'learning_rate': 2.4533e-07, 'epoch': 4.29, 'throughput': 502.51} - -[INFO|callbacks.py:310] 2024-07-29 22:32:14,921 >> {'loss': 0.0004, 'learning_rate': 2.4142e-07, 'epoch': 4.29, 'throughput': 502.52} - -[INFO|callbacks.py:310] 2024-07-29 22:32:28,095 >> {'loss': 0.0004, 'learning_rate': 2.3755e-07, 'epoch': 4.30, 'throughput': 502.50} - -[INFO|callbacks.py:310] 2024-07-29 22:32:41,263 >> {'loss': 0.0036, 'learning_rate': 2.3370e-07, 'epoch': 4.31, 'throughput': 502.52} - -[INFO|callbacks.py:310] 2024-07-29 22:32:54,415 >> {'loss': 0.0004, 'learning_rate': 2.2988e-07, 'epoch': 4.31, 'throughput': 502.49} - -[INFO|callbacks.py:310] 2024-07-29 22:33:07,559 >> {'loss': 0.0003, 'learning_rate': 2.2609e-07, 'epoch': 4.32, 'throughput': 502.49} - -[INFO|callbacks.py:310] 2024-07-29 22:33:20,716 >> {'loss': 0.0089, 'learning_rate': 2.2233e-07, 'epoch': 4.32, 'throughput': 502.49} - -[INFO|callbacks.py:310] 2024-07-29 22:33:33,851 >> {'loss': 0.0002, 'learning_rate': 2.1861e-07, 'epoch': 4.33, 'throughput': 502.50} - -[INFO|callbacks.py:310] 2024-07-29 22:33:46,995 >> {'loss': 0.0002, 'learning_rate': 2.1491e-07, 'epoch': 4.33, 'throughput': 502.52} - -[INFO|callbacks.py:310] 2024-07-29 22:34:00,158 >> {'loss': 0.0033, 'learning_rate': 2.1124e-07, 'epoch': 4.34, 'throughput': 502.49} - -[INFO|callbacks.py:310] 2024-07-29 22:34:13,311 >> {'loss': 0.0109, 'learning_rate': 2.0760e-07, 'epoch': 4.34, 'throughput': 502.49} - -[INFO|callbacks.py:310] 2024-07-29 22:34:26,470 >> {'loss': 0.0031, 'learning_rate': 2.0399e-07, 'epoch': 4.35, 'throughput': 502.46} - -[INFO|callbacks.py:310] 2024-07-29 22:34:39,637 >> {'loss': 0.0005, 'learning_rate': 2.0042e-07, 'epoch': 4.36, 'throughput': 502.43} - -[INFO|callbacks.py:310] 2024-07-29 22:34:52,805 >> {'loss': 0.0006, 'learning_rate': 1.9687e-07, 'epoch': 4.36, 'throughput': 502.45} - -[INFO|callbacks.py:310] 2024-07-29 22:35:05,963 >> {'loss': 0.0035, 'learning_rate': 1.9335e-07, 'epoch': 4.37, 'throughput': 502.43} - -[INFO|callbacks.py:310] 2024-07-29 22:35:19,110 >> {'loss': 0.0143, 'learning_rate': 1.8987e-07, 'epoch': 4.37, 'throughput': 502.44} - -[INFO|callbacks.py:310] 2024-07-29 22:35:32,251 >> {'loss': 0.0037, 'learning_rate': 1.8641e-07, 'epoch': 4.38, 'throughput': 502.44} - -[INFO|callbacks.py:310] 2024-07-29 22:35:45,428 >> {'loss': 0.0082, 'learning_rate': 1.8299e-07, 'epoch': 4.38, 'throughput': 502.43} - -[INFO|callbacks.py:310] 2024-07-29 22:35:58,582 >> {'loss': 0.0003, 'learning_rate': 1.7959e-07, 'epoch': 4.39, 'throughput': 502.42} - -[INFO|callbacks.py:310] 2024-07-29 22:36:11,730 >> {'loss': 0.0156, 'learning_rate': 1.7623e-07, 'epoch': 4.40, 'throughput': 502.40} - -[INFO|callbacks.py:310] 2024-07-29 22:36:24,881 >> {'loss': 0.0003, 'learning_rate': 1.7289e-07, 'epoch': 4.40, 'throughput': 502.40} - -[INFO|callbacks.py:310] 2024-07-29 22:36:38,041 >> {'loss': 0.0028, 'learning_rate': 1.6959e-07, 'epoch': 4.41, 'throughput': 502.41} - -[INFO|callbacks.py:310] 2024-07-29 22:36:51,184 >> {'loss': 0.0002, 'learning_rate': 1.6632e-07, 'epoch': 4.41, 'throughput': 502.42} - -[INFO|callbacks.py:310] 2024-07-29 22:37:04,324 >> {'loss': 0.0109, 'learning_rate': 1.6308e-07, 'epoch': 4.42, 'throughput': 502.40} - -[INFO|callbacks.py:310] 2024-07-29 22:37:17,498 >> {'loss': 0.0214, 'learning_rate': 1.5987e-07, 'epoch': 4.42, 'throughput': 502.38} - -[INFO|callbacks.py:310] 2024-07-29 22:37:30,657 >> {'loss': 0.0004, 'learning_rate': 1.5669e-07, 'epoch': 4.43, 'throughput': 502.36} - -[INFO|callbacks.py:310] 2024-07-29 22:37:43,785 >> {'loss': 0.0034, 'learning_rate': 1.5354e-07, 'epoch': 4.44, 'throughput': 502.36} - -[INFO|callbacks.py:310] 2024-07-29 22:37:56,949 >> {'loss': 0.0014, 'learning_rate': 1.5042e-07, 'epoch': 4.44, 'throughput': 502.37} - -[INFO|callbacks.py:310] 2024-07-29 22:38:10,094 >> {'loss': 0.0012, 'learning_rate': 1.4734e-07, 'epoch': 4.45, 'throughput': 502.38} - -[INFO|callbacks.py:310] 2024-07-29 22:38:23,248 >> {'loss': 0.0009, 'learning_rate': 1.4428e-07, 'epoch': 4.45, 'throughput': 502.38} - -[INFO|callbacks.py:310] 2024-07-29 22:38:36,385 >> {'loss': 0.0038, 'learning_rate': 1.4126e-07, 'epoch': 4.46, 'throughput': 502.41} - -[INFO|callbacks.py:310] 2024-07-29 22:38:49,545 >> {'loss': 0.0003, 'learning_rate': 1.3826e-07, 'epoch': 4.46, 'throughput': 502.39} - -[INFO|callbacks.py:310] 2024-07-29 22:39:02,706 >> {'loss': 0.0009, 'learning_rate': 1.3530e-07, 'epoch': 4.47, 'throughput': 502.40} - -[INFO|callbacks.py:310] 2024-07-29 22:39:15,854 >> {'loss': 0.0120, 'learning_rate': 1.3237e-07, 'epoch': 4.48, 'throughput': 502.45} - -[INFO|callbacks.py:310] 2024-07-29 22:39:29,005 >> {'loss': 0.0003, 'learning_rate': 1.2947e-07, 'epoch': 4.48, 'throughput': 502.47} - -[INFO|callbacks.py:310] 2024-07-29 22:39:42,157 >> {'loss': 0.0001, 'learning_rate': 1.2660e-07, 'epoch': 4.49, 'throughput': 502.46} - -[INFO|callbacks.py:310] 2024-07-29 22:39:55,294 >> {'loss': 0.0066, 'learning_rate': 1.2377e-07, 'epoch': 4.49, 'throughput': 502.44} - -[INFO|callbacks.py:310] 2024-07-29 22:40:08,447 >> {'loss': 0.0001, 'learning_rate': 1.2096e-07, 'epoch': 4.50, 'throughput': 502.48} - -[INFO|callbacks.py:310] 2024-07-29 22:40:21,613 >> {'loss': 0.0008, 'learning_rate': 1.1819e-07, 'epoch': 4.50, 'throughput': 502.49} - -[INFO|callbacks.py:310] 2024-07-29 22:40:34,776 >> {'loss': 0.0001, 'learning_rate': 1.1544e-07, 'epoch': 4.51, 'throughput': 502.52} - -[INFO|callbacks.py:310] 2024-07-29 22:40:47,918 >> {'loss': 0.0001, 'learning_rate': 1.1273e-07, 'epoch': 4.52, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 22:41:01,054 >> {'loss': 0.0007, 'learning_rate': 1.1005e-07, 'epoch': 4.52, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 22:41:14,212 >> {'loss': 0.0003, 'learning_rate': 1.0740e-07, 'epoch': 4.53, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 22:41:27,362 >> {'loss': 0.0034, 'learning_rate': 1.0479e-07, 'epoch': 4.53, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 22:41:40,510 >> {'loss': 0.0004, 'learning_rate': 1.0220e-07, 'epoch': 4.54, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 22:41:53,651 >> {'loss': 0.0003, 'learning_rate': 9.9646e-08, 'epoch': 4.54, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 22:42:06,818 >> {'loss': 0.0002, 'learning_rate': 9.7124e-08, 'epoch': 4.55, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 22:42:19,965 >> {'loss': 0.0097, 'learning_rate': 9.4634e-08, 'epoch': 4.56, 'throughput': 502.70} - -[INFO|callbacks.py:310] 2024-07-29 22:42:33,121 >> {'loss': 0.0002, 'learning_rate': 9.2175e-08, 'epoch': 4.56, 'throughput': 502.69} - -[INFO|callbacks.py:310] 2024-07-29 22:42:46,266 >> {'loss': 0.0029, 'learning_rate': 8.9748e-08, 'epoch': 4.57, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 22:42:59,415 >> {'loss': 0.0059, 'learning_rate': 8.7353e-08, 'epoch': 4.57, 'throughput': 502.73} - -[INFO|callbacks.py:310] 2024-07-29 22:43:12,576 >> {'loss': 0.0001, 'learning_rate': 8.4990e-08, 'epoch': 4.58, 'throughput': 502.73} - -[INFO|callbacks.py:310] 2024-07-29 22:43:25,732 >> {'loss': 0.0004, 'learning_rate': 8.2658e-08, 'epoch': 4.58, 'throughput': 502.74} - -[INFO|callbacks.py:310] 2024-07-29 22:43:38,905 >> {'loss': 0.0002, 'learning_rate': 8.0359e-08, 'epoch': 4.59, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 22:43:52,049 >> {'loss': 0.0040, 'learning_rate': 7.8091e-08, 'epoch': 4.60, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 22:44:05,204 >> {'loss': 0.0013, 'learning_rate': 7.5855e-08, 'epoch': 4.60, 'throughput': 502.71} - -[INFO|callbacks.py:310] 2024-07-29 22:44:18,354 >> {'loss': 0.0011, 'learning_rate': 7.3652e-08, 'epoch': 4.61, 'throughput': 502.72} - -[INFO|callbacks.py:310] 2024-07-29 22:44:31,511 >> {'loss': 0.0017, 'learning_rate': 7.1480e-08, 'epoch': 4.61, 'throughput': 502.70} - -[INFO|callbacks.py:310] 2024-07-29 22:44:44,674 >> {'loss': 0.0049, 'learning_rate': 6.9340e-08, 'epoch': 4.62, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 22:44:57,833 >> {'loss': 0.0047, 'learning_rate': 6.7233e-08, 'epoch': 4.62, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 22:45:10,980 >> {'loss': 0.0001, 'learning_rate': 6.5157e-08, 'epoch': 4.63, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 22:45:24,138 >> {'loss': 0.0002, 'learning_rate': 6.3114e-08, 'epoch': 4.64, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 22:45:37,292 >> {'loss': 0.0003, 'learning_rate': 6.1103e-08, 'epoch': 4.64, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 22:45:50,434 >> {'loss': 0.0098, 'learning_rate': 5.9124e-08, 'epoch': 4.65, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:46:03,594 >> {'loss': 0.0014, 'learning_rate': 5.7177e-08, 'epoch': 4.65, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:46:16,750 >> {'loss': 0.0006, 'learning_rate': 5.5262e-08, 'epoch': 4.66, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 22:46:29,899 >> {'loss': 0.0099, 'learning_rate': 5.3380e-08, 'epoch': 4.66, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 22:46:43,058 >> {'loss': 0.0019, 'learning_rate': 5.1530e-08, 'epoch': 4.67, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 22:46:56,225 >> {'loss': 0.0009, 'learning_rate': 4.9712e-08, 'epoch': 4.68, 'throughput': 502.59} - -[INFO|callbacks.py:310] 2024-07-29 22:47:09,368 >> {'loss': 0.0189, 'learning_rate': 4.7926e-08, 'epoch': 4.68, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 22:47:22,514 >> {'loss': 0.0001, 'learning_rate': 4.6173e-08, 'epoch': 4.69, 'throughput': 502.56} - -[INFO|callbacks.py:310] 2024-07-29 22:47:35,677 >> {'loss': 0.0075, 'learning_rate': 4.4452e-08, 'epoch': 4.69, 'throughput': 502.58} - -[INFO|callbacks.py:310] 2024-07-29 22:47:48,835 >> {'loss': 0.0004, 'learning_rate': 4.2764e-08, 'epoch': 4.70, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 22:48:01,995 >> {'loss': 0.0002, 'learning_rate': 4.1108e-08, 'epoch': 4.70, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 22:48:15,127 >> {'loss': 0.0004, 'learning_rate': 3.9484e-08, 'epoch': 4.71, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:48:28,281 >> {'loss': 0.0015, 'learning_rate': 3.7893e-08, 'epoch': 4.72, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 22:48:41,436 >> {'loss': 0.0002, 'learning_rate': 3.6334e-08, 'epoch': 4.72, 'throughput': 502.62} - -[INFO|callbacks.py:310] 2024-07-29 22:48:54,604 >> {'loss': 0.0137, 'learning_rate': 3.4808e-08, 'epoch': 4.73, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 22:49:07,765 >> {'loss': 0.0004, 'learning_rate': 3.3315e-08, 'epoch': 4.73, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 22:49:20,922 >> {'loss': 0.0006, 'learning_rate': 3.1854e-08, 'epoch': 4.74, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 22:49:34,056 >> {'loss': 0.0003, 'learning_rate': 3.0425e-08, 'epoch': 4.74, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 22:49:47,215 >> {'loss': 0.0006, 'learning_rate': 2.9029e-08, 'epoch': 4.75, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:50:00,366 >> {'loss': 0.0064, 'learning_rate': 2.7666e-08, 'epoch': 4.76, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:50:13,527 >> {'loss': 0.0100, 'learning_rate': 2.6335e-08, 'epoch': 4.76, 'throughput': 502.56} - -[INFO|callbacks.py:310] 2024-07-29 22:50:26,682 >> {'loss': 0.0047, 'learning_rate': 2.5037e-08, 'epoch': 4.77, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 22:50:39,849 >> {'loss': 0.0004, 'learning_rate': 2.3771e-08, 'epoch': 4.77, 'throughput': 502.52} - -[INFO|callbacks.py:310] 2024-07-29 22:50:52,999 >> {'loss': 0.0004, 'learning_rate': 2.2539e-08, 'epoch': 4.78, 'throughput': 502.53} - -[INFO|callbacks.py:310] 2024-07-29 22:51:06,145 >> {'loss': 0.0018, 'learning_rate': 2.1338e-08, 'epoch': 4.78, 'throughput': 502.55} - -[INFO|callbacks.py:310] 2024-07-29 22:51:19,285 >> {'loss': 0.0003, 'learning_rate': 2.0171e-08, 'epoch': 4.79, 'throughput': 502.55} - -[INFO|callbacks.py:310] 2024-07-29 22:51:32,437 >> {'loss': 0.0093, 'learning_rate': 1.9036e-08, 'epoch': 4.80, 'throughput': 502.53} - -[INFO|callbacks.py:310] 2024-07-29 22:51:45,602 >> {'loss': 0.0007, 'learning_rate': 1.7934e-08, 'epoch': 4.80, 'throughput': 502.52} - -[INFO|callbacks.py:310] 2024-07-29 22:51:58,759 >> {'loss': 0.0043, 'learning_rate': 1.6865e-08, 'epoch': 4.81, 'throughput': 502.53} - -[INFO|callbacks.py:310] 2024-07-29 22:52:11,900 >> {'loss': 0.0001, 'learning_rate': 1.5829e-08, 'epoch': 4.81, 'throughput': 502.55} - -[INFO|callbacks.py:310] 2024-07-29 22:52:25,045 >> {'loss': 0.0001, 'learning_rate': 1.4825e-08, 'epoch': 4.82, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 22:52:38,203 >> {'loss': 0.0001, 'learning_rate': 1.3854e-08, 'epoch': 4.82, 'throughput': 502.52} - -[INFO|callbacks.py:310] 2024-07-29 22:52:51,353 >> {'loss': 0.0046, 'learning_rate': 1.2916e-08, 'epoch': 4.83, 'throughput': 502.55} - -[INFO|callbacks.py:310] 2024-07-29 22:53:04,502 >> {'loss': 0.0001, 'learning_rate': 1.2010e-08, 'epoch': 4.84, 'throughput': 502.56} - -[INFO|callbacks.py:310] 2024-07-29 22:53:17,647 >> {'loss': 0.0025, 'learning_rate': 1.1138e-08, 'epoch': 4.84, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 22:53:30,802 >> {'loss': 0.0002, 'learning_rate': 1.0298e-08, 'epoch': 4.85, 'throughput': 502.54} - -[INFO|callbacks.py:310] 2024-07-29 22:53:43,951 >> {'loss': 0.0002, 'learning_rate': 9.4913e-09, 'epoch': 4.85, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 22:53:57,116 >> {'loss': 0.0002, 'learning_rate': 8.7173e-09, 'epoch': 4.86, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 22:54:10,267 >> {'loss': 0.0005, 'learning_rate': 7.9761e-09, 'epoch': 4.86, 'throughput': 502.60} - -[INFO|callbacks.py:310] 2024-07-29 22:54:23,431 >> {'loss': 0.0020, 'learning_rate': 7.2678e-09, 'epoch': 4.87, 'throughput': 502.57} - -[INFO|callbacks.py:310] 2024-07-29 22:54:36,568 >> {'loss': 0.0052, 'learning_rate': 6.5924e-09, 'epoch': 4.88, 'throughput': 502.61} - -[INFO|callbacks.py:310] 2024-07-29 22:54:49,721 >> {'loss': 0.0001, 'learning_rate': 5.9499e-09, 'epoch': 4.88, 'throughput': 502.64} - -[INFO|callbacks.py:310] 2024-07-29 22:55:02,885 >> {'loss': 0.0032, 'learning_rate': 5.3403e-09, 'epoch': 4.89, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 22:55:16,028 >> {'loss': 0.0003, 'learning_rate': 4.7636e-09, 'epoch': 4.89, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 22:55:29,171 >> {'loss': 0.0003, 'learning_rate': 4.2198e-09, 'epoch': 4.90, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 22:55:42,306 >> {'loss': 0.0001, 'learning_rate': 3.7090e-09, 'epoch': 4.90, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 22:55:55,472 >> {'loss': 0.0006, 'learning_rate': 3.2310e-09, 'epoch': 4.91, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 22:56:08,634 >> {'loss': 0.0044, 'learning_rate': 2.7860e-09, 'epoch': 4.92, 'throughput': 502.65} - -[INFO|callbacks.py:310] 2024-07-29 22:56:21,781 >> {'loss': 0.0007, 'learning_rate': 2.3739e-09, 'epoch': 4.92, 'throughput': 502.63} - -[INFO|callbacks.py:310] 2024-07-29 22:56:34,930 >> {'loss': 0.0004, 'learning_rate': 1.9948e-09, 'epoch': 4.93, 'throughput': 502.68} - -[INFO|callbacks.py:310] 2024-07-29 22:56:48,081 >> {'loss': 0.0067, 'learning_rate': 1.6487e-09, 'epoch': 4.93, 'throughput': 502.66} - -[INFO|callbacks.py:310] 2024-07-29 22:57:01,246 >> {'loss': 0.0005, 'learning_rate': 1.3354e-09, 'epoch': 4.94, 'throughput': 502.67} - -[INFO|callbacks.py:310] 2024-07-29 22:57:14,403 >> {'loss': 0.0017, 'learning_rate': 1.0552e-09, 'epoch': 4.94, 'throughput': 502.68} +[INFO|attention.py:84] 2024-07-30 01:57:52,271 >> Using torch SDPA for faster training and inference. -[INFO|callbacks.py:310] 2024-07-29 22:57:27,569 >> {'loss': 0.0065, 'learning_rate': 8.0789e-10, 'epoch': 4.95, 'throughput': 502.65} +[INFO|loader.py:196] 2024-07-30 01:57:52,276 >> all params: 8,030,261,248 -[INFO|callbacks.py:310] 2024-07-29 22:57:40,730 >> {'loss': 0.0009, 'learning_rate': 5.9356e-10, 'epoch': 4.96, 'throughput': 502.65} +07/30/2024 01:57:52 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. -[INFO|callbacks.py:310] 2024-07-29 22:57:53,882 >> {'loss': 0.0001, 'learning_rate': 4.1220e-10, 'epoch': 4.96, 'throughput': 502.67} +07/30/2024 01:57:52 - INFO - llamafactory.model.loader - all params: 8,030,261,248 -[INFO|callbacks.py:310] 2024-07-29 22:58:07,051 >> {'loss': 0.0096, 'learning_rate': 2.6381e-10, 'epoch': 4.97, 'throughput': 502.65} +07/30/2024 01:57:52 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. -[INFO|callbacks.py:310] 2024-07-29 22:58:20,211 >> {'loss': 0.0099, 'learning_rate': 1.4839e-10, 'epoch': 4.97, 'throughput': 502.63} +07/30/2024 01:57:52 - INFO - llamafactory.model.loader - all params: 8,030,261,248 -[INFO|callbacks.py:310] 2024-07-29 22:58:33,362 >> {'loss': 0.0004, 'learning_rate': 6.5953e-11, 'epoch': 4.98, 'throughput': 502.63} +07/30/2024 01:57:52 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. -[INFO|callbacks.py:310] 2024-07-29 22:58:46,504 >> {'loss': 0.0026, 'learning_rate': 1.6488e-11, 'epoch': 4.98, 'throughput': 502.64} +07/30/2024 01:57:52 - INFO - llamafactory.model.loader - all params: 8,030,261,248 -[INFO|callbacks.py:310] 2024-07-29 22:58:59,658 >> {'loss': 0.0029, 'learning_rate': 0.0000e+00, 'epoch': 4.99, 'throughput': 502.64} +[INFO|trainer.py:3819] 2024-07-30 01:57:52,384 >> +***** Running Prediction ***** -[INFO|trainer.py:3503] 2024-07-29 22:59:07,308 >> Saving model checkpoint to saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/checkpoint-875 +[INFO|trainer.py:3821] 2024-07-30 01:57:52,384 >> Num examples = 2554 -[INFO|configuration_utils.py:472] 2024-07-29 22:59:07,310 >> Configuration saved in saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/checkpoint-875/config.json +[INFO|trainer.py:3824] 2024-07-30 01:57:52,384 >> Batch size = 2 -[INFO|configuration_utils.py:807] 2024-07-29 22:59:07,311 >> Configuration saved in saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/checkpoint-875/generation_config.json +07/30/2024 01:57:52 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. -[INFO|modeling_utils.py:2763] 2024-07-29 22:59:23,946 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 4 checkpoint shards. You can find where each parameters has been saved in the index located at saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/checkpoint-875/model.safetensors.index.json. +07/30/2024 01:57:52 - INFO - llamafactory.model.loader - all params: 8,030,261,248 -[INFO|tokenization_utils_base.py:2702] 2024-07-29 22:59:23,949 >> tokenizer config file saved in saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/checkpoint-875/tokenizer_config.json +07/30/2024 01:57:52 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. -[INFO|tokenization_utils_base.py:2711] 2024-07-29 22:59:23,949 >> Special tokens file saved in saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/checkpoint-875/special_tokens_map.json +07/30/2024 01:57:52 - INFO - llamafactory.model.loader - all params: 8,030,261,248 -[INFO|trainer.py:2394] 2024-07-29 23:00:00,785 >> +07/30/2024 01:57:52 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. -Training completed. Do not forget to share your model on huggingface.co/models =) +07/30/2024 01:57:52 - INFO - llamafactory.model.loader - all params: 8,030,261,248 +07/30/2024 01:57:52 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. +07/30/2024 01:57:52 - INFO - llamafactory.model.loader - all params: 8,030,261,248 -[INFO|trainer.py:3503] 2024-07-29 23:00:08,361 >> Saving model checkpoint to saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final +[WARNING|logging.py:328] 2024-07-30 01:57:53,028 >> We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) -[INFO|configuration_utils.py:472] 2024-07-29 23:00:08,364 >> Configuration saved in saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/config.json +07/30/2024 01:57:53 - WARNING - transformers.models.llama.modeling_llama - We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) -[INFO|configuration_utils.py:807] 2024-07-29 23:00:08,364 >> Configuration saved in saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/generation_config.json +07/30/2024 01:57:53 - WARNING - transformers.models.llama.modeling_llama - We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) -[INFO|modeling_utils.py:2763] 2024-07-29 23:00:25,499 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 4 checkpoint shards. You can find where each parameters has been saved in the index located at saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/model.safetensors.index.json. +07/30/2024 01:57:53 - WARNING - transformers.models.llama.modeling_llama - We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) -[INFO|tokenization_utils_base.py:2702] 2024-07-29 23:00:25,503 >> tokenizer config file saved in saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/tokenizer_config.json +07/30/2024 01:57:53 - WARNING - transformers.models.llama.modeling_llama - We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) -[INFO|tokenization_utils_base.py:2711] 2024-07-29 23:00:25,503 >> Special tokens file saved in saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-19-43-56_llama3.1_reeval_final/special_tokens_map.json +07/30/2024 01:57:53 - WARNING - transformers.models.llama.modeling_llama - We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) -[WARNING|ploting.py:89] 2024-07-29 23:00:26,862 >> No metric eval_loss to plot. +07/30/2024 01:57:53 - WARNING - transformers.models.llama.modeling_llama - We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) -[WARNING|ploting.py:89] 2024-07-29 23:00:26,862 >> No metric eval_accuracy to plot. +07/30/2024 01:57:53 - WARNING - transformers.models.llama.modeling_llama - We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache) -[INFO|modelcard.py:449] 2024-07-29 23:00:26,863 >> Dropping the following result as it does not have all the necessary fields: -{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}} +[INFO|trainer.py:127] 2024-07-30 01:58:07,421 >> Saving prediction results to saves/LLaMA3.1-8B-Chat/full/eval_2024-07-30-01-57-05/generated_predictions.jsonl