Mardiyyah committed 4 days ago

Commit

5af906c

•

1 Parent(s): fce413e

Mlr-shared-task-ewc_stabilised

Browse files

Files changed (19) hide show

.gitattributes +1 -0
README.md +74 -0
config.json +57 -0
model.safetensors +3 -0
sentencepiece.bpe.model +3 -0
special_tokens_map.json +51 -0
tokenizer.json +3 -0
tokenizer_config.json +54 -0
training_args.bin +3 -0
wandb/debug-internal.log +0 -0
wandb/debug.log +55 -0
wandb/run-20240916_182041-ye308qxr/files/config.yaml +746 -0
wandb/run-20240916_182041-ye308qxr/files/output.log +48 -0
wandb/run-20240916_182041-ye308qxr/files/requirements.txt +313 -0
wandb/run-20240916_182041-ye308qxr/files/wandb-metadata.json +281 -0
wandb/run-20240916_182041-ye308qxr/files/wandb-summary.json +1 -0
wandb/run-20240916_182041-ye308qxr/logs/debug-internal.log +0 -0
wandb/run-20240916_182041-ye308qxr/logs/debug.log +55 -0
wandb/run-20240916_182041-ye308qxr/run-ye308qxr.wandb +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,74 @@

+---
+license: afl-3.0
+base_model: masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0
+tags:
+- generated_from_trainer
+metrics:
+- f1
+- precision
+- recall
+- accuracy
+model-index:
+- name: ewc_stabilised
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# ewc_stabilised
+This model is a fine-tuned version of [masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0](https://huggingface.co/masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0) on an unspecified dataset (no dataset name was recorded by the Trainer).
+It achieves the following results on the evaluation set:
+- Loss: 0.1396
+- F1: 0.8317
+- Precision: 0.8305
+- Recall: 0.8328
+- Accuracy: 0.9605
+## Model description
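+An `XLMRobertaForTokenClassification` model (24 layers, hidden size 1024) initialised from `masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0`. It tags tokens with nine BIO labels: `O`, `B-/I-DATE`, `B-/I-PER`, `B-/I-ORG` and `B-/I-LOC`. More information needed.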
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 16
+- eval_batch_size: 8
+- seed: 3407
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 64
+- optimizer: AdamW (`adamw_torch`) with betas=(0.9,0.999), epsilon=1e-08 and weight_decay=0.01
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 1000
+- num_epochs: 5
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss | F1     | Precision | Recall | Accuracy |
+|:-------------:|:------:|:----:|:---------------:|:------:|:---------:|:------:|:--------:|
+| 0.3184        | 0.9993 | 701  | 0.1480          | 0.7895 | 0.7950    | 0.7841 | 0.9511   |
+| 0.1333        | 2.0    | 1403 | 0.1271          | 0.8195 | 0.8148    | 0.8242 | 0.9578   |
+| 0.0975        | 2.9993 | 2104 | 0.1241          | 0.8289 | 0.8254    | 0.8324 | 0.9598   |
+| 0.0744        | 4.0    | 2806 | 0.1293          | 0.8307 | 0.8313    | 0.8300 | 0.9603   |
+| 0.0596        | 4.9964 | 3505 | 0.1396          | 0.8317 | 0.8305    | 0.8328 | 0.9605   |
+### Framework versions
+- Transformers 4.43.4
+- Pytorch 2.4.1+cu121
+- Datasets 2.20.0
+- Tokenizers 0.19.1

config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "_name_or_path": "masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0",
+  "adapters": {
+    "adapters": {},
+    "config_map": {},
+    "fusion_config_map": {},
+    "fusions": {}
+  },
+  "architectures": [
+    "XLMRobertaForTokenClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "O",
+    "1": "B-DATE",
+    "2": "I-DATE",
+    "3": "B-PER",
+    "4": "I-PER",
+    "5": "B-ORG",
+    "6": "I-ORG",
+    "7": "B-LOC",
+    "8": "I-LOC"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "B-DATE": 1,
+    "B-LOC": 7,
+    "B-ORG": 5,
+    "B-PER": 3,
+    "I-DATE": 2,
+    "I-LOC": 8,
+    "I-ORG": 6,
+    "I-PER": 4,
+    "O": 0
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "xlm-roberta",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.43.4",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 250002
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0ad0061b6f58ea3ce0807499ebb553a49eaf1c3396678ccbb63e42b6bd4d2d9
+size 2235448756

sentencepiece.bpe.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
+size 17082987

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250001": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "XLMRobertaTokenizer",
+  "unk_token": "<unk>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9bac205bc4ab5a9371a0af99442a4adf7691cee6dfbb90430ddecdc3718a4fb6
+size 5240

wandb/debug-internal.log ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/debug.log ADDED Viewed

	@@ -0,0 +1,55 @@

+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Current SDK version is 0.17.9
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Configure stats pid to 1120321
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Loading settings from /homes/amrufai/.config/wandb/settings
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Loading settings from /nfs/production/literature/amina-mardiyyah/wandb/settings
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Loading settings from environment variables: {}
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program': '<python with no main file>'}
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Applying login settings: {}
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Applying login settings: {}
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_init.py:_log_setup():524] Logging user logs to /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/wandb/run-20240916_182041-ye308qxr/logs/debug.log
+2024-09-16 18:20:41,470 INFO    MainThread:1120321 [wandb_init.py:_log_setup():525] Logging internal logs to /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/wandb/run-20240916_182041-ye308qxr/logs/debug-internal.log
+2024-09-16 18:20:41,470 INFO    MainThread:1120321 [wandb_init.py:_jupyter_setup():470] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f39d0d93210>
+2024-09-16 18:20:41,470 INFO    MainThread:1120321 [wandb_init.py:init():608] calling init triggers
+2024-09-16 18:20:41,470 INFO    MainThread:1120321 [wandb_init.py:init():615] wandb.init called with sweep_config: {}
+config: {}
+2024-09-16 18:20:41,470 INFO    MainThread:1120321 [wandb_init.py:init():658] starting backend
+2024-09-16 18:20:41,470 INFO    MainThread:1120321 [wandb_init.py:init():662] setting up manager
+2024-09-16 18:20:41,472 INFO    MainThread:1120321 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2024-09-16 18:20:41,477 INFO    MainThread:1120321 [wandb_init.py:init():670] backend started and connected
+2024-09-16 18:20:41,486 INFO    MainThread:1120321 [wandb_run.py:_label_probe_notebook():1344] probe notebook
+2024-09-16 18:20:41,488 INFO    MainThread:1120321 [wandb_run.py:_label_probe_notebook():1354] Unable to probe notebook: 'NoneType' object has no attribute 'get'
+2024-09-16 18:20:41,488 INFO    MainThread:1120321 [wandb_init.py:init():768] updated telemetry
+2024-09-16 18:20:41,499 INFO    MainThread:1120321 [wandb_init.py:init():801] communicating run to backend with 90.0 second timeout
+2024-09-16 18:20:41,903 INFO    MainThread:1120321 [wandb_init.py:init():852] starting run threads in backend
+2024-09-16 18:20:42,871 INFO    MainThread:1120321 [wandb_run.py:_console_start():2465] atexit reg
+2024-09-16 18:20:42,872 INFO    MainThread:1120321 [wandb_run.py:_redirect():2311] redirect: wrap_raw
+2024-09-16 18:20:42,873 INFO    MainThread:1120321 [wandb_run.py:_redirect():2376] Wrapping output streams.
+2024-09-16 18:20:42,873 INFO    MainThread:1120321 [wandb_run.py:_redirect():2401] Redirects installed.
+2024-09-16 18:20:42,881 INFO    MainThread:1120321 [wandb_init.py:init():895] run started, returning control to user process
+2024-09-16 18:20:42,888 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 18:20:42,888 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 18:22:30,477 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 18:22:30,484 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 18:22:30,484 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 18:24:04,645 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 18:24:04,650 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 18:24:04,650 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 18:24:11,088 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 18:24:11,158 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 18:24:11,158 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 18:24:14,909 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 18:24:14,916 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 18:24:14,916 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 18:25:16,270 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 18:25:23,022 INFO    MainThread:1120321 [wandb_run.py:_config_callback():1392] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['XLMRobertaForTokenClassification'], 'finetuning_task': None, 'id2label': {0: 'O', 1: 'B-DATE', 2: 'I-DATE', 3: 'B-PER', 4: 'I-PER', 5: 'B-ORG', 6: 'I-ORG', 7: 'B-LOC', 8: 'I-LOC'}, 'label2id': {'B-DATE': 1, 'B-LOC': 7, 'B-ORG': 5, 'B-PER': 3, 'I-DATE': 2, 'I-LOC': 8, 'I-ORG': 6, 'I-PER': 4, 'O': 0}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0', 'transformers_version': '4.43.4', 'adapters': {'adapters': {}, 'config_map': {}, 'fusion_config_map': {}, 'fusions': {}}, 'gradient_checkpointing': False, 'model_type': 'xlm-roberta', 'output_past': True, 'vocab_size': 250002, 'hidden_size': 1024, 'num_hidden_layers': 24, 
'num_attention_heads': 16, 'hidden_act': 'gelu', 'intermediate_size': 4096, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 514, 'type_vocab_size': 1, 'initializer_range': 0.02, 'layer_norm_eps': 1e-05, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 1000, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/logs', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 2, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 3407, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 
'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 
'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'eval_use_gather_object': False}
+2024-09-16 18:25:23,024 INFO    MainThread:1120321 [wandb_config.py:__setitem__():154] config set model/num_parameters = 558850057 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f3ae7ac3410>>
+2024-09-16 18:25:23,025 INFO    MainThread:1120321 [wandb_run.py:_config_callback():1392] config_cb model/num_parameters 558850057 None
+2024-09-16 19:06:54,766 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 19:06:54,766 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 19:07:02,732 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 19:07:02,738 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 19:07:02,738 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 19:07:21,411 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 19:07:21,531 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 19:07:21,531 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 19:08:43,069 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend

wandb/run-20240916_182041-ye308qxr/files/config.yaml ADDED Viewed

	@@ -0,0 +1,746 @@

+wandb_version: 1
+_wandb:
+  desc: null
+  value:
+    python_version: 3.11.4
+    cli_version: 0.17.9
+    framework: huggingface
+    huggingface_version: 4.43.4
+    is_jupyter_run: true
+    is_kaggle_kernel: false
+    start_time: 1726507241
+    t:
+      1:
+      - 1
+      - 5
+      - 11
+      - 41
+      - 49
+      - 51
+      - 53
+      - 55
+      - 71
+      - 98
+      - 100
+      - 105
+      2:
+      - 1
+      - 5
+      - 11
+      - 41
+      - 49
+      - 51
+      - 53
+      - 55
+      - 71
+      - 98
+      - 100
+      - 105
+      3:
+      - 7
+      - 19
+      - 23
+      - 62
+      - 66
+      4: 3.11.4
+      5: 0.17.9
+      6: 4.43.4
+      8:
+      - 1
+      - 5
+      9:
+        1: transformers_trainer
+      13: linux-x86_64
+    m:
+    - 1: train/global_step
+      6:
+      - 3
+    - 1: train/loss
+      5: 1
+      6:
+      - 1
+    - 1: train/grad_norm
+      5: 1
+      6:
+      - 1
+    - 1: train/learning_rate
+      5: 1
+      6:
+      - 1
+    - 1: train/epoch
+      5: 1
+      6:
+      - 1
+    - 1: eval/loss
+      5: 1
+      6:
+      - 1
+    - 1: eval/f1
+      5: 1
+      6:
+      - 1
+    - 1: eval/precision
+      5: 1
+      6:
+      - 1
+    - 1: eval/recall
+      5: 1
+      6:
+      - 1
+    - 1: eval/accuracy
+      5: 1
+      6:
+      - 1
+    - 1: eval/runtime
+      5: 1
+      6:
+      - 1
+    - 1: eval/samples_per_second
+      5: 1
+      6:
+      - 1
+    - 1: eval/steps_per_second
+      5: 1
+      6:
+      - 1
+return_dict:
+  desc: null
+  value: true
+output_hidden_states:
+  desc: null
+  value: false
+output_attentions:
+  desc: null
+  value: false
+torchscript:
+  desc: null
+  value: false
+torch_dtype:
+  desc: null
+  value: float32
+use_bfloat16:
+  desc: null
+  value: false
+tf_legacy_loss:
+  desc: null
+  value: false
+pruned_heads:
+  desc: null
+  value: {}
+tie_word_embeddings:
+  desc: null
+  value: true
+chunk_size_feed_forward:
+  desc: null
+  value: 0
+is_encoder_decoder:
+  desc: null
+  value: false
+is_decoder:
+  desc: null
+  value: false
+cross_attention_hidden_size:
+  desc: null
+  value: null
+add_cross_attention:
+  desc: null
+  value: false
+tie_encoder_decoder:
+  desc: null
+  value: false
+max_length:
+  desc: null
+  value: 20
+min_length:
+  desc: null
+  value: 0
+do_sample:
+  desc: null
+  value: false
+early_stopping:
+  desc: null
+  value: false
+num_beams:
+  desc: null
+  value: 1
+num_beam_groups:
+  desc: null
+  value: 1
+diversity_penalty:
+  desc: null
+  value: 0.0
+temperature:
+  desc: null
+  value: 1.0
+top_k:
+  desc: null
+  value: 50
+top_p:
+  desc: null
+  value: 1.0
+typical_p:
+  desc: null
+  value: 1.0
+repetition_penalty:
+  desc: null
+  value: 1.0
+length_penalty:
+  desc: null
+  value: 1.0
+no_repeat_ngram_size:
+  desc: null
+  value: 0
+encoder_no_repeat_ngram_size:
+  desc: null
+  value: 0
+bad_words_ids:
+  desc: null
+  value: null
+num_return_sequences:
+  desc: null
+  value: 1
+output_scores:
+  desc: null
+  value: false
+return_dict_in_generate:
+  desc: null
+  value: false
+forced_bos_token_id:
+  desc: null
+  value: null
+forced_eos_token_id:
+  desc: null
+  value: null
+remove_invalid_values:
+  desc: null
+  value: false
+exponential_decay_length_penalty:
+  desc: null
+  value: null
+suppress_tokens:
+  desc: null
+  value: null
+begin_suppress_tokens:
+  desc: null
+  value: null
+architectures:
+  desc: null
+  value:
+  - XLMRobertaForTokenClassification
+finetuning_task:
+  desc: null
+  value: null
+id2label:
+  desc: null
+  value:
+    '0': O
+    '1': B-DATE
+    '2': I-DATE
+    '3': B-PER
+    '4': I-PER
+    '5': B-ORG
+    '6': I-ORG
+    '7': B-LOC
+    '8': I-LOC
+label2id:
+  desc: null
+  value:
+    B-DATE: 1
+    B-LOC: 7
+    B-ORG: 5
+    B-PER: 3
+    I-DATE: 2
+    I-LOC: 8
+    I-ORG: 6
+    I-PER: 4
+    O: 0
+tokenizer_class:
+  desc: null
+  value: null
+prefix:
+  desc: null
+  value: null
+bos_token_id:
+  desc: null
+  value: 0
+pad_token_id:
+  desc: null
+  value: 1
+eos_token_id:
+  desc: null
+  value: 2
+sep_token_id:
+  desc: null
+  value: null
+decoder_start_token_id:
+  desc: null
+  value: null
+task_specific_params:
+  desc: null
+  value: null
+problem_type:
+  desc: null
+  value: null
+_name_or_path:
+  desc: null
+  value: masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0
+transformers_version:
+  desc: null
+  value: 4.43.4
+adapters:
+  desc: null
+  value:
+    adapters: {}
+    config_map: {}
+    fusion_config_map: {}
+    fusions: {}
+gradient_checkpointing:
+  desc: null
+  value: false
+model_type:
+  desc: null
+  value: xlm-roberta
+output_past:
+  desc: null
+  value: true
+vocab_size:
+  desc: null
+  value: 250002
+hidden_size:
+  desc: null
+  value: 1024
+num_hidden_layers:
+  desc: null
+  value: 24
+num_attention_heads:
+  desc: null
+  value: 16
+hidden_act:
+  desc: null
+  value: gelu
+intermediate_size:
+  desc: null
+  value: 4096
+hidden_dropout_prob:
+  desc: null
+  value: 0.1
+attention_probs_dropout_prob:
+  desc: null
+  value: 0.1
+max_position_embeddings:
+  desc: null
+  value: 514
+type_vocab_size:
+  desc: null
+  value: 1
+initializer_range:
+  desc: null
+  value: 0.02
+layer_norm_eps:
+  desc: null
+  value: 1.0e-05
+position_embedding_type:
+  desc: null
+  value: absolute
+use_cache:
+  desc: null
+  value: true
+classifier_dropout:
+  desc: null
+  value: null
+output_dir:
+  desc: null
+  value: /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised
+overwrite_output_dir:
+  desc: null
+  value: false
+do_train:
+  desc: null
+  value: false
+do_eval:
+  desc: null
+  value: true
+do_predict:
+  desc: null
+  value: false
+eval_strategy:
+  desc: null
+  value: epoch
+prediction_loss_only:
+  desc: null
+  value: false
+per_device_train_batch_size:
+  desc: null
+  value: 16
+per_device_eval_batch_size:
+  desc: null
+  value: 8
+per_gpu_train_batch_size:
+  desc: null
+  value: null
+per_gpu_eval_batch_size:
+  desc: null
+  value: null
+gradient_accumulation_steps:
+  desc: null
+  value: 4
+eval_accumulation_steps:
+  desc: null
+  value: null
+eval_delay:
+  desc: null
+  value: 0
+torch_empty_cache_steps:
+  desc: null
+  value: null
+learning_rate:
+  desc: null
+  value: 2.0e-05
+weight_decay:
+  desc: null
+  value: 0.01
+adam_beta1:
+  desc: null
+  value: 0.9
+adam_beta2:
+  desc: null
+  value: 0.999
+adam_epsilon:
+  desc: null
+  value: 1.0e-08
+max_grad_norm:
+  desc: null
+  value: 1.0
+num_train_epochs:
+  desc: null
+  value: 5
+max_steps:
+  desc: null
+  value: -1
+lr_scheduler_type:
+  desc: null
+  value: linear
+lr_scheduler_kwargs:
+  desc: null
+  value: {}
+warmup_ratio:
+  desc: null
+  value: 0.0
+warmup_steps:
+  desc: null
+  value: 1000
+log_level:
+  desc: null
+  value: passive
+log_level_replica:
+  desc: null
+  value: warning
+log_on_each_node:
+  desc: null
+  value: true
+logging_dir:
+  desc: null
+  value: /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/logs
+logging_strategy:
+  desc: null
+  value: epoch
+logging_first_step:
+  desc: null
+  value: false
+logging_steps:
+  desc: null
+  value: 500
+logging_nan_inf_filter:
+  desc: null
+  value: true
+save_strategy:
+  desc: null
+  value: epoch
+save_steps:
+  desc: null
+  value: 500
+save_total_limit:
+  desc: null
+  value: 2
+save_safetensors:
+  desc: null
+  value: true
+save_on_each_node:
+  desc: null
+  value: false
+save_only_model:
+  desc: null
+  value: false
+restore_callback_states_from_checkpoint:
+  desc: null
+  value: false
+no_cuda:
+  desc: null
+  value: false
+use_cpu:
+  desc: null
+  value: false
+use_mps_device:
+  desc: null
+  value: false
+seed:
+  desc: null
+  value: 3407
+data_seed:
+  desc: null
+  value: null
+jit_mode_eval:
+  desc: null
+  value: false
+use_ipex:
+  desc: null
+  value: false
+bf16:
+  desc: null
+  value: false
+fp16:
+  desc: null
+  value: true
+fp16_opt_level:
+  desc: null
+  value: O1
+half_precision_backend:
+  desc: null
+  value: auto
+bf16_full_eval:
+  desc: null
+  value: false
+fp16_full_eval:
+  desc: null
+  value: false
+tf32:
+  desc: null
+  value: null
+local_rank:
+  desc: null
+  value: 0
+ddp_backend:
+  desc: null
+  value: null
+tpu_num_cores:
+  desc: null
+  value: null
+tpu_metrics_debug:
+  desc: null
+  value: false
+debug:
+  desc: null
+  value: []
+dataloader_drop_last:
+  desc: null
+  value: false
+eval_steps:
+  desc: null
+  value: null
+dataloader_num_workers:
+  desc: null
+  value: 0
+dataloader_prefetch_factor:
+  desc: null
+  value: null
+past_index:
+  desc: null
+  value: -1
+run_name:
+  desc: null
+  value: /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised
+disable_tqdm:
+  desc: null
+  value: false
+remove_unused_columns:
+  desc: null
+  value: true
+label_names:
+  desc: null
+  value: null
+load_best_model_at_end:
+  desc: null
+  value: true
+metric_for_best_model:
+  desc: null
+  value: eval_f1
+greater_is_better:
+  desc: null
+  value: true
+ignore_data_skip:
+  desc: null
+  value: false
+fsdp:
+  desc: null
+  value: []
+fsdp_min_num_params:
+  desc: null
+  value: 0
+fsdp_config:
+  desc: null
+  value:
+    min_num_params: 0
+    xla: false
+    xla_fsdp_v2: false
+    xla_fsdp_grad_ckpt: false
+fsdp_transformer_layer_cls_to_wrap:
+  desc: null
+  value: null
+accelerator_config:
+  desc: null
+  value:
+    split_batches: false
+    dispatch_batches: null
+    even_batches: true
+    use_seedable_sampler: true
+    non_blocking: false
+    gradient_accumulation_kwargs: null
+deepspeed:
+  desc: null
+  value: null
+label_smoothing_factor:
+  desc: null
+  value: 0.0
+optim:
+  desc: null
+  value: adamw_torch
+optim_args:
+  desc: null
+  value: null
+adafactor:
+  desc: null
+  value: false
+group_by_length:
+  desc: null
+  value: true
+length_column_name:
+  desc: null
+  value: length
+report_to:
+  desc: null
+  value:
+  - wandb
+ddp_find_unused_parameters:
+  desc: null
+  value: null
+ddp_bucket_cap_mb:
+  desc: null
+  value: null
+ddp_broadcast_buffers:
+  desc: null
+  value: null
+dataloader_pin_memory:
+  desc: null
+  value: true
+dataloader_persistent_workers:
+  desc: null
+  value: false
+skip_memory_metrics:
+  desc: null
+  value: true
+use_legacy_prediction_loop:
+  desc: null
+  value: false
+push_to_hub:
+  desc: null
+  value: false
+resume_from_checkpoint:
+  desc: null
+  value: null
+hub_model_id:
+  desc: null
+  value: null
+hub_strategy:
+  desc: null
+  value: every_save
+hub_token:
+  desc: null
+  value: <HUB_TOKEN>
+hub_private_repo:
+  desc: null
+  value: false
+hub_always_push:
+  desc: null
+  value: false
+gradient_checkpointing_kwargs:
+  desc: null
+  value: null
+include_inputs_for_metrics:
+  desc: null
+  value: false
+eval_do_concat_batches:
+  desc: null
+  value: true
+fp16_backend:
+  desc: null
+  value: auto
+evaluation_strategy:
+  desc: null
+  value: null
+push_to_hub_model_id:
+  desc: null
+  value: null
+push_to_hub_organization:
+  desc: null
+  value: null
+push_to_hub_token:
+  desc: null
+  value: <PUSH_TO_HUB_TOKEN>
+mp_parameters:
+  desc: null
+  value: ''
+auto_find_batch_size:
+  desc: null
+  value: false
+full_determinism:
+  desc: null
+  value: false
+torchdynamo:
+  desc: null
+  value: null
+ray_scope:
+  desc: null
+  value: last
+ddp_timeout:
+  desc: null
+  value: 1800
+torch_compile:
+  desc: null
+  value: false
+torch_compile_backend:
+  desc: null
+  value: null
+torch_compile_mode:
+  desc: null
+  value: null
+dispatch_batches:
+  desc: null
+  value: null
+split_batches:
+  desc: null
+  value: null
+include_tokens_per_second:
+  desc: null
+  value: false
+include_num_input_tokens_seen:
+  desc: null
+  value: false
+neftune_noise_alpha:
+  desc: null
+  value: null
+optim_target_modules:
+  desc: null
+  value: null
+batch_eval_metrics:
+  desc: null
+  value: false
+eval_on_start:
+  desc: null
+  value: false
+eval_use_gather_object:
+  desc: null
+  value: false
+model/num_parameters:
+  desc: null
+  value: 558850057

wandb/run-20240916_182041-ye308qxr/files/output.log ADDED Viewed

	@@ -0,0 +1,48 @@

+/nfs/production/literature/amina-mardiyyah/envs/llm-prompt/lib/python3.11/site-packages/accelerate/accelerator.py:488: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
+  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
+Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+[34m[1mwandb[39m[22m: [33mWARNING[39m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
+Classification Report:
+               precision    recall  f1-score   support
+        DATE       0.76      0.70      0.73      4012
+         LOC       0.77      0.78      0.77      9327
+         ORG       0.73      0.69      0.71      7547
+         PER       0.87      0.88      0.88     11204
+   micro avg       0.80      0.78      0.79     32090
+   macro avg       0.78      0.76      0.77     32090
+weighted avg       0.79      0.78      0.79     32090
+Classification Report:
+               precision    recall  f1-score   support
+        DATE       0.76      0.75      0.76      4012
+         LOC       0.82      0.79      0.80      9327
+         ORG       0.71      0.80      0.76      7547
+         PER       0.90      0.90      0.90     11204
+   micro avg       0.81      0.82      0.82     32090
+   macro avg       0.80      0.81      0.80     32090
+weighted avg       0.82      0.82      0.82     32090
+Classification Report:
+               precision    recall  f1-score   support
+        DATE       0.76      0.77      0.76      4012
+         LOC       0.81      0.81      0.81      9327
+         ORG       0.78      0.78      0.78      7547
+         PER       0.90      0.91      0.90     11204
+   micro avg       0.83      0.83      0.83     32090
+   macro avg       0.81      0.82      0.81     32090
+weighted avg       0.83      0.83      0.83     32090
+Classification Report:
+               precision    recall  f1-score   support
+        DATE       0.77      0.77      0.77      4012
+         LOC       0.81      0.81      0.81      9327
+         ORG       0.77      0.79      0.78      7547
+         PER       0.92      0.90      0.91     11204
+   micro avg       0.83      0.83      0.83     32090
+   macro avg       0.82      0.82      0.82     32090
+weighted avg       0.83      0.83      0.83     32090
+Classification Report:
+               precision    recall  f1-score   support
+        DATE       0.77      0.77      0.77      4012
+         LOC       0.81      0.81      0.81      9327
+         ORG       0.77      0.80      0.79      7547
+         PER       0.91      0.90      0.90     11204
+   micro avg       0.83      0.83      0.83     32090
+   macro avg       0.82      0.82      0.82     32090

wandb/run-20240916_182041-ye308qxr/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,313 @@

+AutoROM.accept-rom-license==0.6.1
+AutoROM==0.6.1
+Farama-Notifications==0.0.4
+GitPython==3.1.43
+Jinja2==3.1.4
+Mako==1.3.5
+Markdown==3.7
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+Pygments==2.18.0
+QtPy==2.4.1
+SQLAlchemy==2.0.32
+Send2Trash==1.8.3
+Shimmy==1.3.0
+Werkzeug==3.0.4
+absl-py==2.1.0
+accelerate==0.33.0
+aiohappyeyeballs==2.3.5
+aiohttp-retry==2.8.3
+aiohttp==3.10.3
+aiosignal==1.3.1
+ale-py==0.8.1
+alembic==1.13.2
+amqp==5.2.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.4.0
+appdirs==1.4.4
+argon2-cffi-bindings==21.2.0
+argon2-cffi==23.1.0
+arrow==1.3.0
+asttokens==2.4.1
+async-generator==1.10
+async-lru==2.0.4
+asyncssh==2.17.0
+atpublic==5.0
+attrs==24.2.0
+audioread==3.0.1
+babel==2.16.0
+beautifulsoup4==4.12.3
+billiard==4.2.0
+bitsandbytes==0.43.3
+bleach==6.1.0
+blis==0.7.11
+catalogue==2.0.10
+celery==5.4.0
+certifi==2024.7.4
+certipy==0.1.3
+cffi==1.17.0
+charset-normalizer==3.3.2
+click-didyoumean==0.3.1
+click-plugins==1.1.1
+click-repl==0.3.0
+click==8.1.7
+cloudpathlib==0.18.1
+cloudpickle==3.0.0
+colorama==0.4.6
+coloredlogs==15.0.1
+comm==0.2.2
+conda-store==2024.6.1
+confection==0.1.5
+configobj==5.0.8
+contourpy==1.2.1
+cryptography==43.0.0
+cuda-python==11.8.3
+cycler==0.12.1
+cymem==2.0.8
+datasets==2.20.0
+debugpy==1.8.5
+decorator==5.1.1
+defusedxml==0.7.1
+dictdiffer==0.9.0
+dill==0.3.8
+diskcache==5.6.3
+distro==1.9.0
+docker-pycreds==0.4.0
+docstring_parser==0.16
+dpath==2.2.0
+dulwich==0.22.1
+dvc-data==3.16.5
+dvc-http==2.32.0
+dvc-objects==5.1.0
+dvc-render==1.0.2
+dvc-studio-client==0.21.0
+dvc-task==0.4.0
+dvc==3.55.2
+en-core-web-lg==3.7.1
+en-core-web-sm==3.7.1
+entrypoints==0.4
+evaluate==0.4.2
+executing==2.0.1
+fastjsonschema==2.20.0
+filelock==3.15.4
+flatbuffers==24.3.25
+flatten-dict==0.4.2
+flufl.lock==8.1.0
+fonttools==4.53.1
+fqdn==1.5.1
+frozenlist==1.4.1
+fsspec==2024.5.0
+funcy==2.0
+gitdb==4.0.11
+grandalf==0.8
+greenlet==3.0.3
+grpcio==1.66.0
+gto==1.7.1
+gymnasium==0.29.1
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.0
+huggingface-hub==0.24.5
+humanfriendly==10.0
+hydra-core==1.3.2
+idna==3.7
+importlib_resources==6.4.5
+ipykernel==6.29.5
+ipython==8.26.0
+ipywidgets==8.1.3
+isoduration==20.11.0
+iterative-telemetry==0.0.8
+jedi==0.19.1
+joblib==1.4.2
+json5==0.9.25
+jsonpointer==3.0.0
+jsonschema-specifications==2023.12.1
+jsonschema==4.23.0
+jupyter-console==6.6.3
+jupyter-events==0.10.0
+jupyter-launcher-shortcuts==4.0.3
+jupyter-lsp==2.2.5
+jupyter-telemetry==0.1.0
+jupyter==1.0.0
+jupyter_client==8.6.2
+jupyter_core==5.7.2
+jupyter_server==2.14.2
+jupyter_server_terminals==0.5.3
+jupyterhub==1.5.1
+jupyterlab-conda-store==2024.6.1
+jupyterlab==4.2.4
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.3
+jupyterlab_widgets==3.0.11
+kiwisolver==1.4.5
+kombu==5.4.0
+lab==8.2
+langcodes==3.4.0
+language_data==1.2.0
+lazy_loader==0.4
+librosa==0.10.2.post1
+llvmlite==0.43.0
+lxml==5.3.0
+marisa-trie==1.2.0
+markdown-it-py==3.0.0
+matplotlib-inline==0.1.7
+matplotlib==3.9.1.post1
+mdurl==0.1.2
+mistune==3.0.2
+mpmath==1.3.0
+msgpack==1.1.0
+multidict==6.0.5
+multiprocess==0.70.16
+murmurhash==1.0.10
+nb_conda_store_kernels==0.1.5
+nbclient==0.10.0
+nbconvert==7.16.4
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.3
+nodejs==0.1.1
+notebook==7.2.1
+notebook_shim==0.2.4
+numba==0.60.0
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.6.20
+nvidia-nvtx-cu12==12.1.105
+oauthlib==3.2.2
+omegaconf==2.3.0
+onnx==1.16.2
+onnxruntime==1.19.0
+opencv-python==4.10.0.84
+optimum==1.21.4
+optional-django==0.1.0
+orjson==3.10.7
+overrides==7.7.0
+packaging==24.1
+pamela==1.2.0
+pandas==2.2.2
+pandocfilters==1.5.1
+parso==0.8.4
+pathspec==0.12.1
+peft==0.12.0
+pexpect==4.9.0
+pillow==10.4.0
+pip==23.1.2
+platformdirs==3.11.0
+pooch==1.8.2
+preshed==3.0.9
+prometheus_client==0.20.0
+prompt_toolkit==3.0.47
+protobuf==5.27.3
+psutil==6.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyOpenSSL==24.2.1
+pyarrow-hotfix==0.6
+pyarrow==17.0.0
+pycparser==2.22
+pydantic==2.8.2
+pydantic_core==2.20.1
+pydot==3.0.1
+pygame==2.6.0
+pygit2==1.15.1
+pygtrie==2.5.0
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-json-logger==2.0.7
+pytz==2024.1
+pyzmq==26.1.0
+qtconsole==5.5.2
+referencing==0.35.1
+regex==2024.7.24
+requests==2.32.3
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.7.1
+rpds-py==0.20.0
+ruamel.yaml.clib==0.2.8
+ruamel.yaml==0.18.6
+safetensors==0.4.4
+scikit-learn==1.5.1
+scipy==1.14.0
+scmrepo==3.3.7
+seaborn==0.13.2
+semver==3.0.2
+sentencepiece==0.2.0
+sentry-sdk==2.14.0
+seqeval==1.2.2
+setproctitle==1.3.3
+setuptools==65.5.0
+shellingham==1.5.4
+shortuuid==1.0.13
+shtab==1.7.1
+simplejson==3.19.2
+six==1.16.0
+smart-open==7.0.4
+smmap==5.0.1
+sniffio==1.3.1
+soundfile==0.12.1
+soupsieve==2.5
+sox==1.5.0
+soxr==0.5.0.post1
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+spacy==3.7.5
+sqltrie==0.11.1
+srsly==2.4.8
+stable_baselines3==2.3.2
+stack-data==0.6.3
+sympy==1.13.2
+tabulate==0.9.0
+tensorboard-data-server==0.7.2
+tensorboard==2.17.1
+terminado==0.18.1
+thinc==8.2.5
+threadpoolctl==3.5.0
+tinycss2==1.3.0
+tokenizers==0.19.1
+tomlkit==0.13.2
+torch==2.4.1
+torchaudio==2.4.1
+torchvision==0.19.1
+tornado==6.4.1
+tqdm==4.66.5
+traitlets==5.14.3
+transformers==4.43.4
+triton==3.0.0
+trl==0.9.6
+txt2tags==3.9
+typer==0.12.3
+types-python-dateutil==2.9.0.20240316
+typing_extensions==4.12.2
+tyro==0.8.6
+tzdata==2024.1
+uri-template==1.3.0
+urllib3==2.2.2
+vine==5.1.0
+voluptuous==0.15.2
+wandb==0.17.9
+wasabi==1.1.3
+wcwidth==0.2.13
+weasel==0.4.1
+webcolors==24.8.0
+webencodings==0.5.1
+websocket-client==1.8.0
+widgetsnbextension==4.0.11
+wrapt==1.16.0
+xlrd==2.0.1
+xxhash==3.4.1
+yarl==1.9.4
+zc.lockfile==3.0.post1

wandb/run-20240916_182041-ye308qxr/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,281 @@

+{
+    "os": "Linux-4.18.0-513.24.1.el8_9.x86_64-x86_64-with-glibc2.28",
+    "python": "3.11.4",
+    "heartbeatAt": "2024-09-16T17:20:42.035382",
+    "startedAt": "2024-09-16T17:20:41.449030",
+    "docker": null,
+    "cuda": null,
+    "args": [],
+    "state": "running",
+    "program": "<python with no main file>",
+    "codePathLocal": null,
+    "host": "codon-gpu-014.ebi.ac.uk",
+    "username": "amrufai",
+    "executable": "/nfs/production/literature/amina-mardiyyah/envs/llm-prompt/bin/python",
+    "cpu_count": 48,
+    "cpu_count_logical": 48,
+    "cpu_freq": {
+        "current": 2869.226979166667,
+        "min": 0.0,
+        "max": 0.0
+    },
+    "cpu_freq_per_core": [
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 3251.96,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 3246.317,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 3260.095,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 3254.547,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 3242.782,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 3219.073,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 3248.226,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 3244.463,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 3242.955,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 803.795,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 3240.712,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 3232.618,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 3235.352,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        },
+        {
+            "current": 2800.0,
+            "min": 0.0,
+            "max": 0.0
+        }
+    ],
+    "disk": {
+        "/": {
+            "total": 47.760292053222656,
+            "used": 15.848091125488281
+        }
+    },
+    "gpu": "NVIDIA A100 80GB PCIe",
+    "gpu_count": 1,
+    "gpu_devices": [
+        {
+            "name": "NVIDIA A100 80GB PCIe",
+            "memory_total": 85899345920
+        }
+    ],
+    "memory": {
+        "total": 502.83758544921875
+    }
+}

wandb/run-20240916_182041-ye308qxr/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"train/loss": 0.0596, "train/grad_norm": 2.547844409942627, "train/learning_rate": 2.3952095808383236e-08, "train/epoch": 4.996436208125445, "train/global_step": 3505, "_timestamp": 1726510014.761806, "_runtime": 2773.28347492218, "_step": 10, "eval/loss": 0.13957327604293823, "eval/f1": 0.8316917954223653, "eval/precision": 0.8305416576027844, "eval/recall": 0.8328451230913057, "eval/accuracy": 0.9604729231827337, "eval/runtime": 75.264, "eval/samples_per_second": 149.102, "eval/steps_per_second": 18.641, "train_runtime": 2491.7461, "train_samples_per_second": 90.069, "train_steps_per_second": 1.407, "total_flos": 4.319274218928017e+16, "train_loss": 0.1366414232022752}

wandb/run-20240916_182041-ye308qxr/logs/debug-internal.log ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20240916_182041-ye308qxr/logs/debug.log ADDED Viewed

	@@ -0,0 +1,55 @@

+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Current SDK version is 0.17.9
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Configure stats pid to 1120321
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Loading settings from /homes/amrufai/.config/wandb/settings
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Loading settings from /nfs/production/literature/amina-mardiyyah/wandb/settings
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Loading settings from environment variables: {}
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program': '<python with no main file>'}
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Applying login settings: {}
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_setup.py:_flush():77] Applying login settings: {}
+2024-09-16 18:20:41,469 INFO    MainThread:1120321 [wandb_init.py:_log_setup():524] Logging user logs to /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/wandb/run-20240916_182041-ye308qxr/logs/debug.log
+2024-09-16 18:20:41,470 INFO    MainThread:1120321 [wandb_init.py:_log_setup():525] Logging internal logs to /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/wandb/run-20240916_182041-ye308qxr/logs/debug-internal.log
+2024-09-16 18:20:41,470 INFO    MainThread:1120321 [wandb_init.py:_jupyter_setup():470] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f39d0d93210>
+2024-09-16 18:20:41,470 INFO    MainThread:1120321 [wandb_init.py:init():608] calling init triggers
+2024-09-16 18:20:41,470 INFO    MainThread:1120321 [wandb_init.py:init():615] wandb.init called with sweep_config: {}
+config: {}
+2024-09-16 18:20:41,470 INFO    MainThread:1120321 [wandb_init.py:init():658] starting backend
+2024-09-16 18:20:41,470 INFO    MainThread:1120321 [wandb_init.py:init():662] setting up manager
+2024-09-16 18:20:41,472 INFO    MainThread:1120321 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2024-09-16 18:20:41,477 INFO    MainThread:1120321 [wandb_init.py:init():670] backend started and connected
+2024-09-16 18:20:41,486 INFO    MainThread:1120321 [wandb_run.py:_label_probe_notebook():1344] probe notebook
+2024-09-16 18:20:41,488 INFO    MainThread:1120321 [wandb_run.py:_label_probe_notebook():1354] Unable to probe notebook: 'NoneType' object has no attribute 'get'
+2024-09-16 18:20:41,488 INFO    MainThread:1120321 [wandb_init.py:init():768] updated telemetry
+2024-09-16 18:20:41,499 INFO    MainThread:1120321 [wandb_init.py:init():801] communicating run to backend with 90.0 second timeout
+2024-09-16 18:20:41,903 INFO    MainThread:1120321 [wandb_init.py:init():852] starting run threads in backend
+2024-09-16 18:20:42,871 INFO    MainThread:1120321 [wandb_run.py:_console_start():2465] atexit reg
+2024-09-16 18:20:42,872 INFO    MainThread:1120321 [wandb_run.py:_redirect():2311] redirect: wrap_raw
+2024-09-16 18:20:42,873 INFO    MainThread:1120321 [wandb_run.py:_redirect():2376] Wrapping output streams.
+2024-09-16 18:20:42,873 INFO    MainThread:1120321 [wandb_run.py:_redirect():2401] Redirects installed.
+2024-09-16 18:20:42,881 INFO    MainThread:1120321 [wandb_init.py:init():895] run started, returning control to user process
+2024-09-16 18:20:42,888 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 18:20:42,888 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 18:22:30,477 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 18:22:30,484 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 18:22:30,484 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 18:24:04,645 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 18:24:04,650 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 18:24:04,650 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 18:24:11,088 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 18:24:11,158 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 18:24:11,158 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 18:24:14,909 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 18:24:14,916 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 18:24:14,916 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 18:25:16,270 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 18:25:23,022 INFO    MainThread:1120321 [wandb_run.py:_config_callback():1392] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['XLMRobertaForTokenClassification'], 'finetuning_task': None, 'id2label': {0: 'O', 1: 'B-DATE', 2: 'I-DATE', 3: 'B-PER', 4: 'I-PER', 5: 'B-ORG', 6: 'I-ORG', 7: 'B-LOC', 8: 'I-LOC'}, 'label2id': {'B-DATE': 1, 'B-LOC': 7, 'B-ORG': 5, 'B-PER': 3, 'I-DATE': 2, 'I-LOC': 8, 'I-ORG': 6, 'I-PER': 4, 'O': 0}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0', 'transformers_version': '4.43.4', 'adapters': {'adapters': {}, 'config_map': {}, 'fusion_config_map': {}, 'fusions': {}}, 'gradient_checkpointing': False, 'model_type': 'xlm-roberta', 'output_past': True, 'vocab_size': 250002, 'hidden_size': 1024, 'num_hidden_layers': 24, 
'num_attention_heads': 16, 'hidden_act': 'gelu', 'intermediate_size': 4096, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 514, 'type_vocab_size': 1, 'initializer_range': 0.02, 'layer_norm_eps': 1e-05, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 1000, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/logs', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 2, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 3407, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 
'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 
'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'eval_use_gather_object': False}
+2024-09-16 18:25:23,024 INFO    MainThread:1120321 [wandb_config.py:__setitem__():154] config set model/num_parameters = 558850057 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f3ae7ac3410>>
+2024-09-16 18:25:23,025 INFO    MainThread:1120321 [wandb_run.py:_config_callback():1392] config_cb model/num_parameters 558850057 None
+2024-09-16 19:06:54,766 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 19:06:54,766 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 19:07:02,732 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 19:07:02,738 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 19:07:02,738 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 19:07:21,411 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
+2024-09-16 19:07:21,531 INFO    MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
+2024-09-16 19:07:21,531 INFO    MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
+2024-09-16 19:08:43,069 INFO    MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend

wandb/run-20240916_182041-ye308qxr/run-ye308qxr.wandb ADDED Viewed

Binary file (198 kB). View file