Mardiyyah committed on
Commit
5af906c
1 Parent(s): fce413e

Mlr-shared-task-ewc_stabilised

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: afl-3.0
3
+ base_model: masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - f1
8
+ - precision
9
+ - recall
10
+ - accuracy
11
+ model-index:
12
+ - name: ewc_stabilised
13
+ results: []
14
+ ---
15
+
16
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
+ should probably proofread and complete it, then remove this comment. -->
18
+
19
+ # ewc_stabilised
20
+
21
+ This model is a fine-tuned version of [masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0](https://huggingface.co/masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0) on a dataset that was not recorded by the Trainer (dataset name unspecified).
22
+ It achieves the following results on the evaluation set:
23
+ - Loss: 0.1396
24
+ - F1: 0.8317
25
+ - Precision: 0.8305
26
+ - Recall: 0.8328
27
+ - Accuracy: 0.9605
28
+
29
+ ## Model description
30
+
31
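+ A token-classification (named-entity recognition) model using the `XLMRobertaForTokenClassification` architecture (24 hidden layers, hidden size 1024). It predicts BIO tags over four entity types: `DATE`, `PER`, `ORG` and `LOC` (plus `O`). More information needed.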
32
+
33
+ ## Intended uses & limitations
34
+
35
+ More information needed
36
+
37
+ ## Training and evaluation data
38
+
39
+ More information needed
40
+
41
+ ## Training procedure
42
+
43
+ ### Training hyperparameters
44
+
45
+ The following hyperparameters were used during training:
46
+ - learning_rate: 2e-05
47
+ - train_batch_size: 16
48
+ - eval_batch_size: 8
49
+ - seed: 3407
50
+ - gradient_accumulation_steps: 4
51
+ - total_train_batch_size: 64
52
+ - optimizer: AdamW (`adamw_torch`) with betas=(0.9,0.999), epsilon=1e-08 and weight_decay=0.01
53
+ - lr_scheduler_type: linear
54
+ - lr_scheduler_warmup_steps: 1000
55
+ - num_epochs: 5
56
+ - mixed_precision_training: Native AMP
57
+
58
+ ### Training results
59
+
60
+ | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall | Accuracy |
61
+ |:-------------:|:------:|:----:|:---------------:|:------:|:---------:|:------:|:--------:|
62
+ | 0.3184 | 0.9993 | 701 | 0.1480 | 0.7895 | 0.7950 | 0.7841 | 0.9511 |
63
+ | 0.1333 | 2.0 | 1403 | 0.1271 | 0.8195 | 0.8148 | 0.8242 | 0.9578 |
64
+ | 0.0975 | 2.9993 | 2104 | 0.1241 | 0.8289 | 0.8254 | 0.8324 | 0.9598 |
65
+ | 0.0744 | 4.0 | 2806 | 0.1293 | 0.8307 | 0.8313 | 0.8300 | 0.9603 |
66
+ | 0.0596 | 4.9964 | 3505 | 0.1396 | 0.8317 | 0.8305 | 0.8328 | 0.9605 |
67
+
68
+
69
+ ### Framework versions
70
+
71
+ - Transformers 4.43.4
72
+ - Pytorch 2.4.1+cu121
73
+ - Datasets 2.20.0
74
+ - Tokenizers 0.19.1
config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0",
3
+ "adapters": {
4
+ "adapters": {},
5
+ "config_map": {},
6
+ "fusion_config_map": {},
7
+ "fusions": {}
8
+ },
9
+ "architectures": [
10
+ "XLMRobertaForTokenClassification"
11
+ ],
12
+ "attention_probs_dropout_prob": 0.1,
13
+ "bos_token_id": 0,
14
+ "classifier_dropout": null,
15
+ "eos_token_id": 2,
16
+ "gradient_checkpointing": false,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.1,
19
+ "hidden_size": 1024,
20
+ "id2label": {
21
+ "0": "O",
22
+ "1": "B-DATE",
23
+ "2": "I-DATE",
24
+ "3": "B-PER",
25
+ "4": "I-PER",
26
+ "5": "B-ORG",
27
+ "6": "I-ORG",
28
+ "7": "B-LOC",
29
+ "8": "I-LOC"
30
+ },
31
+ "initializer_range": 0.02,
32
+ "intermediate_size": 4096,
33
+ "label2id": {
34
+ "B-DATE": 1,
35
+ "B-LOC": 7,
36
+ "B-ORG": 5,
37
+ "B-PER": 3,
38
+ "I-DATE": 2,
39
+ "I-LOC": 8,
40
+ "I-ORG": 6,
41
+ "I-PER": 4,
42
+ "O": 0
43
+ },
44
+ "layer_norm_eps": 1e-05,
45
+ "max_position_embeddings": 514,
46
+ "model_type": "xlm-roberta",
47
+ "num_attention_heads": 16,
48
+ "num_hidden_layers": 24,
49
+ "output_past": true,
50
+ "pad_token_id": 1,
51
+ "position_embedding_type": "absolute",
52
+ "torch_dtype": "float32",
53
+ "transformers_version": "4.43.4",
54
+ "type_vocab_size": 1,
55
+ "use_cache": true,
56
+ "vocab_size": 250002
57
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ad0061b6f58ea3ce0807499ebb553a49eaf1c3396678ccbb63e42b6bd4d2d9
3
+ size 2235448756
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
3
+ size 17082987
tokenizer_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "mask_token": "<mask>",
49
+ "model_max_length": 512,
50
+ "pad_token": "<pad>",
51
+ "sep_token": "</s>",
52
+ "tokenizer_class": "XLMRobertaTokenizer",
53
+ "unk_token": "<unk>"
54
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bac205bc4ab5a9371a0af99442a4adf7691cee6dfbb90430ddecdc3718a4fb6
3
+ size 5240
wandb/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Current SDK version is 0.17.9
2
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Configure stats pid to 1120321
3
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Loading settings from /homes/amrufai/.config/wandb/settings
4
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Loading settings from /nfs/production/literature/amina-mardiyyah/wandb/settings
5
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Loading settings from environment variables: {}
6
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program': '<python with no main file>'}
7
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Applying login settings: {}
8
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Applying login settings: {}
9
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_init.py:_log_setup():524] Logging user logs to /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/wandb/run-20240916_182041-ye308qxr/logs/debug.log
10
+ 2024-09-16 18:20:41,470 INFO MainThread:1120321 [wandb_init.py:_log_setup():525] Logging internal logs to /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/wandb/run-20240916_182041-ye308qxr/logs/debug-internal.log
11
+ 2024-09-16 18:20:41,470 INFO MainThread:1120321 [wandb_init.py:_jupyter_setup():470] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f39d0d93210>
12
+ 2024-09-16 18:20:41,470 INFO MainThread:1120321 [wandb_init.py:init():608] calling init triggers
13
+ 2024-09-16 18:20:41,470 INFO MainThread:1120321 [wandb_init.py:init():615] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2024-09-16 18:20:41,470 INFO MainThread:1120321 [wandb_init.py:init():658] starting backend
16
+ 2024-09-16 18:20:41,470 INFO MainThread:1120321 [wandb_init.py:init():662] setting up manager
17
+ 2024-09-16 18:20:41,472 INFO MainThread:1120321 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2024-09-16 18:20:41,477 INFO MainThread:1120321 [wandb_init.py:init():670] backend started and connected
19
+ 2024-09-16 18:20:41,486 INFO MainThread:1120321 [wandb_run.py:_label_probe_notebook():1344] probe notebook
20
+ 2024-09-16 18:20:41,488 INFO MainThread:1120321 [wandb_run.py:_label_probe_notebook():1354] Unable to probe notebook: 'NoneType' object has no attribute 'get'
21
+ 2024-09-16 18:20:41,488 INFO MainThread:1120321 [wandb_init.py:init():768] updated telemetry
22
+ 2024-09-16 18:20:41,499 INFO MainThread:1120321 [wandb_init.py:init():801] communicating run to backend with 90.0 second timeout
23
+ 2024-09-16 18:20:41,903 INFO MainThread:1120321 [wandb_init.py:init():852] starting run threads in backend
24
+ 2024-09-16 18:20:42,871 INFO MainThread:1120321 [wandb_run.py:_console_start():2465] atexit reg
25
+ 2024-09-16 18:20:42,872 INFO MainThread:1120321 [wandb_run.py:_redirect():2311] redirect: wrap_raw
26
+ 2024-09-16 18:20:42,873 INFO MainThread:1120321 [wandb_run.py:_redirect():2376] Wrapping output streams.
27
+ 2024-09-16 18:20:42,873 INFO MainThread:1120321 [wandb_run.py:_redirect():2401] Redirects installed.
28
+ 2024-09-16 18:20:42,881 INFO MainThread:1120321 [wandb_init.py:init():895] run started, returning control to user process
29
+ 2024-09-16 18:20:42,888 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
30
+ 2024-09-16 18:20:42,888 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
31
+ 2024-09-16 18:22:30,477 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
32
+ 2024-09-16 18:22:30,484 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
33
+ 2024-09-16 18:22:30,484 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
34
+ 2024-09-16 18:24:04,645 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
35
+ 2024-09-16 18:24:04,650 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
36
+ 2024-09-16 18:24:04,650 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
37
+ 2024-09-16 18:24:11,088 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
38
+ 2024-09-16 18:24:11,158 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
39
+ 2024-09-16 18:24:11,158 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
40
+ 2024-09-16 18:24:14,909 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
41
+ 2024-09-16 18:24:14,916 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
42
+ 2024-09-16 18:24:14,916 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
43
+ 2024-09-16 18:25:16,270 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
44
+ 2024-09-16 18:25:23,022 INFO MainThread:1120321 [wandb_run.py:_config_callback():1392] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['XLMRobertaForTokenClassification'], 'finetuning_task': None, 'id2label': {0: 'O', 1: 'B-DATE', 2: 'I-DATE', 3: 'B-PER', 4: 'I-PER', 5: 'B-ORG', 6: 'I-ORG', 7: 'B-LOC', 8: 'I-LOC'}, 'label2id': {'B-DATE': 1, 'B-LOC': 7, 'B-ORG': 5, 'B-PER': 3, 'I-DATE': 2, 'I-LOC': 8, 'I-ORG': 6, 'I-PER': 4, 'O': 0}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0', 'transformers_version': '4.43.4', 'adapters': {'adapters': {}, 'config_map': {}, 'fusion_config_map': {}, 'fusions': {}}, 'gradient_checkpointing': False, 'model_type': 'xlm-roberta', 'output_past': True, 'vocab_size': 250002, 'hidden_size': 1024, 'num_hidden_layers': 24, 
'num_attention_heads': 16, 'hidden_act': 'gelu', 'intermediate_size': 4096, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 514, 'type_vocab_size': 1, 'initializer_range': 0.02, 'layer_norm_eps': 1e-05, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 1000, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/logs', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 2, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 3407, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 
'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 
'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'eval_use_gather_object': False}
45
+ 2024-09-16 18:25:23,024 INFO MainThread:1120321 [wandb_config.py:__setitem__():154] config set model/num_parameters = 558850057 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f3ae7ac3410>>
46
+ 2024-09-16 18:25:23,025 INFO MainThread:1120321 [wandb_run.py:_config_callback():1392] config_cb model/num_parameters 558850057 None
47
+ 2024-09-16 19:06:54,766 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
48
+ 2024-09-16 19:06:54,766 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
49
+ 2024-09-16 19:07:02,732 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
50
+ 2024-09-16 19:07:02,738 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
51
+ 2024-09-16 19:07:02,738 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
52
+ 2024-09-16 19:07:21,411 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
53
+ 2024-09-16 19:07:21,531 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
54
+ 2024-09-16 19:07:21,531 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
55
+ 2024-09-16 19:08:43,069 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
wandb/run-20240916_182041-ye308qxr/files/config.yaml ADDED
@@ -0,0 +1,746 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.11.4
7
+ cli_version: 0.17.9
8
+ framework: huggingface
9
+ huggingface_version: 4.43.4
10
+ is_jupyter_run: true
11
+ is_kaggle_kernel: false
12
+ start_time: 1726507241
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 5
17
+ - 11
18
+ - 41
19
+ - 49
20
+ - 51
21
+ - 53
22
+ - 55
23
+ - 71
24
+ - 98
25
+ - 100
26
+ - 105
27
+ 2:
28
+ - 1
29
+ - 5
30
+ - 11
31
+ - 41
32
+ - 49
33
+ - 51
34
+ - 53
35
+ - 55
36
+ - 71
37
+ - 98
38
+ - 100
39
+ - 105
40
+ 3:
41
+ - 7
42
+ - 19
43
+ - 23
44
+ - 62
45
+ - 66
46
+ 4: 3.11.4
47
+ 5: 0.17.9
48
+ 6: 4.43.4
49
+ 8:
50
+ - 1
51
+ - 5
52
+ 9:
53
+ 1: transformers_trainer
54
+ 13: linux-x86_64
55
+ m:
56
+ - 1: train/global_step
57
+ 6:
58
+ - 3
59
+ - 1: train/loss
60
+ 5: 1
61
+ 6:
62
+ - 1
63
+ - 1: train/grad_norm
64
+ 5: 1
65
+ 6:
66
+ - 1
67
+ - 1: train/learning_rate
68
+ 5: 1
69
+ 6:
70
+ - 1
71
+ - 1: train/epoch
72
+ 5: 1
73
+ 6:
74
+ - 1
75
+ - 1: eval/loss
76
+ 5: 1
77
+ 6:
78
+ - 1
79
+ - 1: eval/f1
80
+ 5: 1
81
+ 6:
82
+ - 1
83
+ - 1: eval/precision
84
+ 5: 1
85
+ 6:
86
+ - 1
87
+ - 1: eval/recall
88
+ 5: 1
89
+ 6:
90
+ - 1
91
+ - 1: eval/accuracy
92
+ 5: 1
93
+ 6:
94
+ - 1
95
+ - 1: eval/runtime
96
+ 5: 1
97
+ 6:
98
+ - 1
99
+ - 1: eval/samples_per_second
100
+ 5: 1
101
+ 6:
102
+ - 1
103
+ - 1: eval/steps_per_second
104
+ 5: 1
105
+ 6:
106
+ - 1
107
+ return_dict:
108
+ desc: null
109
+ value: true
110
+ output_hidden_states:
111
+ desc: null
112
+ value: false
113
+ output_attentions:
114
+ desc: null
115
+ value: false
116
+ torchscript:
117
+ desc: null
118
+ value: false
119
+ torch_dtype:
120
+ desc: null
121
+ value: float32
122
+ use_bfloat16:
123
+ desc: null
124
+ value: false
125
+ tf_legacy_loss:
126
+ desc: null
127
+ value: false
128
+ pruned_heads:
129
+ desc: null
130
+ value: {}
131
+ tie_word_embeddings:
132
+ desc: null
133
+ value: true
134
+ chunk_size_feed_forward:
135
+ desc: null
136
+ value: 0
137
+ is_encoder_decoder:
138
+ desc: null
139
+ value: false
140
+ is_decoder:
141
+ desc: null
142
+ value: false
143
+ cross_attention_hidden_size:
144
+ desc: null
145
+ value: null
146
+ add_cross_attention:
147
+ desc: null
148
+ value: false
149
+ tie_encoder_decoder:
150
+ desc: null
151
+ value: false
152
+ max_length:
153
+ desc: null
154
+ value: 20
155
+ min_length:
156
+ desc: null
157
+ value: 0
158
+ do_sample:
159
+ desc: null
160
+ value: false
161
+ early_stopping:
162
+ desc: null
163
+ value: false
164
+ num_beams:
165
+ desc: null
166
+ value: 1
167
+ num_beam_groups:
168
+ desc: null
169
+ value: 1
170
+ diversity_penalty:
171
+ desc: null
172
+ value: 0.0
173
+ temperature:
174
+ desc: null
175
+ value: 1.0
176
+ top_k:
177
+ desc: null
178
+ value: 50
179
+ top_p:
180
+ desc: null
181
+ value: 1.0
182
+ typical_p:
183
+ desc: null
184
+ value: 1.0
185
+ repetition_penalty:
186
+ desc: null
187
+ value: 1.0
188
+ length_penalty:
189
+ desc: null
190
+ value: 1.0
191
+ no_repeat_ngram_size:
192
+ desc: null
193
+ value: 0
194
+ encoder_no_repeat_ngram_size:
195
+ desc: null
196
+ value: 0
197
+ bad_words_ids:
198
+ desc: null
199
+ value: null
200
+ num_return_sequences:
201
+ desc: null
202
+ value: 1
203
+ output_scores:
204
+ desc: null
205
+ value: false
206
+ return_dict_in_generate:
207
+ desc: null
208
+ value: false
209
+ forced_bos_token_id:
210
+ desc: null
211
+ value: null
212
+ forced_eos_token_id:
213
+ desc: null
214
+ value: null
215
+ remove_invalid_values:
216
+ desc: null
217
+ value: false
218
+ exponential_decay_length_penalty:
219
+ desc: null
220
+ value: null
221
+ suppress_tokens:
222
+ desc: null
223
+ value: null
224
+ begin_suppress_tokens:
225
+ desc: null
226
+ value: null
227
+ architectures:
228
+ desc: null
229
+ value:
230
+ - XLMRobertaForTokenClassification
231
+ finetuning_task:
232
+ desc: null
233
+ value: null
234
+ id2label:
235
+ desc: null
236
+ value:
237
+ '0': O
238
+ '1': B-DATE
239
+ '2': I-DATE
240
+ '3': B-PER
241
+ '4': I-PER
242
+ '5': B-ORG
243
+ '6': I-ORG
244
+ '7': B-LOC
245
+ '8': I-LOC
246
+ label2id:
247
+ desc: null
248
+ value:
249
+ B-DATE: 1
250
+ B-LOC: 7
251
+ B-ORG: 5
252
+ B-PER: 3
253
+ I-DATE: 2
254
+ I-LOC: 8
255
+ I-ORG: 6
256
+ I-PER: 4
257
+ O: 0
258
+ tokenizer_class:
259
+ desc: null
260
+ value: null
261
+ prefix:
262
+ desc: null
263
+ value: null
264
+ bos_token_id:
265
+ desc: null
266
+ value: 0
267
+ pad_token_id:
268
+ desc: null
269
+ value: 1
270
+ eos_token_id:
271
+ desc: null
272
+ value: 2
273
+ sep_token_id:
274
+ desc: null
275
+ value: null
276
+ decoder_start_token_id:
277
+ desc: null
278
+ value: null
279
+ task_specific_params:
280
+ desc: null
281
+ value: null
282
+ problem_type:
283
+ desc: null
284
+ value: null
285
+ _name_or_path:
286
+ desc: null
287
+ value: masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0
288
+ transformers_version:
289
+ desc: null
290
+ value: 4.43.4
291
+ adapters:
292
+ desc: null
293
+ value:
294
+ adapters: {}
295
+ config_map: {}
296
+ fusion_config_map: {}
297
+ fusions: {}
298
+ gradient_checkpointing:
299
+ desc: null
300
+ value: false
301
+ model_type:
302
+ desc: null
303
+ value: xlm-roberta
304
+ output_past:
305
+ desc: null
306
+ value: true
307
+ vocab_size:
308
+ desc: null
309
+ value: 250002
310
+ hidden_size:
311
+ desc: null
312
+ value: 1024
313
+ num_hidden_layers:
314
+ desc: null
315
+ value: 24
316
+ num_attention_heads:
317
+ desc: null
318
+ value: 16
319
+ hidden_act:
320
+ desc: null
321
+ value: gelu
322
+ intermediate_size:
323
+ desc: null
324
+ value: 4096
325
+ hidden_dropout_prob:
326
+ desc: null
327
+ value: 0.1
328
+ attention_probs_dropout_prob:
329
+ desc: null
330
+ value: 0.1
331
+ max_position_embeddings:
332
+ desc: null
333
+ value: 514
334
+ type_vocab_size:
335
+ desc: null
336
+ value: 1
337
+ initializer_range:
338
+ desc: null
339
+ value: 0.02
340
+ layer_norm_eps:
341
+ desc: null
342
+ value: 1.0e-05
343
+ position_embedding_type:
344
+ desc: null
345
+ value: absolute
346
+ use_cache:
347
+ desc: null
348
+ value: true
349
+ classifier_dropout:
350
+ desc: null
351
+ value: null
352
+ output_dir:
353
+ desc: null
354
+ value: /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised
355
+ overwrite_output_dir:
356
+ desc: null
357
+ value: false
358
+ do_train:
359
+ desc: null
360
+ value: false
361
+ do_eval:
362
+ desc: null
363
+ value: true
364
+ do_predict:
365
+ desc: null
366
+ value: false
367
+ eval_strategy:
368
+ desc: null
369
+ value: epoch
370
+ prediction_loss_only:
371
+ desc: null
372
+ value: false
373
+ per_device_train_batch_size:
374
+ desc: null
375
+ value: 16
376
+ per_device_eval_batch_size:
377
+ desc: null
378
+ value: 8
379
+ per_gpu_train_batch_size:
380
+ desc: null
381
+ value: null
382
+ per_gpu_eval_batch_size:
383
+ desc: null
384
+ value: null
385
+ gradient_accumulation_steps:
386
+ desc: null
387
+ value: 4
388
+ eval_accumulation_steps:
389
+ desc: null
390
+ value: null
391
+ eval_delay:
392
+ desc: null
393
+ value: 0
394
+ torch_empty_cache_steps:
395
+ desc: null
396
+ value: null
397
+ learning_rate:
398
+ desc: null
399
+ value: 2.0e-05
400
+ weight_decay:
401
+ desc: null
402
+ value: 0.01
403
+ adam_beta1:
404
+ desc: null
405
+ value: 0.9
406
+ adam_beta2:
407
+ desc: null
408
+ value: 0.999
409
+ adam_epsilon:
410
+ desc: null
411
+ value: 1.0e-08
412
+ max_grad_norm:
413
+ desc: null
414
+ value: 1.0
415
+ num_train_epochs:
416
+ desc: null
417
+ value: 5
418
+ max_steps:
419
+ desc: null
420
+ value: -1
421
+ lr_scheduler_type:
422
+ desc: null
423
+ value: linear
424
+ lr_scheduler_kwargs:
425
+ desc: null
426
+ value: {}
427
+ warmup_ratio:
428
+ desc: null
429
+ value: 0.0
430
+ warmup_steps:
431
+ desc: null
432
+ value: 1000
433
+ log_level:
434
+ desc: null
435
+ value: passive
436
+ log_level_replica:
437
+ desc: null
438
+ value: warning
439
+ log_on_each_node:
440
+ desc: null
441
+ value: true
442
+ logging_dir:
443
+ desc: null
444
+ value: /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/logs
445
+ logging_strategy:
446
+ desc: null
447
+ value: epoch
448
+ logging_first_step:
449
+ desc: null
450
+ value: false
451
+ logging_steps:
452
+ desc: null
453
+ value: 500
454
+ logging_nan_inf_filter:
455
+ desc: null
456
+ value: true
457
+ save_strategy:
458
+ desc: null
459
+ value: epoch
460
+ save_steps:
461
+ desc: null
462
+ value: 500
463
+ save_total_limit:
464
+ desc: null
465
+ value: 2
466
+ save_safetensors:
467
+ desc: null
468
+ value: true
469
+ save_on_each_node:
470
+ desc: null
471
+ value: false
472
+ save_only_model:
473
+ desc: null
474
+ value: false
475
+ restore_callback_states_from_checkpoint:
476
+ desc: null
477
+ value: false
478
+ no_cuda:
479
+ desc: null
480
+ value: false
481
+ use_cpu:
482
+ desc: null
483
+ value: false
484
+ use_mps_device:
485
+ desc: null
486
+ value: false
487
+ seed:
488
+ desc: null
489
+ value: 3407
490
+ data_seed:
491
+ desc: null
492
+ value: null
493
+ jit_mode_eval:
494
+ desc: null
495
+ value: false
496
+ use_ipex:
497
+ desc: null
498
+ value: false
499
+ bf16:
500
+ desc: null
501
+ value: false
502
+ fp16:
503
+ desc: null
504
+ value: true
505
+ fp16_opt_level:
506
+ desc: null
507
+ value: O1
508
+ half_precision_backend:
509
+ desc: null
510
+ value: auto
511
+ bf16_full_eval:
512
+ desc: null
513
+ value: false
514
+ fp16_full_eval:
515
+ desc: null
516
+ value: false
517
+ tf32:
518
+ desc: null
519
+ value: null
520
+ local_rank:
521
+ desc: null
522
+ value: 0
523
+ ddp_backend:
524
+ desc: null
525
+ value: null
526
+ tpu_num_cores:
527
+ desc: null
528
+ value: null
529
+ tpu_metrics_debug:
530
+ desc: null
531
+ value: false
532
+ debug:
533
+ desc: null
534
+ value: []
535
+ dataloader_drop_last:
536
+ desc: null
537
+ value: false
538
+ eval_steps:
539
+ desc: null
540
+ value: null
541
+ dataloader_num_workers:
542
+ desc: null
543
+ value: 0
544
+ dataloader_prefetch_factor:
545
+ desc: null
546
+ value: null
547
+ past_index:
548
+ desc: null
549
+ value: -1
550
+ run_name:
551
+ desc: null
552
+ value: /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised
553
+ disable_tqdm:
554
+ desc: null
555
+ value: false
556
+ remove_unused_columns:
557
+ desc: null
558
+ value: true
559
+ label_names:
560
+ desc: null
561
+ value: null
562
+ load_best_model_at_end:
563
+ desc: null
564
+ value: true
565
+ metric_for_best_model:
566
+ desc: null
567
+ value: eval_f1
568
+ greater_is_better:
569
+ desc: null
570
+ value: true
571
+ ignore_data_skip:
572
+ desc: null
573
+ value: false
574
+ fsdp:
575
+ desc: null
576
+ value: []
577
+ fsdp_min_num_params:
578
+ desc: null
579
+ value: 0
580
+ fsdp_config:
581
+ desc: null
582
+ value:
583
+ min_num_params: 0
584
+ xla: false
585
+ xla_fsdp_v2: false
586
+ xla_fsdp_grad_ckpt: false
587
+ fsdp_transformer_layer_cls_to_wrap:
588
+ desc: null
589
+ value: null
590
+ accelerator_config:
591
+ desc: null
592
+ value:
593
+ split_batches: false
594
+ dispatch_batches: null
595
+ even_batches: true
596
+ use_seedable_sampler: true
597
+ non_blocking: false
598
+ gradient_accumulation_kwargs: null
599
+ deepspeed:
600
+ desc: null
601
+ value: null
602
+ label_smoothing_factor:
603
+ desc: null
604
+ value: 0.0
605
+ optim:
606
+ desc: null
607
+ value: adamw_torch
608
+ optim_args:
609
+ desc: null
610
+ value: null
611
+ adafactor:
612
+ desc: null
613
+ value: false
614
+ group_by_length:
615
+ desc: null
616
+ value: true
617
+ length_column_name:
618
+ desc: null
619
+ value: length
620
+ report_to:
621
+ desc: null
622
+ value:
623
+ - wandb
624
+ ddp_find_unused_parameters:
625
+ desc: null
626
+ value: null
627
+ ddp_bucket_cap_mb:
628
+ desc: null
629
+ value: null
630
+ ddp_broadcast_buffers:
631
+ desc: null
632
+ value: null
633
+ dataloader_pin_memory:
634
+ desc: null
635
+ value: true
636
+ dataloader_persistent_workers:
637
+ desc: null
638
+ value: false
639
+ skip_memory_metrics:
640
+ desc: null
641
+ value: true
642
+ use_legacy_prediction_loop:
643
+ desc: null
644
+ value: false
645
+ push_to_hub:
646
+ desc: null
647
+ value: false
648
+ resume_from_checkpoint:
649
+ desc: null
650
+ value: null
651
+ hub_model_id:
652
+ desc: null
653
+ value: null
654
+ hub_strategy:
655
+ desc: null
656
+ value: every_save
657
+ hub_token:
658
+ desc: null
659
+ value: <HUB_TOKEN>
660
+ hub_private_repo:
661
+ desc: null
662
+ value: false
663
+ hub_always_push:
664
+ desc: null
665
+ value: false
666
+ gradient_checkpointing_kwargs:
667
+ desc: null
668
+ value: null
669
+ include_inputs_for_metrics:
670
+ desc: null
671
+ value: false
672
+ eval_do_concat_batches:
673
+ desc: null
674
+ value: true
675
+ fp16_backend:
676
+ desc: null
677
+ value: auto
678
+ evaluation_strategy:
679
+ desc: null
680
+ value: null
681
+ push_to_hub_model_id:
682
+ desc: null
683
+ value: null
684
+ push_to_hub_organization:
685
+ desc: null
686
+ value: null
687
+ push_to_hub_token:
688
+ desc: null
689
+ value: <PUSH_TO_HUB_TOKEN>
690
+ mp_parameters:
691
+ desc: null
692
+ value: ''
693
+ auto_find_batch_size:
694
+ desc: null
695
+ value: false
696
+ full_determinism:
697
+ desc: null
698
+ value: false
699
+ torchdynamo:
700
+ desc: null
701
+ value: null
702
+ ray_scope:
703
+ desc: null
704
+ value: last
705
+ ddp_timeout:
706
+ desc: null
707
+ value: 1800
708
+ torch_compile:
709
+ desc: null
710
+ value: false
711
+ torch_compile_backend:
712
+ desc: null
713
+ value: null
714
+ torch_compile_mode:
715
+ desc: null
716
+ value: null
717
+ dispatch_batches:
718
+ desc: null
719
+ value: null
720
+ split_batches:
721
+ desc: null
722
+ value: null
723
+ include_tokens_per_second:
724
+ desc: null
725
+ value: false
726
+ include_num_input_tokens_seen:
727
+ desc: null
728
+ value: false
729
+ neftune_noise_alpha:
730
+ desc: null
731
+ value: null
732
+ optim_target_modules:
733
+ desc: null
734
+ value: null
735
+ batch_eval_metrics:
736
+ desc: null
737
+ value: false
738
+ eval_on_start:
739
+ desc: null
740
+ value: false
741
+ eval_use_gather_object:
742
+ desc: null
743
+ value: false
744
+ model/num_parameters:
745
+ desc: null
746
+ value: 558850057
wandb/run-20240916_182041-ye308qxr/files/output.log ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /nfs/production/literature/amina-mardiyyah/envs/llm-prompt/lib/python3.11/site-packages/accelerate/accelerator.py:488: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
2
+ self.scaler = torch.cuda.amp.GradScaler(**kwargs)
3
+ Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
4
+ wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
5
+ Classification Report:
6
+ precision recall f1-score support
7
+ DATE 0.76 0.70 0.73 4012
8
+ LOC 0.77 0.78 0.77 9327
9
+ ORG 0.73 0.69 0.71 7547
10
+ PER 0.87 0.88 0.88 11204
11
+ micro avg 0.80 0.78 0.79 32090
12
+ macro avg 0.78 0.76 0.77 32090
13
+ weighted avg 0.79 0.78 0.79 32090
14
+ Classification Report:
15
+ precision recall f1-score support
16
+ DATE 0.76 0.75 0.76 4012
17
+ LOC 0.82 0.79 0.80 9327
18
+ ORG 0.71 0.80 0.76 7547
19
+ PER 0.90 0.90 0.90 11204
20
+ micro avg 0.81 0.82 0.82 32090
21
+ macro avg 0.80 0.81 0.80 32090
22
+ weighted avg 0.82 0.82 0.82 32090
23
+ Classification Report:
24
+ precision recall f1-score support
25
+ DATE 0.76 0.77 0.76 4012
26
+ LOC 0.81 0.81 0.81 9327
27
+ ORG 0.78 0.78 0.78 7547
28
+ PER 0.90 0.91 0.90 11204
29
+ micro avg 0.83 0.83 0.83 32090
30
+ macro avg 0.81 0.82 0.81 32090
31
+ weighted avg 0.83 0.83 0.83 32090
32
+ Classification Report:
33
+ precision recall f1-score support
34
+ DATE 0.77 0.77 0.77 4012
35
+ LOC 0.81 0.81 0.81 9327
36
+ ORG 0.77 0.79 0.78 7547
37
+ PER 0.92 0.90 0.91 11204
38
+ micro avg 0.83 0.83 0.83 32090
39
+ macro avg 0.82 0.82 0.82 32090
40
+ weighted avg 0.83 0.83 0.83 32090
41
+ Classification Report:
42
+ precision recall f1-score support
43
+ DATE 0.77 0.77 0.77 4012
44
+ LOC 0.81 0.81 0.81 9327
45
+ ORG 0.77 0.80 0.79 7547
46
+ PER 0.91 0.90 0.90 11204
47
+ micro avg 0.83 0.83 0.83 32090
48
+ macro avg 0.82 0.82 0.82 32090
wandb/run-20240916_182041-ye308qxr/files/requirements.txt ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AutoROM.accept-rom-license==0.6.1
2
+ AutoROM==0.6.1
3
+ Farama-Notifications==0.0.4
4
+ GitPython==3.1.43
5
+ Jinja2==3.1.4
6
+ Mako==1.3.5
7
+ Markdown==3.7
8
+ MarkupSafe==2.1.5
9
+ PyYAML==6.0.2
10
+ Pygments==2.18.0
11
+ QtPy==2.4.1
12
+ SQLAlchemy==2.0.32
13
+ Send2Trash==1.8.3
14
+ Shimmy==1.3.0
15
+ Werkzeug==3.0.4
16
+ absl-py==2.1.0
17
+ accelerate==0.33.0
18
+ aiohappyeyeballs==2.3.5
19
+ aiohttp-retry==2.8.3
20
+ aiohttp==3.10.3
21
+ aiosignal==1.3.1
22
+ ale-py==0.8.1
23
+ alembic==1.13.2
24
+ amqp==5.2.0
25
+ annotated-types==0.7.0
26
+ antlr4-python3-runtime==4.9.3
27
+ anyio==4.4.0
28
+ appdirs==1.4.4
29
+ argon2-cffi-bindings==21.2.0
30
+ argon2-cffi==23.1.0
31
+ arrow==1.3.0
32
+ asttokens==2.4.1
33
+ async-generator==1.10
34
+ async-lru==2.0.4
35
+ asyncssh==2.17.0
36
+ atpublic==5.0
37
+ attrs==24.2.0
38
+ audioread==3.0.1
39
+ babel==2.16.0
40
+ beautifulsoup4==4.12.3
41
+ billiard==4.2.0
42
+ bitsandbytes==0.43.3
43
+ bleach==6.1.0
44
+ blis==0.7.11
45
+ catalogue==2.0.10
46
+ celery==5.4.0
47
+ certifi==2024.7.4
48
+ certipy==0.1.3
49
+ cffi==1.17.0
50
+ charset-normalizer==3.3.2
51
+ click-didyoumean==0.3.1
52
+ click-plugins==1.1.1
53
+ click-repl==0.3.0
54
+ click==8.1.7
55
+ cloudpathlib==0.18.1
56
+ cloudpickle==3.0.0
57
+ colorama==0.4.6
58
+ coloredlogs==15.0.1
59
+ comm==0.2.2
60
+ conda-store==2024.6.1
61
+ confection==0.1.5
62
+ configobj==5.0.8
63
+ contourpy==1.2.1
64
+ cryptography==43.0.0
65
+ cuda-python==11.8.3
66
+ cycler==0.12.1
67
+ cymem==2.0.8
68
+ datasets==2.20.0
69
+ debugpy==1.8.5
70
+ decorator==5.1.1
71
+ defusedxml==0.7.1
72
+ dictdiffer==0.9.0
73
+ dill==0.3.8
74
+ diskcache==5.6.3
75
+ distro==1.9.0
76
+ docker-pycreds==0.4.0
77
+ docstring_parser==0.16
78
+ dpath==2.2.0
79
+ dulwich==0.22.1
80
+ dvc-data==3.16.5
81
+ dvc-http==2.32.0
82
+ dvc-objects==5.1.0
83
+ dvc-render==1.0.2
84
+ dvc-studio-client==0.21.0
85
+ dvc-task==0.4.0
86
+ dvc==3.55.2
87
+ en-core-web-lg==3.7.1
88
+ en-core-web-sm==3.7.1
89
+ entrypoints==0.4
90
+ evaluate==0.4.2
91
+ executing==2.0.1
92
+ fastjsonschema==2.20.0
93
+ filelock==3.15.4
94
+ flatbuffers==24.3.25
95
+ flatten-dict==0.4.2
96
+ flufl.lock==8.1.0
97
+ fonttools==4.53.1
98
+ fqdn==1.5.1
99
+ frozenlist==1.4.1
100
+ fsspec==2024.5.0
101
+ funcy==2.0
102
+ gitdb==4.0.11
103
+ grandalf==0.8
104
+ greenlet==3.0.3
105
+ grpcio==1.66.0
106
+ gto==1.7.1
107
+ gymnasium==0.29.1
108
+ h11==0.14.0
109
+ httpcore==1.0.5
110
+ httpx==0.27.0
111
+ huggingface-hub==0.24.5
112
+ humanfriendly==10.0
113
+ hydra-core==1.3.2
114
+ idna==3.7
115
+ importlib_resources==6.4.5
116
+ ipykernel==6.29.5
117
+ ipython==8.26.0
118
+ ipywidgets==8.1.3
119
+ isoduration==20.11.0
120
+ iterative-telemetry==0.0.8
121
+ jedi==0.19.1
122
+ joblib==1.4.2
123
+ json5==0.9.25
124
+ jsonpointer==3.0.0
125
+ jsonschema-specifications==2023.12.1
126
+ jsonschema==4.23.0
127
+ jupyter-console==6.6.3
128
+ jupyter-events==0.10.0
129
+ jupyter-launcher-shortcuts==4.0.3
130
+ jupyter-lsp==2.2.5
131
+ jupyter-telemetry==0.1.0
132
+ jupyter==1.0.0
133
+ jupyter_client==8.6.2
134
+ jupyter_core==5.7.2
135
+ jupyter_server==2.14.2
136
+ jupyter_server_terminals==0.5.3
137
+ jupyterhub==1.5.1
138
+ jupyterlab-conda-store==2024.6.1
139
+ jupyterlab==4.2.4
140
+ jupyterlab_pygments==0.3.0
141
+ jupyterlab_server==2.27.3
142
+ jupyterlab_widgets==3.0.11
143
+ kiwisolver==1.4.5
144
+ kombu==5.4.0
145
+ lab==8.2
146
+ langcodes==3.4.0
147
+ language_data==1.2.0
148
+ lazy_loader==0.4
149
+ librosa==0.10.2.post1
150
+ llvmlite==0.43.0
151
+ lxml==5.3.0
152
+ marisa-trie==1.2.0
153
+ markdown-it-py==3.0.0
154
+ matplotlib-inline==0.1.7
155
+ matplotlib==3.9.1.post1
156
+ mdurl==0.1.2
157
+ mistune==3.0.2
158
+ mpmath==1.3.0
159
+ msgpack==1.1.0
160
+ multidict==6.0.5
161
+ multiprocess==0.70.16
162
+ murmurhash==1.0.10
163
+ nb_conda_store_kernels==0.1.5
164
+ nbclient==0.10.0
165
+ nbconvert==7.16.4
166
+ nbformat==5.10.4
167
+ nest-asyncio==1.6.0
168
+ networkx==3.3
169
+ nodejs==0.1.1
170
+ notebook==7.2.1
171
+ notebook_shim==0.2.4
172
+ numba==0.60.0
173
+ numpy==1.26.4
174
+ nvidia-cublas-cu12==12.1.3.1
175
+ nvidia-cuda-cupti-cu12==12.1.105
176
+ nvidia-cuda-nvrtc-cu12==12.1.105
177
+ nvidia-cuda-runtime-cu12==12.1.105
178
+ nvidia-cudnn-cu12==8.9.2.26
179
+ nvidia-cudnn-cu12==9.1.0.70
180
+ nvidia-cufft-cu12==11.0.2.54
181
+ nvidia-curand-cu12==10.3.2.106
182
+ nvidia-cusolver-cu12==11.4.5.107
183
+ nvidia-cusparse-cu12==12.1.0.106
184
+ nvidia-nccl-cu12==2.19.3
185
+ nvidia-nccl-cu12==2.20.5
186
+ nvidia-nvjitlink-cu12==12.6.20
187
+ nvidia-nvtx-cu12==12.1.105
188
+ oauthlib==3.2.2
189
+ omegaconf==2.3.0
190
+ onnx==1.16.2
191
+ onnxruntime==1.19.0
192
+ opencv-python==4.10.0.84
193
+ optimum==1.21.4
194
+ optional-django==0.1.0
195
+ orjson==3.10.7
196
+ overrides==7.7.0
197
+ packaging==24.1
198
+ pamela==1.2.0
199
+ pandas==2.2.2
200
+ pandocfilters==1.5.1
201
+ parso==0.8.4
202
+ pathspec==0.12.1
203
+ peft==0.12.0
204
+ pexpect==4.9.0
205
+ pillow==10.4.0
206
+ pip==23.1.2
207
+ platformdirs==3.11.0
208
+ pooch==1.8.2
209
+ preshed==3.0.9
210
+ prometheus_client==0.20.0
211
+ prompt_toolkit==3.0.47
212
+ protobuf==5.27.3
213
+ psutil==6.0.0
214
+ ptyprocess==0.7.0
215
+ pure_eval==0.2.3
216
+ pyOpenSSL==24.2.1
217
+ pyarrow-hotfix==0.6
218
+ pyarrow==17.0.0
219
+ pycparser==2.22
220
+ pydantic==2.8.2
221
+ pydantic_core==2.20.1
222
+ pydot==3.0.1
223
+ pygame==2.6.0
224
+ pygit2==1.15.1
225
+ pygtrie==2.5.0
226
+ pyparsing==3.1.2
227
+ python-dateutil==2.9.0.post0
228
+ python-dotenv==1.0.1
229
+ python-json-logger==2.0.7
230
+ pytz==2024.1
231
+ pyzmq==26.1.0
232
+ qtconsole==5.5.2
233
+ referencing==0.35.1
234
+ regex==2024.7.24
235
+ requests==2.32.3
236
+ rfc3339-validator==0.1.4
237
+ rfc3986-validator==0.1.1
238
+ rich==13.7.1
239
+ rpds-py==0.20.0
240
+ ruamel.yaml.clib==0.2.8
241
+ ruamel.yaml==0.18.6
242
+ safetensors==0.4.4
243
+ scikit-learn==1.5.1
244
+ scipy==1.14.0
245
+ scmrepo==3.3.7
246
+ seaborn==0.13.2
247
+ semver==3.0.2
248
+ sentencepiece==0.2.0
249
+ sentry-sdk==2.14.0
250
+ seqeval==1.2.2
251
+ setproctitle==1.3.3
252
+ setuptools==65.5.0
253
+ shellingham==1.5.4
254
+ shortuuid==1.0.13
255
+ shtab==1.7.1
256
+ simplejson==3.19.2
257
+ six==1.16.0
258
+ smart-open==7.0.4
259
+ smmap==5.0.1
260
+ sniffio==1.3.1
261
+ soundfile==0.12.1
262
+ soupsieve==2.5
263
+ sox==1.5.0
264
+ soxr==0.5.0.post1
265
+ spacy-legacy==3.0.12
266
+ spacy-loggers==1.0.5
267
+ spacy==3.7.5
268
+ sqltrie==0.11.1
269
+ srsly==2.4.8
270
+ stable_baselines3==2.3.2
271
+ stack-data==0.6.3
272
+ sympy==1.13.2
273
+ tabulate==0.9.0
274
+ tensorboard-data-server==0.7.2
275
+ tensorboard==2.17.1
276
+ terminado==0.18.1
277
+ thinc==8.2.5
278
+ threadpoolctl==3.5.0
279
+ tinycss2==1.3.0
280
+ tokenizers==0.19.1
281
+ tomlkit==0.13.2
282
+ torch==2.4.1
283
+ torchaudio==2.4.1
284
+ torchvision==0.19.1
285
+ tornado==6.4.1
286
+ tqdm==4.66.5
287
+ traitlets==5.14.3
288
+ transformers==4.43.4
289
+ triton==3.0.0
290
+ trl==0.9.6
291
+ txt2tags==3.9
292
+ typer==0.12.3
293
+ types-python-dateutil==2.9.0.20240316
294
+ typing_extensions==4.12.2
295
+ tyro==0.8.6
296
+ tzdata==2024.1
297
+ uri-template==1.3.0
298
+ urllib3==2.2.2
299
+ vine==5.1.0
300
+ voluptuous==0.15.2
301
+ wandb==0.17.9
302
+ wasabi==1.1.3
303
+ wcwidth==0.2.13
304
+ weasel==0.4.1
305
+ webcolors==24.8.0
306
+ webencodings==0.5.1
307
+ websocket-client==1.8.0
308
+ widgetsnbextension==4.0.11
309
+ wrapt==1.16.0
310
+ xlrd==2.0.1
311
+ xxhash==3.4.1
312
+ yarl==1.9.4
313
+ zc.lockfile==3.0.post1
wandb/run-20240916_182041-ye308qxr/files/wandb-metadata.json ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-4.18.0-513.24.1.el8_9.x86_64-x86_64-with-glibc2.28",
3
+ "python": "3.11.4",
4
+ "heartbeatAt": "2024-09-16T17:20:42.035382",
5
+ "startedAt": "2024-09-16T17:20:41.449030",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [],
9
+ "state": "running",
10
+ "program": "<python with no main file>",
11
+ "codePathLocal": null,
12
+ "host": "codon-gpu-014.ebi.ac.uk",
13
+ "username": "amrufai",
14
+ "executable": "/nfs/production/literature/amina-mardiyyah/envs/llm-prompt/bin/python",
15
+ "cpu_count": 48,
16
+ "cpu_count_logical": 48,
17
+ "cpu_freq": {
18
+ "current": 2869.226979166667,
19
+ "min": 0.0,
20
+ "max": 0.0
21
+ },
22
+ "cpu_freq_per_core": [
23
+ {
24
+ "current": 2800.0,
25
+ "min": 0.0,
26
+ "max": 0.0
27
+ },
28
+ {
29
+ "current": 3251.96,
30
+ "min": 0.0,
31
+ "max": 0.0
32
+ },
33
+ {
34
+ "current": 2800.0,
35
+ "min": 0.0,
36
+ "max": 0.0
37
+ },
38
+ {
39
+ "current": 3246.317,
40
+ "min": 0.0,
41
+ "max": 0.0
42
+ },
43
+ {
44
+ "current": 2800.0,
45
+ "min": 0.0,
46
+ "max": 0.0
47
+ },
48
+ {
49
+ "current": 3260.095,
50
+ "min": 0.0,
51
+ "max": 0.0
52
+ },
53
+ {
54
+ "current": 2800.0,
55
+ "min": 0.0,
56
+ "max": 0.0
57
+ },
58
+ {
59
+ "current": 3254.547,
60
+ "min": 0.0,
61
+ "max": 0.0
62
+ },
63
+ {
64
+ "current": 2800.0,
65
+ "min": 0.0,
66
+ "max": 0.0
67
+ },
68
+ {
69
+ "current": 3242.782,
70
+ "min": 0.0,
71
+ "max": 0.0
72
+ },
73
+ {
74
+ "current": 2800.0,
75
+ "min": 0.0,
76
+ "max": 0.0
77
+ },
78
+ {
79
+ "current": 3219.073,
80
+ "min": 0.0,
81
+ "max": 0.0
82
+ },
83
+ {
84
+ "current": 2800.0,
85
+ "min": 0.0,
86
+ "max": 0.0
87
+ },
88
+ {
89
+ "current": 3248.226,
90
+ "min": 0.0,
91
+ "max": 0.0
92
+ },
93
+ {
94
+ "current": 2800.0,
95
+ "min": 0.0,
96
+ "max": 0.0
97
+ },
98
+ {
99
+ "current": 3244.463,
100
+ "min": 0.0,
101
+ "max": 0.0
102
+ },
103
+ {
104
+ "current": 2800.0,
105
+ "min": 0.0,
106
+ "max": 0.0
107
+ },
108
+ {
109
+ "current": 3242.955,
110
+ "min": 0.0,
111
+ "max": 0.0
112
+ },
113
+ {
114
+ "current": 803.795,
115
+ "min": 0.0,
116
+ "max": 0.0
117
+ },
118
+ {
119
+ "current": 3240.712,
120
+ "min": 0.0,
121
+ "max": 0.0
122
+ },
123
+ {
124
+ "current": 2800.0,
125
+ "min": 0.0,
126
+ "max": 0.0
127
+ },
128
+ {
129
+ "current": 3232.618,
130
+ "min": 0.0,
131
+ "max": 0.0
132
+ },
133
+ {
134
+ "current": 2800.0,
135
+ "min": 0.0,
136
+ "max": 0.0
137
+ },
138
+ {
139
+ "current": 3235.352,
140
+ "min": 0.0,
141
+ "max": 0.0
142
+ },
143
+ {
144
+ "current": 2800.0,
145
+ "min": 0.0,
146
+ "max": 0.0
147
+ },
148
+ {
149
+ "current": 2800.0,
150
+ "min": 0.0,
151
+ "max": 0.0
152
+ },
153
+ {
154
+ "current": 2800.0,
155
+ "min": 0.0,
156
+ "max": 0.0
157
+ },
158
+ {
159
+ "current": 2800.0,
160
+ "min": 0.0,
161
+ "max": 0.0
162
+ },
163
+ {
164
+ "current": 2800.0,
165
+ "min": 0.0,
166
+ "max": 0.0
167
+ },
168
+ {
169
+ "current": 2800.0,
170
+ "min": 0.0,
171
+ "max": 0.0
172
+ },
173
+ {
174
+ "current": 2800.0,
175
+ "min": 0.0,
176
+ "max": 0.0
177
+ },
178
+ {
179
+ "current": 2800.0,
180
+ "min": 0.0,
181
+ "max": 0.0
182
+ },
183
+ {
184
+ "current": 2800.0,
185
+ "min": 0.0,
186
+ "max": 0.0
187
+ },
188
+ {
189
+ "current": 2800.0,
190
+ "min": 0.0,
191
+ "max": 0.0
192
+ },
193
+ {
194
+ "current": 2800.0,
195
+ "min": 0.0,
196
+ "max": 0.0
197
+ },
198
+ {
199
+ "current": 2800.0,
200
+ "min": 0.0,
201
+ "max": 0.0
202
+ },
203
+ {
204
+ "current": 2800.0,
205
+ "min": 0.0,
206
+ "max": 0.0
207
+ },
208
+ {
209
+ "current": 2800.0,
210
+ "min": 0.0,
211
+ "max": 0.0
212
+ },
213
+ {
214
+ "current": 2800.0,
215
+ "min": 0.0,
216
+ "max": 0.0
217
+ },
218
+ {
219
+ "current": 2800.0,
220
+ "min": 0.0,
221
+ "max": 0.0
222
+ },
223
+ {
224
+ "current": 2800.0,
225
+ "min": 0.0,
226
+ "max": 0.0
227
+ },
228
+ {
229
+ "current": 2800.0,
230
+ "min": 0.0,
231
+ "max": 0.0
232
+ },
233
+ {
234
+ "current": 2800.0,
235
+ "min": 0.0,
236
+ "max": 0.0
237
+ },
238
+ {
239
+ "current": 2800.0,
240
+ "min": 0.0,
241
+ "max": 0.0
242
+ },
243
+ {
244
+ "current": 2800.0,
245
+ "min": 0.0,
246
+ "max": 0.0
247
+ },
248
+ {
249
+ "current": 2800.0,
250
+ "min": 0.0,
251
+ "max": 0.0
252
+ },
253
+ {
254
+ "current": 2800.0,
255
+ "min": 0.0,
256
+ "max": 0.0
257
+ },
258
+ {
259
+ "current": 2800.0,
260
+ "min": 0.0,
261
+ "max": 0.0
262
+ }
263
+ ],
264
+ "disk": {
265
+ "/": {
266
+ "total": 47.760292053222656,
267
+ "used": 15.848091125488281
268
+ }
269
+ },
270
+ "gpu": "NVIDIA A100 80GB PCIe",
271
+ "gpu_count": 1,
272
+ "gpu_devices": [
273
+ {
274
+ "name": "NVIDIA A100 80GB PCIe",
275
+ "memory_total": 85899345920
276
+ }
277
+ ],
278
+ "memory": {
279
+ "total": 502.83758544921875
280
+ }
281
+ }
wandb/run-20240916_182041-ye308qxr/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train/loss": 0.0596, "train/grad_norm": 2.547844409942627, "train/learning_rate": 2.3952095808383236e-08, "train/epoch": 4.996436208125445, "train/global_step": 3505, "_timestamp": 1726510014.761806, "_runtime": 2773.28347492218, "_step": 10, "eval/loss": 0.13957327604293823, "eval/f1": 0.8316917954223653, "eval/precision": 0.8305416576027844, "eval/recall": 0.8328451230913057, "eval/accuracy": 0.9604729231827337, "eval/runtime": 75.264, "eval/samples_per_second": 149.102, "eval/steps_per_second": 18.641, "train_runtime": 2491.7461, "train_samples_per_second": 90.069, "train_steps_per_second": 1.407, "total_flos": 4.319274218928017e+16, "train_loss": 0.1366414232022752}
wandb/run-20240916_182041-ye308qxr/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20240916_182041-ye308qxr/logs/debug.log ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Current SDK version is 0.17.9
2
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Configure stats pid to 1120321
3
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Loading settings from /homes/amrufai/.config/wandb/settings
4
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Loading settings from /nfs/production/literature/amina-mardiyyah/wandb/settings
5
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Loading settings from environment variables: {}
6
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program': '<python with no main file>'}
7
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Applying login settings: {}
8
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_setup.py:_flush():77] Applying login settings: {}
9
+ 2024-09-16 18:20:41,469 INFO MainThread:1120321 [wandb_init.py:_log_setup():524] Logging user logs to /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/wandb/run-20240916_182041-ye308qxr/logs/debug.log
10
+ 2024-09-16 18:20:41,470 INFO MainThread:1120321 [wandb_init.py:_log_setup():525] Logging internal logs to /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/wandb/run-20240916_182041-ye308qxr/logs/debug-internal.log
11
+ 2024-09-16 18:20:41,470 INFO MainThread:1120321 [wandb_init.py:_jupyter_setup():470] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f39d0d93210>
12
+ 2024-09-16 18:20:41,470 INFO MainThread:1120321 [wandb_init.py:init():608] calling init triggers
13
+ 2024-09-16 18:20:41,470 INFO MainThread:1120321 [wandb_init.py:init():615] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2024-09-16 18:20:41,470 INFO MainThread:1120321 [wandb_init.py:init():658] starting backend
16
+ 2024-09-16 18:20:41,470 INFO MainThread:1120321 [wandb_init.py:init():662] setting up manager
17
+ 2024-09-16 18:20:41,472 INFO MainThread:1120321 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2024-09-16 18:20:41,477 INFO MainThread:1120321 [wandb_init.py:init():670] backend started and connected
19
+ 2024-09-16 18:20:41,486 INFO MainThread:1120321 [wandb_run.py:_label_probe_notebook():1344] probe notebook
20
+ 2024-09-16 18:20:41,488 INFO MainThread:1120321 [wandb_run.py:_label_probe_notebook():1354] Unable to probe notebook: 'NoneType' object has no attribute 'get'
21
+ 2024-09-16 18:20:41,488 INFO MainThread:1120321 [wandb_init.py:init():768] updated telemetry
22
+ 2024-09-16 18:20:41,499 INFO MainThread:1120321 [wandb_init.py:init():801] communicating run to backend with 90.0 second timeout
23
+ 2024-09-16 18:20:41,903 INFO MainThread:1120321 [wandb_init.py:init():852] starting run threads in backend
24
+ 2024-09-16 18:20:42,871 INFO MainThread:1120321 [wandb_run.py:_console_start():2465] atexit reg
25
+ 2024-09-16 18:20:42,872 INFO MainThread:1120321 [wandb_run.py:_redirect():2311] redirect: wrap_raw
26
+ 2024-09-16 18:20:42,873 INFO MainThread:1120321 [wandb_run.py:_redirect():2376] Wrapping output streams.
27
+ 2024-09-16 18:20:42,873 INFO MainThread:1120321 [wandb_run.py:_redirect():2401] Redirects installed.
28
+ 2024-09-16 18:20:42,881 INFO MainThread:1120321 [wandb_init.py:init():895] run started, returning control to user process
29
+ 2024-09-16 18:20:42,888 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
30
+ 2024-09-16 18:20:42,888 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
31
+ 2024-09-16 18:22:30,477 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
32
+ 2024-09-16 18:22:30,484 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
33
+ 2024-09-16 18:22:30,484 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
34
+ 2024-09-16 18:24:04,645 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
35
+ 2024-09-16 18:24:04,650 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
36
+ 2024-09-16 18:24:04,650 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
37
+ 2024-09-16 18:24:11,088 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
38
+ 2024-09-16 18:24:11,158 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
39
+ 2024-09-16 18:24:11,158 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
40
+ 2024-09-16 18:24:14,909 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
41
+ 2024-09-16 18:24:14,916 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
42
+ 2024-09-16 18:24:14,916 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
43
+ 2024-09-16 18:25:16,270 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
44
+ 2024-09-16 18:25:23,022 INFO MainThread:1120321 [wandb_run.py:_config_callback():1392] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['XLMRobertaForTokenClassification'], 'finetuning_task': None, 'id2label': {0: 'O', 1: 'B-DATE', 2: 'I-DATE', 3: 'B-PER', 4: 'I-PER', 5: 'B-ORG', 6: 'I-ORG', 7: 'B-LOC', 8: 'I-LOC'}, 'label2id': {'B-DATE': 1, 'B-LOC': 7, 'B-ORG': 5, 'B-PER': 3, 'I-DATE': 2, 'I-LOC': 8, 'I-ORG': 6, 'I-PER': 4, 'O': 0}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0', 'transformers_version': '4.43.4', 'adapters': {'adapters': {}, 'config_map': {}, 'fusion_config_map': {}, 'fusions': {}}, 'gradient_checkpointing': False, 'model_type': 'xlm-roberta', 'output_past': True, 'vocab_size': 250002, 'hidden_size': 1024, 'num_hidden_layers': 24, 
'num_attention_heads': 16, 'hidden_act': 'gelu', 'intermediate_size': 4096, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 514, 'type_vocab_size': 1, 'initializer_range': 0.02, 'layer_norm_eps': 1e-05, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 1000, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised/logs', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 2, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 3407, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 
'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 
'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'eval_use_gather_object': False}
45
+ 2024-09-16 18:25:23,024 INFO MainThread:1120321 [wandb_config.py:__setitem__():154] config set model/num_parameters = 558850057 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f3ae7ac3410>>
46
+ 2024-09-16 18:25:23,025 INFO MainThread:1120321 [wandb_run.py:_config_callback():1392] config_cb model/num_parameters 558850057 None
47
+ 2024-09-16 19:06:54,766 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
48
+ 2024-09-16 19:06:54,766 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
49
+ 2024-09-16 19:07:02,732 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
50
+ 2024-09-16 19:07:02,738 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
51
+ 2024-09-16 19:07:02,738 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
52
+ 2024-09-16 19:07:21,411 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
53
+ 2024-09-16 19:07:21,531 INFO MainThread:1120321 [jupyter.py:save_ipynb():372] not saving jupyter notebook
54
+ 2024-09-16 19:07:21,531 INFO MainThread:1120321 [wandb_init.py:_pause_backend():435] pausing backend
55
+ 2024-09-16 19:08:43,069 INFO MainThread:1120321 [wandb_init.py:_resume_backend():440] resuming backend
wandb/run-20240916_182041-ye308qxr/run-ye308qxr.wandb ADDED
Binary file (198 kB). View file