MaroueneA committed on
Commit
139e538
1 Parent(s): df619ae

Initial commit of my Gradio NLP app

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. app.py +81 -0
  2. models/UBC-NLP/ARBERT/config.json +33 -0
  3. models/UBC-NLP/ARBERT/model.safetensors +3 -0
  4. models/UBC-NLP/ARBERT/special_tokens_map.json +7 -0
  5. models/UBC-NLP/ARBERT/tokenizer.json +0 -0
  6. models/UBC-NLP/ARBERT/tokenizer_config.json +57 -0
  7. models/UBC-NLP/ARBERT/vocab.txt +0 -0
  8. models/UBC-NLP/MARBERT/config.json +33 -0
  9. models/UBC-NLP/MARBERT/model.safetensors +3 -0
  10. models/UBC-NLP/MARBERT/special_tokens_map.json +7 -0
  11. models/UBC-NLP/MARBERT/tokenizer.json +0 -0
  12. models/UBC-NLP/MARBERT/tokenizer_config.json +57 -0
  13. models/UBC-NLP/MARBERT/vocab.txt +0 -0
  14. models/bert-offensive/checkpoint-1000/config.json +27 -0
  15. models/bert-offensive/checkpoint-1000/model.safetensors +3 -0
  16. models/bert-offensive/checkpoint-1000/optimizer.pt +3 -0
  17. models/bert-offensive/checkpoint-1000/rng_state.pth +3 -0
  18. models/bert-offensive/checkpoint-1000/scheduler.pt +3 -0
  19. models/bert-offensive/checkpoint-1000/special_tokens_map.json +7 -0
  20. models/bert-offensive/checkpoint-1000/tokenizer.json +0 -0
  21. models/bert-offensive/checkpoint-1000/tokenizer_config.json +55 -0
  22. models/bert-offensive/checkpoint-1000/trainer_state.json +35 -0
  23. models/bert-offensive/checkpoint-1000/training_args.bin +3 -0
  24. models/bert-offensive/checkpoint-1000/vocab.txt +0 -0
  25. models/bert-offensive/checkpoint-1500/config.json +27 -0
  26. models/bert-offensive/checkpoint-1500/model.safetensors +3 -0
  27. models/bert-offensive/checkpoint-1500/optimizer.pt +3 -0
  28. models/bert-offensive/checkpoint-1500/rng_state.pth +3 -0
  29. models/bert-offensive/checkpoint-1500/scheduler.pt +3 -0
  30. models/bert-offensive/checkpoint-1500/special_tokens_map.json +7 -0
  31. models/bert-offensive/checkpoint-1500/tokenizer.json +0 -0
  32. models/bert-offensive/checkpoint-1500/tokenizer_config.json +55 -0
  33. models/bert-offensive/checkpoint-1500/trainer_state.json +54 -0
  34. models/bert-offensive/checkpoint-1500/training_args.bin +3 -0
  35. models/bert-offensive/checkpoint-1500/vocab.txt +0 -0
  36. models/bert-offensive/checkpoint-2000/config.json +27 -0
  37. models/bert-offensive/checkpoint-2000/model.safetensors +3 -0
  38. models/bert-offensive/checkpoint-2000/optimizer.pt +3 -0
  39. models/bert-offensive/checkpoint-2000/rng_state.pth +3 -0
  40. models/bert-offensive/checkpoint-2000/scheduler.pt +3 -0
  41. models/bert-offensive/checkpoint-2000/special_tokens_map.json +7 -0
  42. models/bert-offensive/checkpoint-2000/tokenizer.json +0 -0
  43. models/bert-offensive/checkpoint-2000/tokenizer_config.json +55 -0
  44. models/bert-offensive/checkpoint-2000/trainer_state.json +61 -0
  45. models/bert-offensive/checkpoint-2000/training_args.bin +3 -0
  46. models/bert-offensive/checkpoint-2000/vocab.txt +0 -0
  47. models/bert-offensive/checkpoint-2500/config.json +27 -0
  48. models/bert-offensive/checkpoint-2500/model.safetensors +3 -0
  49. models/bert-offensive/checkpoint-2500/optimizer.pt +3 -0
  50. models/bert-offensive/checkpoint-2500/rng_state.pth +3 -0
app.py ADDED
@@ -0,0 +1,81 @@
+ import gradio as gr
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ import torch
+
+ # Load the saved models and tokenizers
+ model_roberta = AutoModelForSequenceClassification.from_pretrained("./models/roberta-base-offensive")
+ tokenizer_roberta = AutoTokenizer.from_pretrained("./models/roberta-base-offensive")
+
+ model_distilbert = AutoModelForSequenceClassification.from_pretrained("./models/distilbert-base-uncased-offensive")
+ tokenizer_distilbert = AutoTokenizer.from_pretrained("./models/distilbert-base-uncased-offensive")
+
+ model_deberta = AutoModelForSequenceClassification.from_pretrained("./models/deberta-offensive")
+ tokenizer_deberta = AutoTokenizer.from_pretrained("./models/deberta-offensive")
+
+ model_bert = AutoModelForSequenceClassification.from_pretrained("./models/bert-offensive")
+ tokenizer_bert = AutoTokenizer.from_pretrained("./models/bert-offensive")
+
+ # Arabic saved models and tokenizers
+ model_arbert = AutoModelForSequenceClassification.from_pretrained("./models/UBC-NLP/ARBERT")
+ tokenizer_arbert = AutoTokenizer.from_pretrained("./models/UBC-NLP/ARBERT")
+
+ model_marbert = AutoModelForSequenceClassification.from_pretrained("./models/UBC-NLP/MARBERT")
+ tokenizer_marbert = AutoTokenizer.from_pretrained("./models/UBC-NLP/MARBERT")
+
+ def predict(tweet, model_choice):
+     if model_choice == "RoBERTa":
+         model = model_roberta
+         tokenizer = tokenizer_roberta
+     elif model_choice == "DistilBERT":
+         model = model_distilbert
+         tokenizer = tokenizer_distilbert
+     elif model_choice == "ARBERT":
+         model = model_arbert
+         tokenizer = tokenizer_arbert
+     elif model_choice == "MARBERT":
+         model = model_marbert
+         tokenizer = tokenizer_marbert
+     elif model_choice == "DeBERTa":
+         model = model_deberta
+         tokenizer = tokenizer_deberta
+     elif model_choice == "BERT":
+         model = model_bert
+         tokenizer = tokenizer_bert
+     else:
+         return "Model not selected", "Please select a model."
+
+     encoded_input = tokenizer.encode(tweet, return_tensors='pt', truncation=True, max_length=512, padding=True)
+     with torch.no_grad():
+         output = model(encoded_input)
+     logits = output.logits
+     probabilities = torch.softmax(logits, dim=-1)
+     prediction_index = probabilities.argmax().item()
+
+     prediction_map = {0: "Not Offensive", 1: "Offensive"}
+     prediction = prediction_map[prediction_index]
+     confidence = probabilities[0, prediction_index].item()
+
+     return prediction, f"Confidence: {confidence:.4f}"
+
+ def app_interface():
+     with gr.Blocks() as app:
+         gr.Markdown("## Offensive Language Detection")
+         gr.Markdown("### Instructions:")
+         gr.Markdown("1. Select the language of the text.\n2. Choose a model corresponding to the selected language:\n - For **English**: BERT, DeBERTa, RoBERTa, or DistilBERT\n - For **Tunisian Arabic**: ARBERT or MARBERT")
+         with gr.Row():
+             language = gr.Radio(["English", "Tunisian Arabic"], label="Choose Language")
+         with gr.Row():
+             model_choice = gr.Dropdown(["RoBERTa", "DistilBERT", "ARBERT", "MARBERT", "DeBERTa", "BERT"], label="Choose Model")
+         with gr.Row():
+             tweet = gr.Textbox(lines=4, placeholder="Enter your text here...", label="Text")
+             submit_btn = gr.Button("Predict")
+         with gr.Row():
+             prediction = gr.Textbox(label="Prediction")
+             confidence = gr.Textbox(label="Confidence")
+
+         submit_btn.click(fn=predict, inputs=[tweet, model_choice], outputs=[prediction, confidence])
+
+     return app
+
+ app = app_interface()
+ app.launch()
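For a quick sanity check of the classification path in app.py outside the Gradio UI, a minimal sketch along the following lines could be used. It is an illustration only, not part of this commit, and it assumes the repository has been cloned with the Git LFS weights pulled so that a saved model directory such as ./models/bert-offensive is loadable.

# Hypothetical smoke test (not part of this commit). Mirrors the logic of
# predict() in app.py for a single saved model directory.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_dir = "./models/bert-offensive"  # any of the model folders added in this commit
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Tokenize one example and run a forward pass without gradients
inputs = tokenizer("This is a perfectly friendly sentence.",
                   return_tensors="pt", truncation=True, max_length=512)
with torch.no_grad():
    probabilities = torch.softmax(model(**inputs).logits, dim=-1)

label_map = {0: "Not Offensive", 1: "Offensive"}
idx = probabilities.argmax().item()
print(label_map[idx], f"confidence={probabilities[0, idx].item():.4f}")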
models/UBC-NLP/ARBERT/config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "UBC-NLP/ARBERT",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.39.2",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 100000
+ }
models/UBC-NLP/ARBERT/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5721a08143fe631ff017403f0a526b5d570444ebab9af55869f50815112efd5
+ size 651395072
models/UBC-NLP/ARBERT/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
models/UBC-NLP/ARBERT/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/UBC-NLP/ARBERT/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
models/UBC-NLP/ARBERT/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/UBC-NLP/MARBERT/config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "UBC-NLP/MARBERT",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.39.2",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 100000
+ }
models/UBC-NLP/MARBERT/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:874e85391a27658550ccfde41216bf0d1bf732472e4461d8ba7876a4dcd1b2c6
+ size 651395072
models/UBC-NLP/MARBERT/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
models/UBC-NLP/MARBERT/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/UBC-NLP/MARBERT/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
models/UBC-NLP/MARBERT/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/bert-offensive/checkpoint-1000/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "_name_or_path": "bert-base-uncased",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.39.3",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
models/bert-offensive/checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cd658ae002182ad99ffbb9982d5d85515c0c49949eac2d6d4eb86c479d67955b
+ size 437958648
models/bert-offensive/checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:22c5eb3685348006d842a33f8c23a222816e6efa7e3dbf1212516a44add594c0
+ size 876038394
models/bert-offensive/checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89ed3ecad87f77a0ed60753e4ffd969dbef607419cab5d7c3471fc6b16360766
+ size 14244
models/bert-offensive/checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3cdc454fd78e2054130f997d6bf748bc81368c748607a351ea7de15a408d26e6
+ size 1064
models/bert-offensive/checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
models/bert-offensive/checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/bert-offensive/checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
models/bert-offensive/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.6711409395973155,
+   "eval_steps": 500,
+   "global_step": 1000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.34,
+       "grad_norm": 3.309788227081299,
+       "learning_rate": 4.440715883668904e-05,
+       "loss": 0.5291,
+       "step": 500
+     },
+     {
+       "epoch": 0.67,
+       "grad_norm": 8.38113784790039,
+       "learning_rate": 3.881431767337807e-05,
+       "loss": 0.4834,
+       "step": 1000
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 4470,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 500,
+   "total_flos": 423493688042880.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
models/bert-offensive/checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ff52faf0d3a27635d14f511f07bc80871b57c3a7826358862dda78696619e8f5
+ size 4856
models/bert-offensive/checkpoint-1000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/bert-offensive/checkpoint-1500/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "_name_or_path": "bert-base-uncased",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.39.3",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
models/bert-offensive/checkpoint-1500/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b13c8461e29b1c84d3992da1258c4e67c9df1faa1b7d2a6d3866c8b4ab31264e
+ size 437958648
models/bert-offensive/checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a5c46aedd4909a23c89b0d3396dc7938f781b6289de190c8bb1fa275715998e
+ size 876038394
models/bert-offensive/checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17a5e93a639b18c5080b3206380db8bdf3c9bf762e376ba68470d9bd63251d88
+ size 14244
models/bert-offensive/checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0bd70d973811a00199980200e5162073c3f4bfdfc513c94464ee46ef32af9473
+ size 1064
models/bert-offensive/checkpoint-1500/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
models/bert-offensive/checkpoint-1500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/bert-offensive/checkpoint-1500/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
models/bert-offensive/checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,54 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.0067114093959733,
+   "eval_steps": 500,
+   "global_step": 1500,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.34,
+       "grad_norm": 3.309788227081299,
+       "learning_rate": 4.440715883668904e-05,
+       "loss": 0.5291,
+       "step": 500
+     },
+     {
+       "epoch": 0.67,
+       "grad_norm": 8.38113784790039,
+       "learning_rate": 3.881431767337807e-05,
+       "loss": 0.4834,
+       "step": 1000
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.8244186046511628,
+       "eval_f1": 0.700990099009901,
+       "eval_loss": 0.40849873423576355,
+       "eval_precision": 0.6679245283018868,
+       "eval_recall": 0.7375,
+       "eval_runtime": 1.415,
+       "eval_samples_per_second": 607.771,
+       "eval_steps_per_second": 76.325,
+       "step": 1490
+     },
+     {
+       "epoch": 1.01,
+       "grad_norm": 4.857753753662109,
+       "learning_rate": 3.3221476510067115e-05,
+       "loss": 0.4743,
+       "step": 1500
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 4470,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 500,
+   "total_flos": 635261087615520.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
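As a consistency check on the evaluation entry logged at step 1490 above, eval_f1 is the harmonic mean of eval_precision and eval_recall. The short sketch below is an illustration only (not part of the commit) and simply reproduces the reported value from the logged numbers.

# Recompute F1 from the precision/recall logged in trainer_state.json at step 1490
precision, recall = 0.6679245283018868, 0.7375
f1 = 2 * precision * recall / (precision + recall)
print(f1)  # ~0.70099, matching the logged eval_f1 of 0.700990099009901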
models/bert-offensive/checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ff52faf0d3a27635d14f511f07bc80871b57c3a7826358862dda78696619e8f5
+ size 4856
models/bert-offensive/checkpoint-1500/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/bert-offensive/checkpoint-2000/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "_name_or_path": "bert-base-uncased",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.39.3",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
models/bert-offensive/checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d84caca90c708c6d9b08f796f62f3fe3770d1ebbe5dc4708d787c870652e882a
+ size 437958648
models/bert-offensive/checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79f977dfb26bf16852ccff9864f65c99dfca153be14bcd7280af5f7a05a0d808
+ size 876038394
models/bert-offensive/checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e57db07813490de347f6b29da476f84f897496ba4cc251212c6d017a619dce80
+ size 14244
models/bert-offensive/checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dd68bf4dff9924c9178e7ecdd91ec247aec55edc72b3ca0231603fd33cef86d3
+ size 1064
models/bert-offensive/checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
models/bert-offensive/checkpoint-2000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/bert-offensive/checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
models/bert-offensive/checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,61 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.342281879194631,
+   "eval_steps": 500,
+   "global_step": 2000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.34,
+       "grad_norm": 3.309788227081299,
+       "learning_rate": 4.440715883668904e-05,
+       "loss": 0.5291,
+       "step": 500
+     },
+     {
+       "epoch": 0.67,
+       "grad_norm": 8.38113784790039,
+       "learning_rate": 3.881431767337807e-05,
+       "loss": 0.4834,
+       "step": 1000
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.8244186046511628,
+       "eval_f1": 0.700990099009901,
+       "eval_loss": 0.40849873423576355,
+       "eval_precision": 0.6679245283018868,
+       "eval_recall": 0.7375,
+       "eval_runtime": 1.415,
+       "eval_samples_per_second": 607.771,
+       "eval_steps_per_second": 76.325,
+       "step": 1490
+     },
+     {
+       "epoch": 1.01,
+       "grad_norm": 4.857753753662109,
+       "learning_rate": 3.3221476510067115e-05,
+       "loss": 0.4743,
+       "step": 1500
+     },
+     {
+       "epoch": 1.34,
+       "grad_norm": 20.212223052978516,
+       "learning_rate": 2.7628635346756153e-05,
+       "loss": 0.3753,
+       "step": 2000
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 4470,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 500,
+   "total_flos": 846855820558080.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
models/bert-offensive/checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ff52faf0d3a27635d14f511f07bc80871b57c3a7826358862dda78696619e8f5
+ size 4856
models/bert-offensive/checkpoint-2000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/bert-offensive/checkpoint-2500/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "_name_or_path": "bert-base-uncased",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.39.3",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
models/bert-offensive/checkpoint-2500/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f58457eeb8339abe1f0ee26937443a95a8a25aae3c4fe5f569327ab3fd87f48c
+ size 437958648
models/bert-offensive/checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:77406637943250708e3999e599a8b10ef39a486f27cfa07a01bbff9f9a76e71b
+ size 876038394
models/bert-offensive/checkpoint-2500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ccff0626d2d12f751c557becd9d0ba8acfabed972b2c6065108c51d3e8d4d12
+ size 14244