scottsuk0306 committed on
Commit
0fe2f8c
1 Parent(s): c3773fa

Model save

Browse files
README.md CHANGED
@@ -2,28 +2,14 @@
2
  license: cc-by-nc-4.0
3
  base_model: sfairXC/FsfairX-LLaMA3-RM-v0.1
4
  tags:
5
- - easylm
6
- - alignment-handbook
7
  - trl
8
  - reward-trainer
 
 
9
  - generated_from_trainer
10
- datasets:
11
- - helpsteer-rm
12
- metrics:
13
- - accuracy
14
  model-index:
15
  - name: easylm-helpsteer-rm-FsfairX-LLaMA3-RM-v0.1
16
- results:
17
- - task:
18
- name: Text Classification
19
- type: text-classification
20
- dataset:
21
- name: helpsteer-rm
22
- type: helpsteer-rm
23
- metrics:
24
- - name: Accuracy
25
- type: accuracy
26
- value: 0.6189111747851003
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -31,10 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
31
 
32
  # easylm-helpsteer-rm-FsfairX-LLaMA3-RM-v0.1
33
 
34
- This model is a fine-tuned version of [sfairXC/FsfairX-LLaMA3-RM-v0.1](https://huggingface.co/sfairXC/FsfairX-LLaMA3-RM-v0.1) on the helpsteer-rm dataset.
35
- It achieves the following results on the evaluation set:
36
- - Loss: 0.6558
37
- - Accuracy: 0.6189
38
 
39
  ## Model description
40
 
 
2
  license: cc-by-nc-4.0
3
  base_model: sfairXC/FsfairX-LLaMA3-RM-v0.1
4
  tags:
 
 
5
  - trl
6
  - reward-trainer
7
+ - easylm
8
+ - alignment-handbook
9
  - generated_from_trainer
 
 
 
 
10
  model-index:
11
  - name: easylm-helpsteer-rm-FsfairX-LLaMA3-RM-v0.1
12
+ results: []
 
 
 
 
 
 
 
 
 
 
13
  ---
14
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
17
 
18
  # easylm-helpsteer-rm-FsfairX-LLaMA3-RM-v0.1
19
 
20
+ This model is a fine-tuned version of [sfairXC/FsfairX-LLaMA3-RM-v0.1](https://huggingface.co/sfairXC/FsfairX-LLaMA3-RM-v0.1) on an unknown dataset.
 
 
 
21
 
22
  ## Model description
23
 
all_results.json CHANGED
@@ -7,9 +7,9 @@
7
  "eval_samples_per_second": 58.504,
8
  "eval_steps_per_second": 7.376,
9
  "total_flos": 0.0,
10
- "train_loss": 0.6827200654027559,
11
- "train_runtime": 753.5592,
12
  "train_samples": 6666,
13
- "train_samples_per_second": 8.846,
14
- "train_steps_per_second": 1.107
15
  }
 
7
  "eval_samples_per_second": 58.504,
8
  "eval_steps_per_second": 7.376,
9
  "total_flos": 0.0,
10
+ "train_loss": 0.7041080118083268,
11
+ "train_runtime": 641.2149,
12
  "train_samples": 6666,
13
+ "train_samples_per_second": 10.396,
14
+ "train_steps_per_second": 1.301
15
  }
config.json CHANGED
@@ -31,6 +31,6 @@
31
  "tie_word_embeddings": false,
32
  "torch_dtype": "bfloat16",
33
  "transformers_version": "4.43.3",
34
- "use_cache": true,
35
  "vocab_size": 128257
36
  }
 
31
  "tie_word_embeddings": false,
32
  "torch_dtype": "bfloat16",
33
  "transformers_version": "4.43.3",
34
+ "use_cache": false,
35
  "vocab_size": 128257
36
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8acc17427f8f0dd8c1f1f528cf530b2d66f161b5911af5b8d36a2138fca1c667
3
  size 4976706864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f9b22e371184aacf60c8753ab4b3bf18b8091bd6600809c9d9a01574cf15ac6
3
  size 4976706864
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf18e01eb67a8e667dbe1c250eaddc16759f6e53f6c2ce59442fcf923204a156
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fb9476172104f9cbf563c96804d75d9910295c5f44b62b4815cefd5928c837a
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fafe4d838dbd806b0a060badc69971b47e1b16f02ba269d0ca71421fd6f78dbe
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be2c3efd182a08c5d23dddabf93c35c1ecb0316e338e60b7db71fabb06ce02e
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13cadc396a9ccebc408bad24f9762ca2e3d012d0bb83b69eaa3e43b7b0ca42cf
3
  size 117473824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:816c0d28901233ff0b5b13c507ffdcb6f99a08574fb73c407351aa7ee9ea847a
3
  size 117473824
tokenizer_config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "add_eos_token": true,
3
  "added_tokens_decoder": {
4
  "128000": {
5
  "content": "<|begin_of_text|>",
 
1
  {
 
2
  "added_tokens_decoder": {
3
  "128000": {
4
  "content": "<|begin_of_text|>",
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.6827200654027559,
5
- "train_runtime": 753.5592,
6
  "train_samples": 6666,
7
- "train_samples_per_second": 8.846,
8
- "train_steps_per_second": 1.107
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.7041080118083268,
5
+ "train_runtime": 641.2149,
6
  "train_samples": 6666,
7
+ "train_samples_per_second": 10.396,
8
+ "train_steps_per_second": 1.301
9
  }
trainer_state.json CHANGED
@@ -10,68 +10,68 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.11990407673860912,
13
- "grad_norm": 2.294161319732666,
14
  "learning_rate": 1.929887378670186e-05,
15
- "loss": 0.7381,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.23980815347721823,
20
- "grad_norm": 2.2879245281219482,
21
  "learning_rate": 1.729381074020218e-05,
22
- "loss": 0.6867,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.3597122302158273,
27
- "grad_norm": 4.782385349273682,
28
  "learning_rate": 1.4265971312744252e-05,
29
- "loss": 0.6877,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.47961630695443647,
34
- "grad_norm": 4.921843528747559,
35
  "learning_rate": 1.0639935022777741e-05,
36
- "loss": 0.6793,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.5995203836930456,
41
- "grad_norm": 3.5888805389404297,
42
  "learning_rate": 6.924163688955825e-06,
43
- "loss": 0.6807,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.7194244604316546,
48
- "grad_norm": 4.4004926681518555,
49
  "learning_rate": 3.6397022482313804e-06,
50
- "loss": 0.6611,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.8393285371702638,
55
- "grad_norm": 7.610467910766602,
56
  "learning_rate": 1.2471151031361795e-06,
57
- "loss": 0.651,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.9592326139088729,
62
- "grad_norm": 12.039061546325684,
63
  "learning_rate": 8.190336667550869e-08,
64
- "loss": 0.6708,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 1.0,
69
  "step": 834,
70
  "total_flos": 0.0,
71
- "train_loss": 0.6827200654027559,
72
- "train_runtime": 753.5592,
73
- "train_samples_per_second": 8.846,
74
- "train_steps_per_second": 1.107
75
  }
76
  ],
77
  "logging_steps": 100,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.11990407673860912,
13
+ "grad_norm": 33.553314208984375,
14
  "learning_rate": 1.929887378670186e-05,
15
+ "loss": 0.7732,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.23980815347721823,
20
+ "grad_norm": 5.416075706481934,
21
  "learning_rate": 1.729381074020218e-05,
22
+ "loss": 0.7018,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.3597122302158273,
27
+ "grad_norm": 5.061486721038818,
28
  "learning_rate": 1.4265971312744252e-05,
29
+ "loss": 0.7014,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.47961630695443647,
34
+ "grad_norm": 3.992720603942871,
35
  "learning_rate": 1.0639935022777741e-05,
36
+ "loss": 0.6841,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.5995203836930456,
41
+ "grad_norm": 4.914097785949707,
42
  "learning_rate": 6.924163688955825e-06,
43
+ "loss": 0.7251,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.7194244604316546,
48
+ "grad_norm": 4.0411577224731445,
49
  "learning_rate": 3.6397022482313804e-06,
50
+ "loss": 0.6829,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.8393285371702638,
55
+ "grad_norm": 5.131595611572266,
56
  "learning_rate": 1.2471151031361795e-06,
57
+ "loss": 0.6744,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.9592326139088729,
62
+ "grad_norm": 12.620176315307617,
63
  "learning_rate": 8.190336667550869e-08,
64
+ "loss": 0.6864,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 1.0,
69
  "step": 834,
70
  "total_flos": 0.0,
71
+ "train_loss": 0.7041080118083268,
72
+ "train_runtime": 641.2149,
73
+ "train_samples_per_second": 10.396,
74
+ "train_steps_per_second": 1.301
75
  }
76
  ],
77
  "logging_steps": 100,