scottsuk0306 committed on
Commit
a8394b9
1 Parent(s): 493478d

Model save

Browse files
README.md CHANGED
@@ -2,12 +2,10 @@
2
  license: cc-by-nc-4.0
3
  base_model: sfairXC/FsfairX-LLaMA3-RM-v0.1
4
  tags:
5
- - easylm
6
  - trl
7
  - reward-trainer
 
8
  - generated_from_trainer
9
- metrics:
10
- - accuracy
11
  model-index:
12
  - name: easylm-helpsteer-rm-FsfairX-LLaMA3-RM-v0.1
13
  results: []
@@ -19,9 +17,6 @@ should probably proofread and complete it, then remove this comment. -->
19
  # easylm-helpsteer-rm-FsfairX-LLaMA3-RM-v0.1
20
 
21
  This model is a fine-tuned version of [sfairXC/FsfairX-LLaMA3-RM-v0.1](https://huggingface.co/sfairXC/FsfairX-LLaMA3-RM-v0.1) on an unknown dataset.
22
- It achieves the following results on the evaluation set:
23
- - Loss: 0.4821
24
- - Accuracy: 0.7784
25
 
26
  ## Model description
27
 
@@ -40,30 +35,25 @@ More information needed
40
  ### Training hyperparameters
41
 
42
  The following hyperparameters were used during training:
43
- - learning_rate: 1e-06
44
  - train_batch_size: 1
45
  - eval_batch_size: 1
46
  - seed: 42
47
  - distributed_type: multi-GPU
48
- - num_devices: 4
49
- - total_train_batch_size: 4
50
- - total_eval_batch_size: 4
51
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
52
  - lr_scheduler_type: cosine
53
  - num_epochs: 1
54
 
55
  ### Training results
56
 
57
- | Training Loss | Epoch | Step | Validation Loss | Accuracy |
58
- |:-------------:|:------:|:----:|:---------------:|:--------:|
59
- | 0.5465 | 0.3053 | 500 | 0.5170 | 0.7755 |
60
- | 0.6788 | 0.6105 | 1000 | 0.4570 | 0.8105 |
61
- | 0.5754 | 0.9158 | 1500 | 0.4829 | 0.7872 |
62
 
63
 
64
  ### Framework versions
65
 
66
  - Transformers 4.43.3
67
- - Pytorch 2.3.0+cu121
68
  - Datasets 2.20.0
69
  - Tokenizers 0.19.1
 
2
  license: cc-by-nc-4.0
3
  base_model: sfairXC/FsfairX-LLaMA3-RM-v0.1
4
  tags:
 
5
  - trl
6
  - reward-trainer
7
+ - easylm
8
  - generated_from_trainer
 
 
9
  model-index:
10
  - name: easylm-helpsteer-rm-FsfairX-LLaMA3-RM-v0.1
11
  results: []
 
17
  # easylm-helpsteer-rm-FsfairX-LLaMA3-RM-v0.1
18
 
19
  This model is a fine-tuned version of [sfairXC/FsfairX-LLaMA3-RM-v0.1](https://huggingface.co/sfairXC/FsfairX-LLaMA3-RM-v0.1) on an unknown dataset.
 
 
 
20
 
21
  ## Model description
22
 
 
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
+ - learning_rate: 2e-05
39
  - train_batch_size: 1
40
  - eval_batch_size: 1
41
  - seed: 42
42
  - distributed_type: multi-GPU
43
+ - num_devices: 8
44
+ - total_train_batch_size: 8
45
+ - total_eval_batch_size: 8
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: cosine
48
  - num_epochs: 1
49
 
50
  ### Training results
51
 
 
 
 
 
 
52
 
53
 
54
  ### Framework versions
55
 
56
  - Transformers 4.43.3
57
+ - Pytorch 2.4.0+cu121
58
  - Datasets 2.20.0
59
  - Tokenizers 0.19.1
all_results.json CHANGED
@@ -7,9 +7,9 @@
7
  "eval_samples_per_second": 8.749,
8
  "eval_steps_per_second": 2.194,
9
  "total_flos": 0.0,
10
- "train_loss": 0.5596484089945699,
11
- "train_runtime": 4783.2861,
12
- "train_samples": 6981,
13
- "train_samples_per_second": 1.369,
14
- "train_steps_per_second": 0.342
15
  }
 
7
  "eval_samples_per_second": 8.749,
8
  "eval_steps_per_second": 2.194,
9
  "total_flos": 0.0,
10
+ "train_loss": 0.6827200654027559,
11
+ "train_runtime": 753.5592,
12
+ "train_samples": 6666,
13
+ "train_samples_per_second": 8.846,
14
+ "train_steps_per_second": 1.107
15
  }
config.json CHANGED
@@ -31,6 +31,6 @@
31
  "tie_word_embeddings": false,
32
  "torch_dtype": "bfloat16",
33
  "transformers_version": "4.43.3",
34
- "use_cache": true,
35
  "vocab_size": 128257
36
  }
 
31
  "tie_word_embeddings": false,
32
  "torch_dtype": "bfloat16",
33
  "transformers_version": "4.43.3",
34
+ "use_cache": false,
35
  "vocab_size": 128257
36
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b34ffd54b3d4f05d88eb4812c9352a8abce9efa749a5c48c6d8143b7013448d8
3
  size 4976706864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8acc17427f8f0dd8c1f1f528cf530b2d66f161b5911af5b8d36a2138fca1c667
3
  size 4976706864
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e87c01447f917408327e8aa4a94c97a2e7aca8924f55f12d69c6c6c3d48d0bc
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf18e01eb67a8e667dbe1c250eaddc16759f6e53f6c2ce59442fcf923204a156
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b6882c3999b90fdc8d41a337b6c35224af0d347a818a87bc3f52b31e916b1a1
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fafe4d838dbd806b0a060badc69971b47e1b16f02ba269d0ca71421fd6f78dbe
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:264a63f89a5aeef6a33137a0f6080313ff2ec594f18f3b0bb495eaab43ea99c0
3
  size 117473824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13cadc396a9ccebc408bad24f9762ca2e3d012d0bb83b69eaa3e43b7b0ca42cf
3
  size 117473824
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "added_tokens_decoder": {
3
  "128000": {
4
  "content": "<|begin_of_text|>",
@@ -2067,5 +2068,6 @@
2067
  ],
2068
  "model_max_length": 4096,
2069
  "pad_token": "[PAD]",
 
2070
  "tokenizer_class": "PreTrainedTokenizerFast"
2071
  }
 
1
  {
2
+ "add_eos_token": true,
3
  "added_tokens_decoder": {
4
  "128000": {
5
  "content": "<|begin_of_text|>",
 
2068
  ],
2069
  "model_max_length": 4096,
2070
  "pad_token": "[PAD]",
2071
+ "padding_side": "right",
2072
  "tokenizer_class": "PreTrainedTokenizerFast"
2073
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.5596484089945699,
5
- "train_runtime": 4783.2861,
6
- "train_samples": 6981,
7
- "train_samples_per_second": 1.369,
8
- "train_steps_per_second": 0.342
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.6827200654027559,
5
+ "train_runtime": 753.5592,
6
+ "train_samples": 6666,
7
+ "train_samples_per_second": 8.846,
8
+ "train_steps_per_second": 1.107
9
  }
trainer_state.json CHANGED
@@ -2,275 +2,80 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
- "eval_steps": 500,
6
- "global_step": 1638,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.030525030525030524,
13
- "grad_norm": 62.938530852833836,
14
- "learning_rate": 9.977026925653056e-07,
15
- "loss": 0.5083,
16
- "step": 50
17
- },
18
- {
19
- "epoch": 0.06105006105006105,
20
- "grad_norm": 213.3598267649049,
21
- "learning_rate": 9.908318807470199e-07,
22
- "loss": 0.5674,
23
  "step": 100
24
  },
25
  {
26
- "epoch": 0.09157509157509157,
27
- "grad_norm": 154.79969789365293,
28
- "learning_rate": 9.794507020134337e-07,
29
- "loss": 0.54,
30
- "step": 150
31
- },
32
- {
33
- "epoch": 0.1221001221001221,
34
- "grad_norm": 46.601082604459464,
35
- "learning_rate": 9.636637406306276e-07,
36
- "loss": 0.626,
37
  "step": 200
38
  },
39
  {
40
- "epoch": 0.15262515262515264,
41
- "grad_norm": 53.962192059634134,
42
- "learning_rate": 9.436160666136257e-07,
43
- "loss": 0.5212,
44
- "step": 250
45
- },
46
- {
47
- "epoch": 0.18315018315018314,
48
- "grad_norm": 87.58560621983881,
49
- "learning_rate": 9.194919026446978e-07,
50
- "loss": 0.5516,
51
  "step": 300
52
  },
53
  {
54
- "epoch": 0.21367521367521367,
55
- "grad_norm": 123.80979482151284,
56
- "learning_rate": 8.915129312088112e-07,
57
- "loss": 0.6218,
58
- "step": 350
59
- },
60
- {
61
- "epoch": 0.2442002442002442,
62
- "grad_norm": 390.7971062188635,
63
- "learning_rate": 8.599362575023441e-07,
64
- "loss": 0.6515,
65
  "step": 400
66
  },
67
  {
68
- "epoch": 0.27472527472527475,
69
- "grad_norm": 26.658076048563824,
70
- "learning_rate": 8.25052046834372e-07,
71
- "loss": 0.5756,
72
- "step": 450
73
- },
74
- {
75
- "epoch": 0.3052503052503053,
76
- "grad_norm": 57.30779689782776,
77
- "learning_rate": 7.87180858230979e-07,
78
- "loss": 0.5465,
79
  "step": 500
80
  },
81
  {
82
- "epoch": 0.3052503052503053,
83
- "eval_accuracy": 0.7755102040816326,
84
- "eval_loss": 0.5169934034347534,
85
- "eval_runtime": 38.1975,
86
- "eval_samples_per_second": 8.98,
87
- "eval_steps_per_second": 2.251,
88
- "step": 500
89
- },
90
- {
91
- "epoch": 0.33577533577533575,
92
- "grad_norm": 0.3421131228542828,
93
- "learning_rate": 7.466706987447221e-07,
94
- "loss": 0.573,
95
- "step": 550
96
- },
97
- {
98
- "epoch": 0.3663003663003663,
99
- "grad_norm": 322.32829569797076,
100
- "learning_rate": 7.03893825537875e-07,
101
- "loss": 0.5859,
102
  "step": 600
103
  },
104
  {
105
- "epoch": 0.3968253968253968,
106
- "grad_norm": 70.58010836437558,
107
- "learning_rate": 6.592433251258422e-07,
108
- "loss": 0.6098,
109
- "step": 650
110
- },
111
- {
112
- "epoch": 0.42735042735042733,
113
- "grad_norm": 46.90754337155108,
114
- "learning_rate": 6.131295012148612e-07,
115
- "loss": 0.5937,
116
  "step": 700
117
  },
118
  {
119
- "epoch": 0.45787545787545786,
120
- "grad_norm": 33.718249054223485,
121
- "learning_rate": 5.659761043269833e-07,
122
- "loss": 0.6069,
123
- "step": 750
124
- },
125
- {
126
- "epoch": 0.4884004884004884,
127
- "grad_norm": 67.18852674620265,
128
- "learning_rate": 5.182164378591751e-07,
129
- "loss": 0.5216,
130
  "step": 800
131
  },
132
- {
133
- "epoch": 0.518925518925519,
134
- "grad_norm": 63.20860748452122,
135
- "learning_rate": 4.7028937635885676e-07,
136
- "loss": 0.5404,
137
- "step": 850
138
- },
139
- {
140
- "epoch": 0.5494505494505495,
141
- "grad_norm": 101.12697436330178,
142
- "learning_rate": 4.226353326048593e-07,
143
- "loss": 0.6039,
144
- "step": 900
145
- },
146
- {
147
- "epoch": 0.57997557997558,
148
- "grad_norm": 16.22019917375954,
149
- "learning_rate": 3.7569221055322e-07,
150
- "loss": 0.4532,
151
- "step": 950
152
- },
153
- {
154
- "epoch": 0.6105006105006106,
155
- "grad_norm": 60.84282514547736,
156
- "learning_rate": 3.298913813371268e-07,
157
- "loss": 0.6788,
158
- "step": 1000
159
- },
160
- {
161
- "epoch": 0.6105006105006106,
162
- "eval_accuracy": 0.8104956268221575,
163
- "eval_loss": 0.45700323581695557,
164
- "eval_runtime": 39.2826,
165
- "eval_samples_per_second": 8.732,
166
- "eval_steps_per_second": 2.189,
167
- "step": 1000
168
- },
169
- {
170
- "epoch": 0.6410256410256411,
171
- "grad_norm": 27.664920029800854,
172
- "learning_rate": 2.856537192984728e-07,
173
- "loss": 0.498,
174
- "step": 1050
175
- },
176
- {
177
- "epoch": 0.6715506715506715,
178
- "grad_norm": 39.65159769933289,
179
- "learning_rate": 2.4338573447683793e-07,
180
- "loss": 0.4883,
181
- "step": 1100
182
- },
183
- {
184
- "epoch": 0.702075702075702,
185
- "grad_norm": 122.2483315006908,
186
- "learning_rate": 2.034758370953431e-07,
187
- "loss": 0.4834,
188
- "step": 1150
189
- },
190
- {
191
- "epoch": 0.7326007326007326,
192
- "grad_norm": 77.76945449705711,
193
- "learning_rate": 1.6629076836987782e-07,
194
- "loss": 0.6567,
195
- "step": 1200
196
- },
197
- {
198
- "epoch": 0.7631257631257631,
199
- "grad_norm": 294.4900200191884,
200
- "learning_rate": 1.3217223043981295e-07,
201
- "loss": 0.5157,
202
- "step": 1250
203
- },
204
- {
205
- "epoch": 0.7936507936507936,
206
- "grad_norm": 148.80279171380104,
207
- "learning_rate": 1.0143374638853891e-07,
208
- "loss": 0.5137,
209
- "step": 1300
210
- },
211
- {
212
- "epoch": 0.8241758241758241,
213
- "grad_norm": 196.55030736251504,
214
- "learning_rate": 7.435777920782443e-08,
215
- "loss": 0.5831,
216
- "step": 1350
217
- },
218
- {
219
- "epoch": 0.8547008547008547,
220
- "grad_norm": 16.26079979378068,
221
- "learning_rate": 5.119313618049309e-08,
222
- "loss": 0.5032,
223
- "step": 1400
224
- },
225
- {
226
- "epoch": 0.8852258852258852,
227
- "grad_norm": 56.51782762296821,
228
- "learning_rate": 3.2152682533139466e-08,
229
- "loss": 0.5128,
230
- "step": 1450
231
- },
232
- {
233
- "epoch": 0.9157509157509157,
234
- "grad_norm": 21.457330627788103,
235
- "learning_rate": 1.7411385368659936e-08,
236
- "loss": 0.5754,
237
- "step": 1500
238
- },
239
- {
240
- "epoch": 0.9157509157509157,
241
- "eval_accuracy": 0.7871720116618076,
242
- "eval_loss": 0.48293355107307434,
243
- "eval_runtime": 38.5213,
244
- "eval_samples_per_second": 8.904,
245
- "eval_steps_per_second": 2.233,
246
- "step": 1500
247
- },
248
- {
249
- "epoch": 0.9462759462759462,
250
- "grad_norm": 102.68064209299165,
251
- "learning_rate": 7.104705853346271e-09,
252
- "loss": 0.5787,
253
- "step": 1550
254
- },
255
- {
256
- "epoch": 0.9768009768009768,
257
- "grad_norm": 13.53505025487251,
258
- "learning_rate": 1.3273544331087737e-09,
259
- "loss": 0.5539,
260
- "step": 1600
261
- },
262
  {
263
  "epoch": 1.0,
264
- "step": 1638,
265
  "total_flos": 0.0,
266
- "train_loss": 0.5596484089945699,
267
- "train_runtime": 4783.2861,
268
- "train_samples_per_second": 1.369,
269
- "train_steps_per_second": 0.342
270
  }
271
  ],
272
- "logging_steps": 50,
273
- "max_steps": 1638,
274
  "num_input_tokens_seen": 0,
275
  "num_train_epochs": 1,
276
  "save_steps": 500,
@@ -280,8 +85,8 @@
280
  "should_epoch_stop": false,
281
  "should_evaluate": false,
282
  "should_log": false,
283
- "should_save": true,
284
- "should_training_stop": true
285
  },
286
  "attributes": {}
287
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
+ "eval_steps": 1000,
6
+ "global_step": 834,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.11990407673860912,
13
+ "grad_norm": 2.294161319732666,
14
+ "learning_rate": 1.929887378670186e-05,
15
+ "loss": 0.7381,
 
 
 
 
 
 
 
16
  "step": 100
17
  },
18
  {
19
+ "epoch": 0.23980815347721823,
20
+ "grad_norm": 2.2879245281219482,
21
+ "learning_rate": 1.729381074020218e-05,
22
+ "loss": 0.6867,
 
 
 
 
 
 
 
23
  "step": 200
24
  },
25
  {
26
+ "epoch": 0.3597122302158273,
27
+ "grad_norm": 4.782385349273682,
28
+ "learning_rate": 1.4265971312744252e-05,
29
+ "loss": 0.6877,
 
 
 
 
 
 
 
30
  "step": 300
31
  },
32
  {
33
+ "epoch": 0.47961630695443647,
34
+ "grad_norm": 4.921843528747559,
35
+ "learning_rate": 1.0639935022777741e-05,
36
+ "loss": 0.6793,
 
 
 
 
 
 
 
37
  "step": 400
38
  },
39
  {
40
+ "epoch": 0.5995203836930456,
41
+ "grad_norm": 3.5888805389404297,
42
+ "learning_rate": 6.924163688955825e-06,
43
+ "loss": 0.6807,
 
 
 
 
 
 
 
44
  "step": 500
45
  },
46
  {
47
+ "epoch": 0.7194244604316546,
48
+ "grad_norm": 4.4004926681518555,
49
+ "learning_rate": 3.6397022482313804e-06,
50
+ "loss": 0.6611,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  "step": 600
52
  },
53
  {
54
+ "epoch": 0.8393285371702638,
55
+ "grad_norm": 7.610467910766602,
56
+ "learning_rate": 1.2471151031361795e-06,
57
+ "loss": 0.651,
 
 
 
 
 
 
 
58
  "step": 700
59
  },
60
  {
61
+ "epoch": 0.9592326139088729,
62
+ "grad_norm": 12.039061546325684,
63
+ "learning_rate": 8.190336667550869e-08,
64
+ "loss": 0.6708,
 
 
 
 
 
 
 
65
  "step": 800
66
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  {
68
  "epoch": 1.0,
69
+ "step": 834,
70
  "total_flos": 0.0,
71
+ "train_loss": 0.6827200654027559,
72
+ "train_runtime": 753.5592,
73
+ "train_samples_per_second": 8.846,
74
+ "train_steps_per_second": 1.107
75
  }
76
  ],
77
+ "logging_steps": 100,
78
+ "max_steps": 834,
79
  "num_input_tokens_seen": 0,
80
  "num_train_epochs": 1,
81
  "save_steps": 500,
 
85
  "should_epoch_stop": false,
86
  "should_evaluate": false,
87
  "should_log": false,
88
+ "should_save": false,
89
+ "should_training_stop": false
90
  },
91
  "attributes": {}
92
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4608cf40f4c0de185f58bcdfe2c60989c03b08df7e180272c46735b16be89923
3
- size 6392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e7f7ddef7114b0f7e591d962ab6cae24ccd95a2ad6cb3e4dfd866dc0ef76178
3
+ size 6264