tuanio committed on
Commit
911ba74
1 Parent(s): eb217a7

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. config.json +47 -0
  2. mm_projector.bin +3 -0
  3. trainer_state.json +444 -0
config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Qwen/Qwen1.5-0.5B",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151643,
9
+ "freeze_mm_mlp_adapter": false,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "image_aspect_ratio": "square",
13
+ "image_projector_type": "mlp2x_gelu",
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 2816,
16
+ "max_position_embeddings": 32768,
17
+ "max_window_layers": 21,
18
+ "mm_hidden_size": 768,
19
+ "mm_image_tower": "google/siglip-base-patch16-256-multilingual",
20
+ "mm_projector_lr": null,
21
+ "mm_use_im_patch_token": false,
22
+ "mm_use_im_start_end": false,
23
+ "mm_video_tower": null,
24
+ "mm_vision_select_feature": "patch",
25
+ "mm_vision_select_layer": -2,
26
+ "model_type": "llava_qwen1_5",
27
+ "num_attention_heads": 16,
28
+ "num_hidden_layers": 24,
29
+ "num_key_value_heads": 16,
30
+ "pad_token_id": 151646,
31
+ "rms_norm_eps": 1e-06,
32
+ "rope_theta": 1000000.0,
33
+ "sliding_window": 32768,
34
+ "tie_word_embeddings": true,
35
+ "tokenizer_padding_side": "right",
36
+ "torch_dtype": "bfloat16",
37
+ "transformers_version": "4.37.0",
38
+ "tune_mm_mlp_adapter": true,
39
+ "use_cache": true,
40
+ "use_mm_proj": true,
41
+ "use_sliding_window": false,
42
+ "video_global_proj": false,
43
+ "video_projector_type": "linear",
44
+ "video_spatial_proj": false,
45
+ "video_temproal_proj": false,
46
+ "vocab_size": 151936
47
+ }
mm_projector.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44c81d2ae27b6d633ca661d0b0c2457645234188d507ef2680d0ceacd4084b5f
3
+ size 3675837
trainer_state.json ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2085,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04,
13
+ "learning_rate": 0.00014354066985645933,
14
+ "loss": 4.6999,
15
+ "step": 30
16
+ },
17
+ {
18
+ "epoch": 0.09,
19
+ "learning_rate": 0.00028708133971291867,
20
+ "loss": 4.0234,
21
+ "step": 60
22
+ },
23
+ {
24
+ "epoch": 0.13,
25
+ "learning_rate": 0.00043062200956937803,
26
+ "loss": 3.84,
27
+ "step": 90
28
+ },
29
+ {
30
+ "epoch": 0.17,
31
+ "learning_rate": 0.0005741626794258373,
32
+ "loss": 3.7458,
33
+ "step": 120
34
+ },
35
+ {
36
+ "epoch": 0.22,
37
+ "learning_rate": 0.0007177033492822966,
38
+ "loss": 3.6694,
39
+ "step": 150
40
+ },
41
+ {
42
+ "epoch": 0.26,
43
+ "learning_rate": 0.0008612440191387561,
44
+ "loss": 3.6173,
45
+ "step": 180
46
+ },
47
+ {
48
+ "epoch": 0.3,
49
+ "learning_rate": 0.000999999298909658,
50
+ "loss": 3.5916,
51
+ "step": 210
52
+ },
53
+ {
54
+ "epoch": 0.35,
55
+ "learning_rate": 0.0009993264033223551,
56
+ "loss": 3.5461,
57
+ "step": 240
58
+ },
59
+ {
60
+ "epoch": 0.39,
61
+ "learning_rate": 0.0009973935099776767,
62
+ "loss": 3.5042,
63
+ "step": 270
64
+ },
65
+ {
66
+ "epoch": 0.43,
67
+ "learning_rate": 0.0009942054963290549,
68
+ "loss": 3.4956,
69
+ "step": 300
70
+ },
71
+ {
72
+ "epoch": 0.47,
73
+ "learning_rate": 0.0009897704069942402,
74
+ "loss": 3.5649,
75
+ "step": 330
76
+ },
77
+ {
78
+ "epoch": 0.52,
79
+ "learning_rate": 0.0009840994334555549,
80
+ "loss": 3.5746,
81
+ "step": 360
82
+ },
83
+ {
84
+ "epoch": 0.56,
85
+ "learning_rate": 0.0009772068858193608,
86
+ "loss": 3.5424,
87
+ "step": 390
88
+ },
89
+ {
90
+ "epoch": 0.6,
91
+ "learning_rate": 0.000969110156706009,
92
+ "loss": 3.567,
93
+ "step": 420
94
+ },
95
+ {
96
+ "epoch": 0.65,
97
+ "learning_rate": 0.0009598296773613879,
98
+ "loss": 3.4753,
99
+ "step": 450
100
+ },
101
+ {
102
+ "epoch": 0.69,
103
+ "learning_rate": 0.0009493888661008194,
104
+ "loss": 3.4324,
105
+ "step": 480
106
+ },
107
+ {
108
+ "epoch": 0.73,
109
+ "learning_rate": 0.0009378140692153992,
110
+ "loss": 3.4661,
111
+ "step": 510
112
+ },
113
+ {
114
+ "epoch": 0.78,
115
+ "learning_rate": 0.0009251344944898958,
116
+ "loss": 3.4081,
117
+ "step": 540
118
+ },
119
+ {
120
+ "epoch": 0.82,
121
+ "learning_rate": 0.0009113821374999736,
122
+ "loss": 3.3848,
123
+ "step": 570
124
+ },
125
+ {
126
+ "epoch": 0.86,
127
+ "learning_rate": 0.0008965917008747158,
128
+ "loss": 3.3552,
129
+ "step": 600
130
+ },
131
+ {
132
+ "epoch": 0.91,
133
+ "learning_rate": 0.000880800506728183,
134
+ "loss": 3.3352,
135
+ "step": 630
136
+ },
137
+ {
138
+ "epoch": 0.95,
139
+ "learning_rate": 0.0008640484024809779,
140
+ "loss": 3.2935,
141
+ "step": 660
142
+ },
143
+ {
144
+ "epoch": 0.99,
145
+ "learning_rate": 0.0008463776603094606,
146
+ "loss": 3.2754,
147
+ "step": 690
148
+ },
149
+ {
150
+ "epoch": 1.04,
151
+ "learning_rate": 0.0008278328704763516,
152
+ "loss": 3.2657,
153
+ "step": 720
154
+ },
155
+ {
156
+ "epoch": 1.08,
157
+ "learning_rate": 0.0008084608288118838,
158
+ "loss": 3.256,
159
+ "step": 750
160
+ },
161
+ {
162
+ "epoch": 1.12,
163
+ "learning_rate": 0.0007883104186294383,
164
+ "loss": 3.2201,
165
+ "step": 780
166
+ },
167
+ {
168
+ "epoch": 1.17,
169
+ "learning_rate": 0.0007674324873736347,
170
+ "loss": 3.1941,
171
+ "step": 810
172
+ },
173
+ {
174
+ "epoch": 1.21,
175
+ "learning_rate": 0.0007458797183121429,
176
+ "loss": 3.1856,
177
+ "step": 840
178
+ },
179
+ {
180
+ "epoch": 1.25,
181
+ "learning_rate": 0.0007237064975949886,
182
+ "loss": 3.1649,
183
+ "step": 870
184
+ },
185
+ {
186
+ "epoch": 1.29,
187
+ "learning_rate": 0.0007009687770168125,
188
+ "loss": 3.155,
189
+ "step": 900
190
+ },
191
+ {
192
+ "epoch": 1.34,
193
+ "learning_rate": 0.0006777239328283909,
194
+ "loss": 3.1357,
195
+ "step": 930
196
+ },
197
+ {
198
+ "epoch": 1.38,
199
+ "learning_rate": 0.0006540306209536906,
200
+ "loss": 3.1309,
201
+ "step": 960
202
+ },
203
+ {
204
+ "epoch": 1.42,
205
+ "learning_rate": 0.0006299486289778033,
206
+ "loss": 3.1091,
207
+ "step": 990
208
+ },
209
+ {
210
+ "epoch": 1.47,
211
+ "learning_rate": 0.0006055387252792498,
212
+ "loss": 3.0958,
213
+ "step": 1020
214
+ },
215
+ {
216
+ "epoch": 1.51,
217
+ "learning_rate": 0.0005808625056873581,
218
+ "loss": 3.0813,
219
+ "step": 1050
220
+ },
221
+ {
222
+ "epoch": 1.55,
223
+ "learning_rate": 0.0005559822380516539,
224
+ "loss": 3.0854,
225
+ "step": 1080
226
+ },
227
+ {
228
+ "epoch": 1.6,
229
+ "learning_rate": 0.0005309607051154799,
230
+ "loss": 3.068,
231
+ "step": 1110
232
+ },
233
+ {
234
+ "epoch": 1.64,
235
+ "learning_rate": 0.0005058610460903332,
236
+ "loss": 3.0664,
237
+ "step": 1140
238
+ },
239
+ {
240
+ "epoch": 1.68,
241
+ "learning_rate": 0.00048074659733069516,
242
+ "loss": 3.0627,
243
+ "step": 1170
244
+ },
245
+ {
246
+ "epoch": 1.73,
247
+ "learning_rate": 0.00045568073251138903,
248
+ "loss": 3.0376,
249
+ "step": 1200
250
+ },
251
+ {
252
+ "epoch": 1.77,
253
+ "learning_rate": 0.0004307267027107653,
254
+ "loss": 3.0274,
255
+ "step": 1230
256
+ },
257
+ {
258
+ "epoch": 1.81,
259
+ "learning_rate": 0.00040594747680324574,
260
+ "loss": 3.0212,
261
+ "step": 1260
262
+ },
263
+ {
264
+ "epoch": 1.86,
265
+ "learning_rate": 0.0003814055825639795,
266
+ "loss": 3.0101,
267
+ "step": 1290
268
+ },
269
+ {
270
+ "epoch": 1.9,
271
+ "learning_rate": 0.000357162948886567,
272
+ "loss": 3.0076,
273
+ "step": 1320
274
+ },
275
+ {
276
+ "epoch": 1.94,
277
+ "learning_rate": 0.00033328074951199845,
278
+ "loss": 3.0129,
279
+ "step": 1350
280
+ },
281
+ {
282
+ "epoch": 1.99,
283
+ "learning_rate": 0.0003098192486631408,
284
+ "loss": 2.9902,
285
+ "step": 1380
286
+ },
287
+ {
288
+ "epoch": 2.03,
289
+ "learning_rate": 0.00028683764897429804,
290
+ "loss": 2.9819,
291
+ "step": 1410
292
+ },
293
+ {
294
+ "epoch": 2.07,
295
+ "learning_rate": 0.0002643939420995788,
296
+ "loss": 2.9891,
297
+ "step": 1440
298
+ },
299
+ {
300
+ "epoch": 2.12,
301
+ "learning_rate": 0.00024254476237704588,
302
+ "loss": 2.9702,
303
+ "step": 1470
304
+ },
305
+ {
306
+ "epoch": 2.16,
307
+ "learning_rate": 0.00022134524391790916,
308
+ "loss": 2.972,
309
+ "step": 1500
310
+ },
311
+ {
312
+ "epoch": 2.2,
313
+ "learning_rate": 0.00020084888148138487,
314
+ "loss": 2.9733,
315
+ "step": 1530
316
+ },
317
+ {
318
+ "epoch": 2.24,
319
+ "learning_rate": 0.00018110739548628618,
320
+ "loss": 2.9688,
321
+ "step": 1560
322
+ },
323
+ {
324
+ "epoch": 2.29,
325
+ "learning_rate": 0.0001621706014999767,
326
+ "loss": 2.9629,
327
+ "step": 1590
328
+ },
329
+ {
330
+ "epoch": 2.33,
331
+ "learning_rate": 0.00014408628453401574,
332
+ "loss": 2.9609,
333
+ "step": 1620
334
+ },
335
+ {
336
+ "epoch": 2.37,
337
+ "learning_rate": 0.00012690007846369856,
338
+ "loss": 2.9461,
339
+ "step": 1650
340
+ },
341
+ {
342
+ "epoch": 2.42,
343
+ "learning_rate": 0.00011065535087576234,
344
+ "loss": 2.9493,
345
+ "step": 1680
346
+ },
347
+ {
348
+ "epoch": 2.46,
349
+ "learning_rate": 9.539309363483478e-05,
350
+ "loss": 2.9417,
351
+ "step": 1710
352
+ },
353
+ {
354
+ "epoch": 2.5,
355
+ "learning_rate": 8.115181944476685e-05,
356
+ "loss": 2.9459,
357
+ "step": 1740
358
+ },
359
+ {
360
+ "epoch": 2.55,
361
+ "learning_rate": 6.796746466586756e-05,
362
+ "loss": 2.9273,
363
+ "step": 1770
364
+ },
365
+ {
366
+ "epoch": 2.59,
367
+ "learning_rate": 5.58732986332719e-05,
368
+ "loss": 2.9397,
369
+ "step": 1800
370
+ },
371
+ {
372
+ "epoch": 2.63,
373
+ "learning_rate": 4.4899839705266174e-05,
374
+ "loss": 2.943,
375
+ "step": 1830
376
+ },
377
+ {
378
+ "epoch": 2.68,
379
+ "learning_rate": 3.507477825341493e-05,
380
+ "loss": 2.9381,
381
+ "step": 1860
382
+ },
383
+ {
384
+ "epoch": 2.72,
385
+ "learning_rate": 2.642290678881504e-05,
386
+ "loss": 2.9509,
387
+ "step": 1890
388
+ },
389
+ {
390
+ "epoch": 2.76,
391
+ "learning_rate": 1.8966057400797153e-05,
392
+ "loss": 2.9437,
393
+ "step": 1920
394
+ },
395
+ {
396
+ "epoch": 2.81,
397
+ "learning_rate": 1.272304666594032e-05,
398
+ "loss": 2.9374,
399
+ "step": 1950
400
+ },
401
+ {
402
+ "epoch": 2.85,
403
+ "learning_rate": 7.709628166416128e-06,
404
+ "loss": 2.9508,
405
+ "step": 1980
406
+ },
407
+ {
408
+ "epoch": 2.89,
409
+ "learning_rate": 3.93845273747806e-06,
410
+ "loss": 2.9407,
411
+ "step": 2010
412
+ },
413
+ {
414
+ "epoch": 2.94,
415
+ "learning_rate": 1.4190365444071153e-06,
416
+ "loss": 2.949,
417
+ "step": 2040
418
+ },
419
+ {
420
+ "epoch": 2.98,
421
+ "learning_rate": 1.5773706946742295e-07,
422
+ "loss": 2.9544,
423
+ "step": 2070
424
+ },
425
+ {
426
+ "epoch": 3.0,
427
+ "step": 2085,
428
+ "total_flos": 1.731497869485015e+17,
429
+ "train_loss": 3.211610127181458,
430
+ "train_runtime": 4482.5265,
431
+ "train_samples_per_second": 74.379,
432
+ "train_steps_per_second": 0.465
433
+ }
434
+ ],
435
+ "logging_steps": 30,
436
+ "max_steps": 2085,
437
+ "num_input_tokens_seen": 0,
438
+ "num_train_epochs": 3,
439
+ "save_steps": 5000,
440
+ "total_flos": 1.731497869485015e+17,
441
+ "train_batch_size": 16,
442
+ "trial_name": null,
443
+ "trial_params": null
444
+ }