yuweiiizz commited on
Commit
64e7925
1 Parent(s): da35f85

Training in progress, step 5000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0abaa86fc4833d57f7fe503c09526ace445919e1b76f14b35ac06c12e186254f
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3f6d7fbf311f06dffa533857155833f39e492a1e6e09467737aaae7d02033d4
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77a61b8fba58f2ecec15e248785b8d20ba3be4672b027d63e4ae27a50a6e05e6
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42d1a12a31d0f16137c80f9dbea20b2559693f564e8c041bb266e4026444a44a
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4a887c0679a244fb3578da62fc4230274c5d38de547b25494a50298ffcd112e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fce579953b334f6cfba9152781b4b5b016a30a4024c41dd066d03ae60bfaddc7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a76e193687c482f6cc875caf45cbf094edc541bfbe3eb9f8259fd2d597d2f4e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:719b9638750507eaec42e786cd14bdd5ecf37679f81d3d2e9daed79b83274704
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 18.183709992924626,
3
- "best_model_checkpoint": "./whisper-small-taiwanese-hanzi/checkpoint-4000",
4
- "epoch": 1.6,
5
  "eval_steps": 1000,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1163,14 +1163,303 @@
1163
  "eval_samples_per_second": 2.244,
1164
  "eval_steps_per_second": 0.281,
1165
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1166
  }
1167
  ],
1168
  "logging_steps": 25,
1169
- "max_steps": 5000,
1170
  "num_input_tokens_seen": 0,
1171
- "num_train_epochs": 2,
1172
  "save_steps": 1000,
1173
- "total_flos": 1.846946562048e+19,
1174
  "train_batch_size": 8,
1175
  "trial_name": null,
1176
  "trial_params": null
 
1
  {
2
+ "best_metric": 17.193157697590213,
3
+ "best_model_checkpoint": "./whisper-small-taiwanese-hanzi/checkpoint-5000",
4
+ "epoch": 2.0,
5
  "eval_steps": 1000,
6
+ "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1163
  "eval_samples_per_second": 2.244,
1164
  "eval_steps_per_second": 0.281,
1165
  "step": 4000
1166
+ },
1167
+ {
1168
+ "epoch": 1.6099999999999999,
1169
+ "grad_norm": 6.242763042449951,
1170
+ "learning_rate": 5.520833333333334e-06,
1171
+ "loss": 0.1892,
1172
+ "step": 4025
1173
+ },
1174
+ {
1175
+ "epoch": 1.62,
1176
+ "grad_norm": 7.155097007751465,
1177
+ "learning_rate": 5.486111111111112e-06,
1178
+ "loss": 0.2207,
1179
+ "step": 4050
1180
+ },
1181
+ {
1182
+ "epoch": 1.63,
1183
+ "grad_norm": 10.018449783325195,
1184
+ "learning_rate": 5.451388888888889e-06,
1185
+ "loss": 0.2497,
1186
+ "step": 4075
1187
+ },
1188
+ {
1189
+ "epoch": 1.6400000000000001,
1190
+ "grad_norm": 9.636473655700684,
1191
+ "learning_rate": 5.416666666666667e-06,
1192
+ "loss": 0.2356,
1193
+ "step": 4100
1194
+ },
1195
+ {
1196
+ "epoch": 1.65,
1197
+ "grad_norm": 8.315143585205078,
1198
+ "learning_rate": 5.381944444444445e-06,
1199
+ "loss": 0.2333,
1200
+ "step": 4125
1201
+ },
1202
+ {
1203
+ "epoch": 1.6600000000000001,
1204
+ "grad_norm": 8.561467170715332,
1205
+ "learning_rate": 5.347222222222222e-06,
1206
+ "loss": 0.2205,
1207
+ "step": 4150
1208
+ },
1209
+ {
1210
+ "epoch": 1.67,
1211
+ "grad_norm": 5.167340278625488,
1212
+ "learning_rate": 5.3125e-06,
1213
+ "loss": 0.221,
1214
+ "step": 4175
1215
+ },
1216
+ {
1217
+ "epoch": 1.6800000000000002,
1218
+ "grad_norm": 6.219302654266357,
1219
+ "learning_rate": 5.2777777777777785e-06,
1220
+ "loss": 0.2057,
1221
+ "step": 4200
1222
+ },
1223
+ {
1224
+ "epoch": 1.69,
1225
+ "grad_norm": 7.842504501342773,
1226
+ "learning_rate": 5.243055555555556e-06,
1227
+ "loss": 0.2239,
1228
+ "step": 4225
1229
+ },
1230
+ {
1231
+ "epoch": 1.7,
1232
+ "grad_norm": 5.829674243927002,
1233
+ "learning_rate": 5.208333333333334e-06,
1234
+ "loss": 0.2304,
1235
+ "step": 4250
1236
+ },
1237
+ {
1238
+ "epoch": 1.71,
1239
+ "grad_norm": 10.23341178894043,
1240
+ "learning_rate": 5.173611111111112e-06,
1241
+ "loss": 0.2134,
1242
+ "step": 4275
1243
+ },
1244
+ {
1245
+ "epoch": 1.72,
1246
+ "grad_norm": 5.533736228942871,
1247
+ "learning_rate": 5.138888888888889e-06,
1248
+ "loss": 0.2122,
1249
+ "step": 4300
1250
+ },
1251
+ {
1252
+ "epoch": 1.73,
1253
+ "grad_norm": 9.650428771972656,
1254
+ "learning_rate": 5.104166666666667e-06,
1255
+ "loss": 0.2256,
1256
+ "step": 4325
1257
+ },
1258
+ {
1259
+ "epoch": 1.74,
1260
+ "grad_norm": 7.121572494506836,
1261
+ "learning_rate": 5.069444444444445e-06,
1262
+ "loss": 0.2365,
1263
+ "step": 4350
1264
+ },
1265
+ {
1266
+ "epoch": 1.75,
1267
+ "grad_norm": 5.643657207489014,
1268
+ "learning_rate": 5.034722222222222e-06,
1269
+ "loss": 0.1922,
1270
+ "step": 4375
1271
+ },
1272
+ {
1273
+ "epoch": 1.76,
1274
+ "grad_norm": 7.42201042175293,
1275
+ "learning_rate": 5e-06,
1276
+ "loss": 0.2238,
1277
+ "step": 4400
1278
+ },
1279
+ {
1280
+ "epoch": 1.77,
1281
+ "grad_norm": 7.345561981201172,
1282
+ "learning_rate": 4.9652777777777786e-06,
1283
+ "loss": 0.2236,
1284
+ "step": 4425
1285
+ },
1286
+ {
1287
+ "epoch": 1.78,
1288
+ "grad_norm": 5.476310729980469,
1289
+ "learning_rate": 4.930555555555556e-06,
1290
+ "loss": 0.2232,
1291
+ "step": 4450
1292
+ },
1293
+ {
1294
+ "epoch": 1.79,
1295
+ "grad_norm": 9.794219970703125,
1296
+ "learning_rate": 4.895833333333333e-06,
1297
+ "loss": 0.2352,
1298
+ "step": 4475
1299
+ },
1300
+ {
1301
+ "epoch": 1.8,
1302
+ "grad_norm": 8.994114875793457,
1303
+ "learning_rate": 4.861111111111111e-06,
1304
+ "loss": 0.2164,
1305
+ "step": 4500
1306
+ },
1307
+ {
1308
+ "epoch": 1.81,
1309
+ "grad_norm": 8.589949607849121,
1310
+ "learning_rate": 4.8263888888888895e-06,
1311
+ "loss": 0.2062,
1312
+ "step": 4525
1313
+ },
1314
+ {
1315
+ "epoch": 1.8199999999999998,
1316
+ "grad_norm": 6.32391881942749,
1317
+ "learning_rate": 4.791666666666668e-06,
1318
+ "loss": 0.2476,
1319
+ "step": 4550
1320
+ },
1321
+ {
1322
+ "epoch": 1.83,
1323
+ "grad_norm": 7.506887435913086,
1324
+ "learning_rate": 4.756944444444445e-06,
1325
+ "loss": 0.2243,
1326
+ "step": 4575
1327
+ },
1328
+ {
1329
+ "epoch": 1.8399999999999999,
1330
+ "grad_norm": 5.173837661743164,
1331
+ "learning_rate": 4.722222222222222e-06,
1332
+ "loss": 0.1789,
1333
+ "step": 4600
1334
+ },
1335
+ {
1336
+ "epoch": 1.85,
1337
+ "grad_norm": 7.828786849975586,
1338
+ "learning_rate": 4.6875000000000004e-06,
1339
+ "loss": 0.2138,
1340
+ "step": 4625
1341
+ },
1342
+ {
1343
+ "epoch": 1.8599999999999999,
1344
+ "grad_norm": 6.593687057495117,
1345
+ "learning_rate": 4.652777777777779e-06,
1346
+ "loss": 0.2238,
1347
+ "step": 4650
1348
+ },
1349
+ {
1350
+ "epoch": 1.87,
1351
+ "grad_norm": 8.0968599319458,
1352
+ "learning_rate": 4.618055555555556e-06,
1353
+ "loss": 0.2038,
1354
+ "step": 4675
1355
+ },
1356
+ {
1357
+ "epoch": 1.88,
1358
+ "grad_norm": 7.589450359344482,
1359
+ "learning_rate": 4.583333333333333e-06,
1360
+ "loss": 0.203,
1361
+ "step": 4700
1362
+ },
1363
+ {
1364
+ "epoch": 1.8900000000000001,
1365
+ "grad_norm": 10.24547004699707,
1366
+ "learning_rate": 4.548611111111111e-06,
1367
+ "loss": 0.2468,
1368
+ "step": 4725
1369
+ },
1370
+ {
1371
+ "epoch": 1.9,
1372
+ "grad_norm": 6.878439426422119,
1373
+ "learning_rate": 4.5138888888888895e-06,
1374
+ "loss": 0.1983,
1375
+ "step": 4750
1376
+ },
1377
+ {
1378
+ "epoch": 1.9100000000000001,
1379
+ "grad_norm": 5.918954372406006,
1380
+ "learning_rate": 4.479166666666667e-06,
1381
+ "loss": 0.1921,
1382
+ "step": 4775
1383
+ },
1384
+ {
1385
+ "epoch": 1.92,
1386
+ "grad_norm": 10.184906005859375,
1387
+ "learning_rate": 4.444444444444444e-06,
1388
+ "loss": 0.1898,
1389
+ "step": 4800
1390
+ },
1391
+ {
1392
+ "epoch": 1.9300000000000002,
1393
+ "grad_norm": 8.617950439453125,
1394
+ "learning_rate": 4.409722222222222e-06,
1395
+ "loss": 0.1916,
1396
+ "step": 4825
1397
+ },
1398
+ {
1399
+ "epoch": 1.94,
1400
+ "grad_norm": 5.356297969818115,
1401
+ "learning_rate": 4.3750000000000005e-06,
1402
+ "loss": 0.1844,
1403
+ "step": 4850
1404
+ },
1405
+ {
1406
+ "epoch": 1.95,
1407
+ "grad_norm": 6.3137030601501465,
1408
+ "learning_rate": 4.340277777777779e-06,
1409
+ "loss": 0.2078,
1410
+ "step": 4875
1411
+ },
1412
+ {
1413
+ "epoch": 1.96,
1414
+ "grad_norm": 5.769162178039551,
1415
+ "learning_rate": 4.305555555555556e-06,
1416
+ "loss": 0.2193,
1417
+ "step": 4900
1418
+ },
1419
+ {
1420
+ "epoch": 1.97,
1421
+ "grad_norm": 8.022501945495605,
1422
+ "learning_rate": 4.270833333333333e-06,
1423
+ "loss": 0.2296,
1424
+ "step": 4925
1425
+ },
1426
+ {
1427
+ "epoch": 1.98,
1428
+ "grad_norm": 6.754506587982178,
1429
+ "learning_rate": 4.236111111111111e-06,
1430
+ "loss": 0.226,
1431
+ "step": 4950
1432
+ },
1433
+ {
1434
+ "epoch": 1.99,
1435
+ "grad_norm": 6.45380163192749,
1436
+ "learning_rate": 4.2013888888888896e-06,
1437
+ "loss": 0.1871,
1438
+ "step": 4975
1439
+ },
1440
+ {
1441
+ "epoch": 2.0,
1442
+ "grad_norm": 5.92453145980835,
1443
+ "learning_rate": 4.166666666666667e-06,
1444
+ "loss": 0.2223,
1445
+ "step": 5000
1446
+ },
1447
+ {
1448
+ "epoch": 2.0,
1449
+ "eval_cer": 17.193157697590213,
1450
+ "eval_loss": 0.29516366124153137,
1451
+ "eval_runtime": 1741.8319,
1452
+ "eval_samples_per_second": 2.26,
1453
+ "eval_steps_per_second": 0.282,
1454
+ "step": 5000
1455
  }
1456
  ],
1457
  "logging_steps": 25,
1458
+ "max_steps": 8000,
1459
  "num_input_tokens_seen": 0,
1460
+ "num_train_epochs": 4,
1461
  "save_steps": 1000,
1462
+ "total_flos": 2.30868320256e+19,
1463
  "train_batch_size": 8,
1464
  "trial_name": null,
1465
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5db4033f6b868aaeb993204292a7f37e97e009a90ed66631bd9548f433d7f150
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3de5b9bda0f933887f039c8c86c5054848a6cee2f3d7f8f9e5d030a92f7f5ad
3
  size 5176