yuweiiizz commited on
Commit
6ae13ba
1 Parent(s): da488c1

Training in progress, step 8000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7557404827630580061cacbdd8f26d0d44a02e5df077ae8459b8a8446eb057a
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac71f6104740cd4c6f5b2f0a4f1d312da1f27f2b28f81745260c3b9565e2c13f
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40065ff3d994eb26a33f8c24383aff7f13d2afaa2a976d0aaefce0b59d43194b
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa70ddee520870b9de82d68cf42fd025e0d52691a3ae84d400a9cf6b5eefc95d
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ada5f6f7cb1b6a49d79d11cd5642321498733c76d6eb8ca5030fe74fa4bc331
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bcd75decc8ec809bdd000c1a023eecd569d9a9775fe640822926fa2ab60021b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38c7bcc6d095cc304fbc9e04d83745ba25161601bac81ac7ac1f4f775235d730
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d64c6a32c80af160202850f4c0903e8541ea45c1c0ea5e8f7b7cbe62296d013
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 16.889332833895203,
3
- "best_model_checkpoint": "./whisper-small-taiwanese-hanzi/checkpoint-7000",
4
- "epoch": 2.8,
5
  "eval_steps": 1000,
6
- "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2030,6 +2030,295 @@
2030
  "eval_samples_per_second": 2.241,
2031
  "eval_steps_per_second": 0.28,
2032
  "step": 7000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2033
  }
2034
  ],
2035
  "logging_steps": 25,
@@ -2037,7 +2326,7 @@
2037
  "num_input_tokens_seen": 0,
2038
  "num_train_epochs": 4,
2039
  "save_steps": 1000,
2040
- "total_flos": 3.232156483584e+19,
2041
  "train_batch_size": 8,
2042
  "trial_name": null,
2043
  "trial_params": null
 
1
  {
2
+ "best_metric": 16.252549215465933,
3
+ "best_model_checkpoint": "./whisper-small-taiwanese-hanzi/checkpoint-8000",
4
+ "epoch": 3.2,
5
  "eval_steps": 1000,
6
+ "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2030
  "eval_samples_per_second": 2.241,
2031
  "eval_steps_per_second": 0.28,
2032
  "step": 7000
2033
+ },
2034
+ {
2035
+ "epoch": 2.81,
2036
+ "grad_norm": 4.931020736694336,
2037
+ "learning_rate": 1.3541666666666667e-06,
2038
+ "loss": 0.1069,
2039
+ "step": 7025
2040
+ },
2041
+ {
2042
+ "epoch": 2.82,
2043
+ "grad_norm": 4.510303497314453,
2044
+ "learning_rate": 1.3194444444444446e-06,
2045
+ "loss": 0.1088,
2046
+ "step": 7050
2047
+ },
2048
+ {
2049
+ "epoch": 2.83,
2050
+ "grad_norm": 6.164842128753662,
2051
+ "learning_rate": 1.2847222222222222e-06,
2052
+ "loss": 0.1168,
2053
+ "step": 7075
2054
+ },
2055
+ {
2056
+ "epoch": 2.84,
2057
+ "grad_norm": 4.769392490386963,
2058
+ "learning_rate": 1.25e-06,
2059
+ "loss": 0.1235,
2060
+ "step": 7100
2061
+ },
2062
+ {
2063
+ "epoch": 2.85,
2064
+ "grad_norm": 5.1608710289001465,
2065
+ "learning_rate": 1.2152777777777778e-06,
2066
+ "loss": 0.1068,
2067
+ "step": 7125
2068
+ },
2069
+ {
2070
+ "epoch": 2.86,
2071
+ "grad_norm": 6.107107639312744,
2072
+ "learning_rate": 1.1805555555555556e-06,
2073
+ "loss": 0.1213,
2074
+ "step": 7150
2075
+ },
2076
+ {
2077
+ "epoch": 2.87,
2078
+ "grad_norm": 5.863134860992432,
2079
+ "learning_rate": 1.1458333333333333e-06,
2080
+ "loss": 0.1087,
2081
+ "step": 7175
2082
+ },
2083
+ {
2084
+ "epoch": 2.88,
2085
+ "grad_norm": 4.510376453399658,
2086
+ "learning_rate": 1.111111111111111e-06,
2087
+ "loss": 0.1246,
2088
+ "step": 7200
2089
+ },
2090
+ {
2091
+ "epoch": 2.89,
2092
+ "grad_norm": 6.96931791305542,
2093
+ "learning_rate": 1.076388888888889e-06,
2094
+ "loss": 0.1113,
2095
+ "step": 7225
2096
+ },
2097
+ {
2098
+ "epoch": 2.9,
2099
+ "grad_norm": 6.812278747558594,
2100
+ "learning_rate": 1.0416666666666667e-06,
2101
+ "loss": 0.1392,
2102
+ "step": 7250
2103
+ },
2104
+ {
2105
+ "epoch": 2.91,
2106
+ "grad_norm": 5.756344318389893,
2107
+ "learning_rate": 1.0069444444444447e-06,
2108
+ "loss": 0.0846,
2109
+ "step": 7275
2110
+ },
2111
+ {
2112
+ "epoch": 2.92,
2113
+ "grad_norm": 3.710134506225586,
2114
+ "learning_rate": 9.722222222222224e-07,
2115
+ "loss": 0.101,
2116
+ "step": 7300
2117
+ },
2118
+ {
2119
+ "epoch": 2.93,
2120
+ "grad_norm": 6.572783946990967,
2121
+ "learning_rate": 9.375000000000001e-07,
2122
+ "loss": 0.1141,
2123
+ "step": 7325
2124
+ },
2125
+ {
2126
+ "epoch": 2.94,
2127
+ "grad_norm": 8.185981750488281,
2128
+ "learning_rate": 9.027777777777779e-07,
2129
+ "loss": 0.1207,
2130
+ "step": 7350
2131
+ },
2132
+ {
2133
+ "epoch": 2.95,
2134
+ "grad_norm": 4.434142589569092,
2135
+ "learning_rate": 8.680555555555556e-07,
2136
+ "loss": 0.1206,
2137
+ "step": 7375
2138
+ },
2139
+ {
2140
+ "epoch": 2.96,
2141
+ "grad_norm": 4.396365165710449,
2142
+ "learning_rate": 8.333333333333333e-07,
2143
+ "loss": 0.1235,
2144
+ "step": 7400
2145
+ },
2146
+ {
2147
+ "epoch": 2.9699999999999998,
2148
+ "grad_norm": 8.427244186401367,
2149
+ "learning_rate": 7.986111111111111e-07,
2150
+ "loss": 0.1131,
2151
+ "step": 7425
2152
+ },
2153
+ {
2154
+ "epoch": 2.98,
2155
+ "grad_norm": 5.311284065246582,
2156
+ "learning_rate": 7.63888888888889e-07,
2157
+ "loss": 0.1141,
2158
+ "step": 7450
2159
+ },
2160
+ {
2161
+ "epoch": 2.99,
2162
+ "grad_norm": 3.4007761478424072,
2163
+ "learning_rate": 7.291666666666667e-07,
2164
+ "loss": 0.1031,
2165
+ "step": 7475
2166
+ },
2167
+ {
2168
+ "epoch": 3.0,
2169
+ "grad_norm": 4.035045146942139,
2170
+ "learning_rate": 6.944444444444446e-07,
2171
+ "loss": 0.0908,
2172
+ "step": 7500
2173
+ },
2174
+ {
2175
+ "epoch": 3.01,
2176
+ "grad_norm": 2.659799337387085,
2177
+ "learning_rate": 6.597222222222223e-07,
2178
+ "loss": 0.061,
2179
+ "step": 7525
2180
+ },
2181
+ {
2182
+ "epoch": 3.02,
2183
+ "grad_norm": 3.8670284748077393,
2184
+ "learning_rate": 6.25e-07,
2185
+ "loss": 0.0687,
2186
+ "step": 7550
2187
+ },
2188
+ {
2189
+ "epoch": 3.03,
2190
+ "grad_norm": 3.3453280925750732,
2191
+ "learning_rate": 5.902777777777778e-07,
2192
+ "loss": 0.0619,
2193
+ "step": 7575
2194
+ },
2195
+ {
2196
+ "epoch": 3.04,
2197
+ "grad_norm": 4.7413249015808105,
2198
+ "learning_rate": 5.555555555555555e-07,
2199
+ "loss": 0.0701,
2200
+ "step": 7600
2201
+ },
2202
+ {
2203
+ "epoch": 3.05,
2204
+ "grad_norm": 3.685469627380371,
2205
+ "learning_rate": 5.208333333333334e-07,
2206
+ "loss": 0.0712,
2207
+ "step": 7625
2208
+ },
2209
+ {
2210
+ "epoch": 3.06,
2211
+ "grad_norm": 1.9484667778015137,
2212
+ "learning_rate": 4.861111111111112e-07,
2213
+ "loss": 0.0699,
2214
+ "step": 7650
2215
+ },
2216
+ {
2217
+ "epoch": 3.07,
2218
+ "grad_norm": 4.542367458343506,
2219
+ "learning_rate": 4.5138888888888893e-07,
2220
+ "loss": 0.0608,
2221
+ "step": 7675
2222
+ },
2223
+ {
2224
+ "epoch": 3.08,
2225
+ "grad_norm": 3.3884780406951904,
2226
+ "learning_rate": 4.1666666666666667e-07,
2227
+ "loss": 0.0629,
2228
+ "step": 7700
2229
+ },
2230
+ {
2231
+ "epoch": 3.09,
2232
+ "grad_norm": 3.8800745010375977,
2233
+ "learning_rate": 3.819444444444445e-07,
2234
+ "loss": 0.0627,
2235
+ "step": 7725
2236
+ },
2237
+ {
2238
+ "epoch": 3.1,
2239
+ "grad_norm": 3.6111207008361816,
2240
+ "learning_rate": 3.472222222222223e-07,
2241
+ "loss": 0.0616,
2242
+ "step": 7750
2243
+ },
2244
+ {
2245
+ "epoch": 3.11,
2246
+ "grad_norm": 4.225738525390625,
2247
+ "learning_rate": 3.125e-07,
2248
+ "loss": 0.0701,
2249
+ "step": 7775
2250
+ },
2251
+ {
2252
+ "epoch": 3.12,
2253
+ "grad_norm": 4.790248394012451,
2254
+ "learning_rate": 2.7777777777777776e-07,
2255
+ "loss": 0.0761,
2256
+ "step": 7800
2257
+ },
2258
+ {
2259
+ "epoch": 3.13,
2260
+ "grad_norm": 5.802876949310303,
2261
+ "learning_rate": 2.430555555555556e-07,
2262
+ "loss": 0.0647,
2263
+ "step": 7825
2264
+ },
2265
+ {
2266
+ "epoch": 3.14,
2267
+ "grad_norm": 4.658420085906982,
2268
+ "learning_rate": 2.0833333333333333e-07,
2269
+ "loss": 0.0605,
2270
+ "step": 7850
2271
+ },
2272
+ {
2273
+ "epoch": 3.15,
2274
+ "grad_norm": 3.0659847259521484,
2275
+ "learning_rate": 1.7361111111111115e-07,
2276
+ "loss": 0.062,
2277
+ "step": 7875
2278
+ },
2279
+ {
2280
+ "epoch": 3.16,
2281
+ "grad_norm": 2.227428436279297,
2282
+ "learning_rate": 1.3888888888888888e-07,
2283
+ "loss": 0.0572,
2284
+ "step": 7900
2285
+ },
2286
+ {
2287
+ "epoch": 3.17,
2288
+ "grad_norm": 2.0126748085021973,
2289
+ "learning_rate": 1.0416666666666667e-07,
2290
+ "loss": 0.056,
2291
+ "step": 7925
2292
+ },
2293
+ {
2294
+ "epoch": 3.18,
2295
+ "grad_norm": 5.308594703674316,
2296
+ "learning_rate": 6.944444444444444e-08,
2297
+ "loss": 0.0728,
2298
+ "step": 7950
2299
+ },
2300
+ {
2301
+ "epoch": 3.19,
2302
+ "grad_norm": 2.8052618503570557,
2303
+ "learning_rate": 3.472222222222222e-08,
2304
+ "loss": 0.0744,
2305
+ "step": 7975
2306
+ },
2307
+ {
2308
+ "epoch": 3.2,
2309
+ "grad_norm": 3.2474498748779297,
2310
+ "learning_rate": 0.0,
2311
+ "loss": 0.0678,
2312
+ "step": 8000
2313
+ },
2314
+ {
2315
+ "epoch": 3.2,
2316
+ "eval_cer": 16.252549215465933,
2317
+ "eval_loss": 0.2705218493938446,
2318
+ "eval_runtime": 1747.8851,
2319
+ "eval_samples_per_second": 2.252,
2320
+ "eval_steps_per_second": 0.281,
2321
+ "step": 8000
2322
  }
2323
  ],
2324
  "logging_steps": 25,
 
2326
  "num_input_tokens_seen": 0,
2327
  "num_train_epochs": 4,
2328
  "save_steps": 1000,
2329
+ "total_flos": 3.693893124096e+19,
2330
  "train_batch_size": 8,
2331
  "trial_name": null,
2332
  "trial_params": null