tarkarninswave's picture
Upload 4 files
2561917 verified
raw
history blame contribute delete
No virus
147 kB
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 3476365312.0,
"BitsPerParam": 4.500495468534268
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 160694272,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
78464,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 160694272,
"byteOffset": 0
}
],
"md5sum": "c3c10550975219b982e13f308e8cbeaf"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "00267217d2fade8086f6dafe14826787"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "24e346f5ed8eb9e2465c92da680eb282"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "fd213415ad31a193ca5820c117c20389"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 31391744,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
78464,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20086784,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20086784
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20094976
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22913024
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22921216
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 22929408
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 25747456
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 31383552
}
],
"md5sum": "69e7862f76be93aa6c1614082a8091db"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2f94a02294efb0558a00418a9c4fbdcf"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "dacaf6929a6ff739bedcb9838287d7ac"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "3fc31d339e4b3af6d9eb657c08c36210"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "9e4db364d0df84ebd315751494f51e49"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "e4286da983c3e5fd23281cb02bf7a87c"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "7c0edb5fd2ab96ecf16f5aacf85933ba"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "43125249c5f0cec8cf843f6a8070800a"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "a8159c5c06a9c04fd1f8877bf4d72503"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "115c9c64be50493b7c5b991dd0f61d7b"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "33fb88810bdd4e5875a7ee70301850e6"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "9c7037df839a741ad0c53fde12cab4cc"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "f748cd8a270cb1306ca0cc7b4bc77557"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 160694272,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
78464,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 160694272,
"byteOffset": 0
}
],
"md5sum": "66baa8abc7e0675bea840c71fa31e705"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 20086784,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
78464,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20086784,
"byteOffset": 0
}
],
"md5sum": "9904aba041af140a789416cab4d8f410"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f016608be854f71aebd54f9889080d5d"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "90238c4482c7ba6b626d4537e389e427"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29712384,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21241856
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21250048
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24068096
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29704192
}
],
"md5sum": "e6024d026665d8953e8495db8ae64000"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "137417db040a5bc38dd7ac6afdfe93bb"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "018626d3dba1e8248d722db8ccfe7fde"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "55be87cf52a022f27c9fa8c5acc8492a"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "85df2cff8a1b802f9f7ca47b6ce659a0"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "4fa344e70b731917089c8adb3d8ef48a"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "6f12c6c59e110169b1b00a55ae934183"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "e401f1acd344728f90e7e080b3267a70"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "551d98395d5f36332a3e819d49666259"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "208f70f5d39f69292101878b5fbc2e63"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "92ff7f770b2f0942fedd421e25819548"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 21233664
}
],
"md5sum": "66b8802dd45d9067f1a9402c6c2223bb"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a54b6d0c41e8677d9220a9720b8e7c20"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "f8fdf1b6eba7af4fbdcfd4bf5b42c711"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "98870e4cd622d16e1a447e4a5a1924f7"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d7d479357f7569fe2f996c109a6c7916"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "6fdbf07a5878c4620160a94004171d23"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "e77f52336173714cffb65de585a37102"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a1a6e0c35a00b8c064c123fd760ee010"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "db893f60f0c41de0407d92a83ee6897e"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "d51adc9743e3deb7b233340333e17122"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "cc1949ff678e27e2ccadb1cb6ea720bd"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "d89fe8ec5e2cd634eb540292bd720a16"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "56ea6b787533188136fb6fd3d7852a37"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6e40b5fba91ac43117646e317124ef64"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c626632d4e83e7f0ee38fd1e9d8e34de"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "f648c90c61becc0b1379168a47ae5d20"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "23d16d6fd05f7b1410d48d4aa43d7bfd"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "73f86b105442ccbb9f0eb1550702d6fb"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "e52a7072eac1cefa271635df0d069c7b"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "8cbed6c63f9e4add4f4dce4b75a210eb"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "0dc46614265638efd056f9ff4f48c8bd"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "ada3fe3fc7c1733bbaaf57d2e0f03ee4"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "94e6fdb168a466356b9d8cd9a2ece439"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "2049b892450606e9e28b5ba9211c0296"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "ef062e60fa60c45ddd2390686252e0fd"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "74b2c46929563e4eb7a42f4f35b2aeef"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "244cd9de0a448eb194c7289c0a9e34e7"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ead3b6e1abdc629a4e178683aa74a904"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 32538624,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24059904
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24068096
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 24076288
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26894336
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32530432
}
],
"md5sum": "051d6cd8375814ed975ecba4443de654"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a90e955fdeca6079ac676951d372d847"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "2eb8210b7379355ae97012218fd45359"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "b2ffb7bd2258661387d7b46963b318ce"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2beb04604c468fbf5ca32b0fd76a2a42"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "54fe70cd251bf5403c8759f4e0acc71c"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "2dae5c5d2cc736f57299922565710ba6"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "e67743ac7053477c24812daae1620897"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "01ed83f3e2846ae66ad741a81df8a556"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "43498c8bd250344aa409c22e01a58337"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "bec359ffdaa3243de0b24e041d811a25"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "f00c538d036e7f01ddc1b69462d4e797"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "14f32e57bba26c048a22622bc722c933"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "78bda738093ebd93d77b0b326906ccd0"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ad3629d5fbd476a77e5ff102477855b7"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "864bd5fe4f0dbc99d5a14e37b7da08a7"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6fe398fc19102e36e1e13a6d2e69be52"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "4472fd33b78c16f65d9408d5c8f1ea8c"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "725001633ed51eb2eabafe5c9a42b225"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "af3b7f92a448c3811e234a90221c91f1"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "fbdebc6e520ff6bb8cf52ee598782f87"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "b12ea89c44fd5731a623d0babcd8b55b"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "7ec67b50df885233865630ae312c4044"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "626ce0d8167d9cd0d07ba73201ddf3ad"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "15085367b7473fb5133349947b08887b"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "7db08cd01aeb3a792a305e5171da046c"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7380847032f3fee654a9db288a8894d0"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "65a54f5cf9cf4047501e2e5d0d887055"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "326f70ab1373ba1917267018405f9ea7"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "753082ba08d24363d0db337789192778"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "b015e5cd95d241bddb179c5aeb518fc1"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "b3839ce6d5dc5cdd3416042529cef345"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "d20ae3fd74a102d784217be6aed2e693"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "b48aee2a08ddcc96b74f512adca112df"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6560805cb38b3f95749aefbd4fa80a21"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ac4f7941f4168b0054d5f8589f5601d7"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 29704192,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21233664
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21241856
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 24059904
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29696000
}
],
"md5sum": "c18a79e53fabab9cb20fa8cb3d753ca6"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "d5b8d6f8e6cabed006e2533fd8c68c49"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 21233664
}
],
"md5sum": "1f090aed7652526d5bb0747b4cdb1ba5"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11796480
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20185088
}
],
"md5sum": "c484f05f9b8f3c3f7ba47e61835602bd"
}
]
}