diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4319 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 3476365312.0, + "BitsPerParam": 4.500495468534268 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 160694272, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 78464, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 160694272, + "byteOffset": 0 + } + ], + "md5sum": "c3c10550975219b982e13f308e8cbeaf" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "00267217d2fade8086f6dafe14826787" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "24e346f5ed8eb9e2465c92da680eb282" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "fd213415ad31a193ca5820c117c20389" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 31391744, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 78464, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20086784, + "byteOffset": 0 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20086784 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 20094976 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22913024 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22921216 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 22929408 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 25747456 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31383552 + } + ], + "md5sum": "69e7862f76be93aa6c1614082a8091db" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "2f94a02294efb0558a00418a9c4fbdcf" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "dacaf6929a6ff739bedcb9838287d7ac" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "3fc31d339e4b3af6d9eb657c08c36210" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "9e4db364d0df84ebd315751494f51e49" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "e4286da983c3e5fd23281cb02bf7a87c" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "7c0edb5fd2ab96ecf16f5aacf85933ba" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "43125249c5f0cec8cf843f6a8070800a" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "a8159c5c06a9c04fd1f8877bf4d72503" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "115c9c64be50493b7c5b991dd0f61d7b" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "33fb88810bdd4e5875a7ee70301850e6" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "9c7037df839a741ad0c53fde12cab4cc" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "f748cd8a270cb1306ca0cc7b4bc77557" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 160694272, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 78464, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 160694272, + "byteOffset": 0 + } + ], + "md5sum": "66baa8abc7e0675bea840c71fa31e705" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 20086784, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 78464, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20086784, + "byteOffset": 0 + } + ], + "md5sum": "9904aba041af140a789416cab4d8f410" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "f016608be854f71aebd54f9889080d5d" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "90238c4482c7ba6b626d4537e389e427" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29712384, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21241856 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21250048 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24068096 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29704192 + } + ], + "md5sum": "e6024d026665d8953e8495db8ae64000" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "137417db040a5bc38dd7ac6afdfe93bb" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "018626d3dba1e8248d722db8ccfe7fde" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "55be87cf52a022f27c9fa8c5acc8492a" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "85df2cff8a1b802f9f7ca47b6ce659a0" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "4fa344e70b731917089c8adb3d8ef48a" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "6f12c6c59e110169b1b00a55ae934183" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "e401f1acd344728f90e7e080b3267a70" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "551d98395d5f36332a3e819d49666259" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "208f70f5d39f69292101878b5fbc2e63" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "92ff7f770b2f0942fedd421e25819548" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 21233664 + } + ], + "md5sum": "66b8802dd45d9067f1a9402c6c2223bb" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "a54b6d0c41e8677d9220a9720b8e7c20" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "f8fdf1b6eba7af4fbdcfd4bf5b42c711" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "98870e4cd622d16e1a447e4a5a1924f7" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "d7d479357f7569fe2f996c109a6c7916" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "6fdbf07a5878c4620160a94004171d23" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "e77f52336173714cffb65de585a37102" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "a1a6e0c35a00b8c064c123fd760ee010" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "db893f60f0c41de0407d92a83ee6897e" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "d51adc9743e3deb7b233340333e17122" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "cc1949ff678e27e2ccadb1cb6ea720bd" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "d89fe8ec5e2cd634eb540292bd720a16" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "56ea6b787533188136fb6fd3d7852a37" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "6e40b5fba91ac43117646e317124ef64" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "c626632d4e83e7f0ee38fd1e9d8e34de" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "f648c90c61becc0b1379168a47ae5d20" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "23d16d6fd05f7b1410d48d4aa43d7bfd" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "73f86b105442ccbb9f0eb1550702d6fb" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "e52a7072eac1cefa271635df0d069c7b" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "8cbed6c63f9e4add4f4dce4b75a210eb" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "0dc46614265638efd056f9ff4f48c8bd" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "ada3fe3fc7c1733bbaaf57d2e0f03ee4" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "94e6fdb168a466356b9d8cd9a2ece439" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "2049b892450606e9e28b5ba9211c0296" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "ef062e60fa60c45ddd2390686252e0fd" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "74b2c46929563e4eb7a42f4f35b2aeef" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "244cd9de0a448eb194c7289c0a9e34e7" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ead3b6e1abdc629a4e178683aa74a904" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 32538624, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24059904 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24068096 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 24076288 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26894336 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32530432 + } + ], + "md5sum": "051d6cd8375814ed975ecba4443de654" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "a90e955fdeca6079ac676951d372d847" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "2eb8210b7379355ae97012218fd45359" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "b2ffb7bd2258661387d7b46963b318ce" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "2beb04604c468fbf5ca32b0fd76a2a42" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "54fe70cd251bf5403c8759f4e0acc71c" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "2dae5c5d2cc736f57299922565710ba6" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "e67743ac7053477c24812daae1620897" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "01ed83f3e2846ae66ad741a81df8a556" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "43498c8bd250344aa409c22e01a58337" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "bec359ffdaa3243de0b24e041d811a25" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "f00c538d036e7f01ddc1b69462d4e797" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "14f32e57bba26c048a22622bc722c933" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "78bda738093ebd93d77b0b326906ccd0" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ad3629d5fbd476a77e5ff102477855b7" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "864bd5fe4f0dbc99d5a14e37b7da08a7" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "6fe398fc19102e36e1e13a6d2e69be52" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "4472fd33b78c16f65d9408d5c8f1ea8c" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "725001633ed51eb2eabafe5c9a42b225" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "af3b7f92a448c3811e234a90221c91f1" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "fbdebc6e520ff6bb8cf52ee598782f87" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "b12ea89c44fd5731a623d0babcd8b55b" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "7ec67b50df885233865630ae312c4044" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "626ce0d8167d9cd0d07ba73201ddf3ad" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "15085367b7473fb5133349947b08887b" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "7db08cd01aeb3a792a305e5171da046c" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "7380847032f3fee654a9db288a8894d0" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "65a54f5cf9cf4047501e2e5d0d887055" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "326f70ab1373ba1917267018405f9ea7" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "753082ba08d24363d0db337789192778" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "b015e5cd95d241bddb179c5aeb518fc1" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "b3839ce6d5dc5cdd3416042529cef345" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "d20ae3fd74a102d784217be6aed2e693" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "b48aee2a08ddcc96b74f512adca112df" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "6560805cb38b3f95749aefbd4fa80a21" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ac4f7941f4168b0054d5f8589f5601d7" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 29704192, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21241856 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24059904 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29696000 + } + ], + "md5sum": "c18a79e53fabab9cb20fa8cb3d753ca6" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "d5b8d6f8e6cabed006e2533fd8c68c49" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 21233664 + } + ], + "md5sum": "1f090aed7652526d5bb0747b4cdb1ba5" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 10485760 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11796480 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20185088 + } + ], + "md5sum": "c484f05f9b8f3c3f7ba47e61835602bd" + } + ] +} \ No newline at end of file