CharlieFRuan commited on
Commit
8d6d746
1 Parent(s): 75f03a6

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
mlc-chat-config.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "gemma2",
4
+ "quantization": "q0f16",
5
+ "model_config": {
6
+ "hidden_size": 2304,
7
+ "intermediate_size": 9216,
8
+ "attention_bias": false,
9
+ "num_attention_heads": 8,
10
+ "num_key_value_heads": 4,
11
+ "head_dim": 256,
12
+ "num_hidden_layers": 26,
13
+ "rms_norm_eps": 1e-06,
14
+ "vocab_size": 256000,
15
+ "hidden_activation": "gelu_pytorch_tanh",
16
+ "position_embedding_base": 10000.0,
17
+ "context_window_size": 4096,
18
+ "prefill_chunk_size": 2048,
19
+ "tensor_parallel_shards": 1,
20
+ "max_batch_size": 80,
21
+ "attn_logit_softcapping": 50.0,
22
+ "final_logit_softcapping": 30.0,
23
+ "query_pre_attn_scalar": 256,
24
+ "sliding_window": 4096
25
+ },
26
+ "vocab_size": 256000,
27
+ "context_window_size": 4096,
28
+ "sliding_window_size": -1,
29
+ "prefill_chunk_size": 2048,
30
+ "attention_sink_size": -1,
31
+ "tensor_parallel_shards": 1,
32
+ "temperature": 1.0,
33
+ "presence_penalty": 0.0,
34
+ "frequency_penalty": 0.0,
35
+ "repetition_penalty": 1.0,
36
+ "top_p": 1.0,
37
+ "tokenizer_files": [
38
+ "tokenizer.model",
39
+ "tokenizer.json",
40
+ "tokenizer_config.json"
41
+ ],
42
+ "tokenizer_info": {
43
+ "token_postproc_method": "byte_fallback",
44
+ "prepend_space_in_encode": false,
45
+ "strip_space_in_decode": false
46
+ },
47
+ "conv_template": {
48
+ "name": "gemma_instruction",
49
+ "system_template": "{system_message}",
50
+ "system_message": "",
51
+ "system_prefix_token_ids": [
52
+ 2
53
+ ],
54
+ "add_role_after_system_message": true,
55
+ "roles": {
56
+ "user": "<start_of_turn>user",
57
+ "assistant": "<start_of_turn>model"
58
+ },
59
+ "role_templates": {
60
+ "user": "{user_message}",
61
+ "assistant": "{assistant_message}",
62
+ "tool": "{tool_message}"
63
+ },
64
+ "messages": [],
65
+ "seps": [
66
+ "<end_of_turn>\n"
67
+ ],
68
+ "role_content_sep": "\n",
69
+ "role_empty_sep": "\n",
70
+ "stop_str": [
71
+ "<end_of_turn>"
72
+ ],
73
+ "stop_token_ids": [
74
+ 1,
75
+ 107
76
+ ],
77
+ "function_string": "",
78
+ "use_function_calling": false
79
+ },
80
+ "pad_token_id": 0,
81
+ "bos_token_id": 2,
82
+ "eos_token_id": [
83
+ 1,
84
+ 107
85
+ ]
86
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,2854 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 210,
4
+ "ParamBytes": 5228683776.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 1179648000,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.weight",
15
+ "shape": [
16
+ 256000,
17
+ 2304
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 1179648000,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "bd04f657e68800e39b6f5f375441ab1c"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 42467328,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.0.mlp.down_proj.weight",
34
+ "shape": [
35
+ 2304,
36
+ 9216
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 42467328,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "3cd51ffed8bfd0b4138d1aeead787cea"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 84934656,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
53
+ "shape": [
54
+ 18432,
55
+ 2304
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 84934656,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "a05426adc350ec2c79e4bc61d70d60c1"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 42467328,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.1.mlp.down_proj.weight",
72
+ "shape": [
73
+ 2304,
74
+ 9216
75
+ ],
76
+ "dtype": "float16",
77
+ "format": "f32-to-bf16",
78
+ "nbytes": 42467328,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "01352f4651a8dae405b56910978034a7"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 84934656,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
91
+ "shape": [
92
+ 18432,
93
+ 2304
94
+ ],
95
+ "dtype": "float16",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 84934656,
98
+ "byteOffset": 0
99
+ }
100
+ ],
101
+ "md5sum": "1ba6e5b23e2819379762052538555e2d"
102
+ },
103
+ {
104
+ "dataPath": "params_shard_5.bin",
105
+ "format": "raw-shard",
106
+ "nbytes": 18874368,
107
+ "records": [
108
+ {
109
+ "name": "model.layers.1.self_attn.qkv_proj.weight",
110
+ "shape": [
111
+ 4096,
112
+ 2304
113
+ ],
114
+ "dtype": "float16",
115
+ "format": "f32-to-bf16",
116
+ "nbytes": 18874368,
117
+ "byteOffset": 0
118
+ }
119
+ ],
120
+ "md5sum": "ac52e6fa05a81436cc20a682bb31b01a"
121
+ },
122
+ {
123
+ "dataPath": "params_shard_6.bin",
124
+ "format": "raw-shard",
125
+ "nbytes": 28348416,
126
+ "records": [
127
+ {
128
+ "name": "model.layers.0.input_layernorm.weight",
129
+ "shape": [
130
+ 2304
131
+ ],
132
+ "dtype": "float16",
133
+ "format": "f32-to-bf16",
134
+ "nbytes": 4608,
135
+ "byteOffset": 0
136
+ },
137
+ {
138
+ "name": "model.layers.0.post_attention_layernorm.weight",
139
+ "shape": [
140
+ 2304
141
+ ],
142
+ "dtype": "float16",
143
+ "format": "f32-to-bf16",
144
+ "nbytes": 4608,
145
+ "byteOffset": 4608
146
+ },
147
+ {
148
+ "name": "model.layers.0.post_feedforward_layernorm.weight",
149
+ "shape": [
150
+ 2304
151
+ ],
152
+ "dtype": "float16",
153
+ "format": "f32-to-bf16",
154
+ "nbytes": 4608,
155
+ "byteOffset": 9216
156
+ },
157
+ {
158
+ "name": "model.layers.0.pre_feedforward_layernorm.weight",
159
+ "shape": [
160
+ 2304
161
+ ],
162
+ "dtype": "float16",
163
+ "format": "f32-to-bf16",
164
+ "nbytes": 4608,
165
+ "byteOffset": 13824
166
+ },
167
+ {
168
+ "name": "model.layers.0.self_attn.qkv_proj.weight",
169
+ "shape": [
170
+ 4096,
171
+ 2304
172
+ ],
173
+ "dtype": "float16",
174
+ "format": "f32-to-bf16",
175
+ "nbytes": 18874368,
176
+ "byteOffset": 18432
177
+ },
178
+ {
179
+ "name": "model.layers.0.self_attn.o_proj.weight",
180
+ "shape": [
181
+ 2304,
182
+ 2048
183
+ ],
184
+ "dtype": "float16",
185
+ "format": "f32-to-bf16",
186
+ "nbytes": 9437184,
187
+ "byteOffset": 18892800
188
+ },
189
+ {
190
+ "name": "model.layers.1.input_layernorm.weight",
191
+ "shape": [
192
+ 2304
193
+ ],
194
+ "dtype": "float16",
195
+ "format": "f32-to-bf16",
196
+ "nbytes": 4608,
197
+ "byteOffset": 28329984
198
+ },
199
+ {
200
+ "name": "model.layers.1.post_attention_layernorm.weight",
201
+ "shape": [
202
+ 2304
203
+ ],
204
+ "dtype": "float16",
205
+ "format": "f32-to-bf16",
206
+ "nbytes": 4608,
207
+ "byteOffset": 28334592
208
+ },
209
+ {
210
+ "name": "model.layers.1.post_feedforward_layernorm.weight",
211
+ "shape": [
212
+ 2304
213
+ ],
214
+ "dtype": "float16",
215
+ "format": "f32-to-bf16",
216
+ "nbytes": 4608,
217
+ "byteOffset": 28339200
218
+ },
219
+ {
220
+ "name": "model.layers.1.pre_feedforward_layernorm.weight",
221
+ "shape": [
222
+ 2304
223
+ ],
224
+ "dtype": "float16",
225
+ "format": "f32-to-bf16",
226
+ "nbytes": 4608,
227
+ "byteOffset": 28343808
228
+ }
229
+ ],
230
+ "md5sum": "4baeb6ab99e46e08140f6d42a10a136f"
231
+ },
232
+ {
233
+ "dataPath": "params_shard_7.bin",
234
+ "format": "raw-shard",
235
+ "nbytes": 42467328,
236
+ "records": [
237
+ {
238
+ "name": "model.layers.10.mlp.down_proj.weight",
239
+ "shape": [
240
+ 2304,
241
+ 9216
242
+ ],
243
+ "dtype": "float16",
244
+ "format": "f32-to-bf16",
245
+ "nbytes": 42467328,
246
+ "byteOffset": 0
247
+ }
248
+ ],
249
+ "md5sum": "787dba4e17c4fd4357f74e69460c84d8"
250
+ },
251
+ {
252
+ "dataPath": "params_shard_8.bin",
253
+ "format": "raw-shard",
254
+ "nbytes": 84934656,
255
+ "records": [
256
+ {
257
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
258
+ "shape": [
259
+ 18432,
260
+ 2304
261
+ ],
262
+ "dtype": "float16",
263
+ "format": "f32-to-bf16",
264
+ "nbytes": 84934656,
265
+ "byteOffset": 0
266
+ }
267
+ ],
268
+ "md5sum": "9bee35a6635351a555004148477e8249"
269
+ },
270
+ {
271
+ "dataPath": "params_shard_9.bin",
272
+ "format": "raw-shard",
273
+ "nbytes": 28329984,
274
+ "records": [
275
+ {
276
+ "name": "model.layers.1.self_attn.o_proj.weight",
277
+ "shape": [
278
+ 2304,
279
+ 2048
280
+ ],
281
+ "dtype": "float16",
282
+ "format": "f32-to-bf16",
283
+ "nbytes": 9437184,
284
+ "byteOffset": 0
285
+ },
286
+ {
287
+ "name": "model.layers.10.input_layernorm.weight",
288
+ "shape": [
289
+ 2304
290
+ ],
291
+ "dtype": "float16",
292
+ "format": "f32-to-bf16",
293
+ "nbytes": 4608,
294
+ "byteOffset": 9437184
295
+ },
296
+ {
297
+ "name": "model.layers.10.post_attention_layernorm.weight",
298
+ "shape": [
299
+ 2304
300
+ ],
301
+ "dtype": "float16",
302
+ "format": "f32-to-bf16",
303
+ "nbytes": 4608,
304
+ "byteOffset": 9441792
305
+ },
306
+ {
307
+ "name": "model.layers.10.post_feedforward_layernorm.weight",
308
+ "shape": [
309
+ 2304
310
+ ],
311
+ "dtype": "float16",
312
+ "format": "f32-to-bf16",
313
+ "nbytes": 4608,
314
+ "byteOffset": 9446400
315
+ },
316
+ {
317
+ "name": "model.layers.10.pre_feedforward_layernorm.weight",
318
+ "shape": [
319
+ 2304
320
+ ],
321
+ "dtype": "float16",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 4608,
324
+ "byteOffset": 9451008
325
+ },
326
+ {
327
+ "name": "model.layers.10.self_attn.qkv_proj.weight",
328
+ "shape": [
329
+ 4096,
330
+ 2304
331
+ ],
332
+ "dtype": "float16",
333
+ "format": "f32-to-bf16",
334
+ "nbytes": 18874368,
335
+ "byteOffset": 9455616
336
+ }
337
+ ],
338
+ "md5sum": "730e18cf91d7e7c6e816f531e1cfeff0"
339
+ },
340
+ {
341
+ "dataPath": "params_shard_10.bin",
342
+ "format": "raw-shard",
343
+ "nbytes": 42467328,
344
+ "records": [
345
+ {
346
+ "name": "model.layers.11.mlp.down_proj.weight",
347
+ "shape": [
348
+ 2304,
349
+ 9216
350
+ ],
351
+ "dtype": "float16",
352
+ "format": "f32-to-bf16",
353
+ "nbytes": 42467328,
354
+ "byteOffset": 0
355
+ }
356
+ ],
357
+ "md5sum": "746c0be2b6d8d216f1517570a9efb57c"
358
+ },
359
+ {
360
+ "dataPath": "params_shard_11.bin",
361
+ "format": "raw-shard",
362
+ "nbytes": 84934656,
363
+ "records": [
364
+ {
365
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
366
+ "shape": [
367
+ 18432,
368
+ 2304
369
+ ],
370
+ "dtype": "float16",
371
+ "format": "f32-to-bf16",
372
+ "nbytes": 84934656,
373
+ "byteOffset": 0
374
+ }
375
+ ],
376
+ "md5sum": "eff43f58197bd73031284872a1dd7d43"
377
+ },
378
+ {
379
+ "dataPath": "params_shard_12.bin",
380
+ "format": "raw-shard",
381
+ "nbytes": 28329984,
382
+ "records": [
383
+ {
384
+ "name": "model.layers.10.self_attn.o_proj.weight",
385
+ "shape": [
386
+ 2304,
387
+ 2048
388
+ ],
389
+ "dtype": "float16",
390
+ "format": "f32-to-bf16",
391
+ "nbytes": 9437184,
392
+ "byteOffset": 0
393
+ },
394
+ {
395
+ "name": "model.layers.11.input_layernorm.weight",
396
+ "shape": [
397
+ 2304
398
+ ],
399
+ "dtype": "float16",
400
+ "format": "f32-to-bf16",
401
+ "nbytes": 4608,
402
+ "byteOffset": 9437184
403
+ },
404
+ {
405
+ "name": "model.layers.11.post_attention_layernorm.weight",
406
+ "shape": [
407
+ 2304
408
+ ],
409
+ "dtype": "float16",
410
+ "format": "f32-to-bf16",
411
+ "nbytes": 4608,
412
+ "byteOffset": 9441792
413
+ },
414
+ {
415
+ "name": "model.layers.11.post_feedforward_layernorm.weight",
416
+ "shape": [
417
+ 2304
418
+ ],
419
+ "dtype": "float16",
420
+ "format": "f32-to-bf16",
421
+ "nbytes": 4608,
422
+ "byteOffset": 9446400
423
+ },
424
+ {
425
+ "name": "model.layers.11.pre_feedforward_layernorm.weight",
426
+ "shape": [
427
+ 2304
428
+ ],
429
+ "dtype": "float16",
430
+ "format": "f32-to-bf16",
431
+ "nbytes": 4608,
432
+ "byteOffset": 9451008
433
+ },
434
+ {
435
+ "name": "model.layers.11.self_attn.qkv_proj.weight",
436
+ "shape": [
437
+ 4096,
438
+ 2304
439
+ ],
440
+ "dtype": "float16",
441
+ "format": "f32-to-bf16",
442
+ "nbytes": 18874368,
443
+ "byteOffset": 9455616
444
+ }
445
+ ],
446
+ "md5sum": "ad9b697ac38b21dd5d2d094049c4bbf9"
447
+ },
448
+ {
449
+ "dataPath": "params_shard_13.bin",
450
+ "format": "raw-shard",
451
+ "nbytes": 42467328,
452
+ "records": [
453
+ {
454
+ "name": "model.layers.12.mlp.down_proj.weight",
455
+ "shape": [
456
+ 2304,
457
+ 9216
458
+ ],
459
+ "dtype": "float16",
460
+ "format": "f32-to-bf16",
461
+ "nbytes": 42467328,
462
+ "byteOffset": 0
463
+ }
464
+ ],
465
+ "md5sum": "78594a4d8730c0de2807d79fb405e372"
466
+ },
467
+ {
468
+ "dataPath": "params_shard_14.bin",
469
+ "format": "raw-shard",
470
+ "nbytes": 84934656,
471
+ "records": [
472
+ {
473
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
474
+ "shape": [
475
+ 18432,
476
+ 2304
477
+ ],
478
+ "dtype": "float16",
479
+ "format": "f32-to-bf16",
480
+ "nbytes": 84934656,
481
+ "byteOffset": 0
482
+ }
483
+ ],
484
+ "md5sum": "71d536b6a9847882d6a817079cf5716b"
485
+ },
486
+ {
487
+ "dataPath": "params_shard_15.bin",
488
+ "format": "raw-shard",
489
+ "nbytes": 28329984,
490
+ "records": [
491
+ {
492
+ "name": "model.layers.11.self_attn.o_proj.weight",
493
+ "shape": [
494
+ 2304,
495
+ 2048
496
+ ],
497
+ "dtype": "float16",
498
+ "format": "f32-to-bf16",
499
+ "nbytes": 9437184,
500
+ "byteOffset": 0
501
+ },
502
+ {
503
+ "name": "model.layers.12.input_layernorm.weight",
504
+ "shape": [
505
+ 2304
506
+ ],
507
+ "dtype": "float16",
508
+ "format": "f32-to-bf16",
509
+ "nbytes": 4608,
510
+ "byteOffset": 9437184
511
+ },
512
+ {
513
+ "name": "model.layers.12.post_attention_layernorm.weight",
514
+ "shape": [
515
+ 2304
516
+ ],
517
+ "dtype": "float16",
518
+ "format": "f32-to-bf16",
519
+ "nbytes": 4608,
520
+ "byteOffset": 9441792
521
+ },
522
+ {
523
+ "name": "model.layers.12.post_feedforward_layernorm.weight",
524
+ "shape": [
525
+ 2304
526
+ ],
527
+ "dtype": "float16",
528
+ "format": "f32-to-bf16",
529
+ "nbytes": 4608,
530
+ "byteOffset": 9446400
531
+ },
532
+ {
533
+ "name": "model.layers.12.pre_feedforward_layernorm.weight",
534
+ "shape": [
535
+ 2304
536
+ ],
537
+ "dtype": "float16",
538
+ "format": "f32-to-bf16",
539
+ "nbytes": 4608,
540
+ "byteOffset": 9451008
541
+ },
542
+ {
543
+ "name": "model.layers.12.self_attn.qkv_proj.weight",
544
+ "shape": [
545
+ 4096,
546
+ 2304
547
+ ],
548
+ "dtype": "float16",
549
+ "format": "f32-to-bf16",
550
+ "nbytes": 18874368,
551
+ "byteOffset": 9455616
552
+ }
553
+ ],
554
+ "md5sum": "11cba9275957c38808a9b64c73b91173"
555
+ },
556
+ {
557
+ "dataPath": "params_shard_16.bin",
558
+ "format": "raw-shard",
559
+ "nbytes": 42467328,
560
+ "records": [
561
+ {
562
+ "name": "model.layers.13.mlp.down_proj.weight",
563
+ "shape": [
564
+ 2304,
565
+ 9216
566
+ ],
567
+ "dtype": "float16",
568
+ "format": "f32-to-bf16",
569
+ "nbytes": 42467328,
570
+ "byteOffset": 0
571
+ }
572
+ ],
573
+ "md5sum": "0c38d06e0b70989cc822af58a3df50bb"
574
+ },
575
+ {
576
+ "dataPath": "params_shard_17.bin",
577
+ "format": "raw-shard",
578
+ "nbytes": 84934656,
579
+ "records": [
580
+ {
581
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
582
+ "shape": [
583
+ 18432,
584
+ 2304
585
+ ],
586
+ "dtype": "float16",
587
+ "format": "f32-to-bf16",
588
+ "nbytes": 84934656,
589
+ "byteOffset": 0
590
+ }
591
+ ],
592
+ "md5sum": "cb315422a8dfee20be4509c388d28df5"
593
+ },
594
+ {
595
+ "dataPath": "params_shard_18.bin",
596
+ "format": "raw-shard",
597
+ "nbytes": 28329984,
598
+ "records": [
599
+ {
600
+ "name": "model.layers.12.self_attn.o_proj.weight",
601
+ "shape": [
602
+ 2304,
603
+ 2048
604
+ ],
605
+ "dtype": "float16",
606
+ "format": "f32-to-bf16",
607
+ "nbytes": 9437184,
608
+ "byteOffset": 0
609
+ },
610
+ {
611
+ "name": "model.layers.13.input_layernorm.weight",
612
+ "shape": [
613
+ 2304
614
+ ],
615
+ "dtype": "float16",
616
+ "format": "f32-to-bf16",
617
+ "nbytes": 4608,
618
+ "byteOffset": 9437184
619
+ },
620
+ {
621
+ "name": "model.layers.13.post_attention_layernorm.weight",
622
+ "shape": [
623
+ 2304
624
+ ],
625
+ "dtype": "float16",
626
+ "format": "f32-to-bf16",
627
+ "nbytes": 4608,
628
+ "byteOffset": 9441792
629
+ },
630
+ {
631
+ "name": "model.layers.13.post_feedforward_layernorm.weight",
632
+ "shape": [
633
+ 2304
634
+ ],
635
+ "dtype": "float16",
636
+ "format": "f32-to-bf16",
637
+ "nbytes": 4608,
638
+ "byteOffset": 9446400
639
+ },
640
+ {
641
+ "name": "model.layers.13.pre_feedforward_layernorm.weight",
642
+ "shape": [
643
+ 2304
644
+ ],
645
+ "dtype": "float16",
646
+ "format": "f32-to-bf16",
647
+ "nbytes": 4608,
648
+ "byteOffset": 9451008
649
+ },
650
+ {
651
+ "name": "model.layers.13.self_attn.qkv_proj.weight",
652
+ "shape": [
653
+ 4096,
654
+ 2304
655
+ ],
656
+ "dtype": "float16",
657
+ "format": "f32-to-bf16",
658
+ "nbytes": 18874368,
659
+ "byteOffset": 9455616
660
+ }
661
+ ],
662
+ "md5sum": "530323712e97e229aaa29629e8547567"
663
+ },
664
+ {
665
+ "dataPath": "params_shard_19.bin",
666
+ "format": "raw-shard",
667
+ "nbytes": 42467328,
668
+ "records": [
669
+ {
670
+ "name": "model.layers.14.mlp.down_proj.weight",
671
+ "shape": [
672
+ 2304,
673
+ 9216
674
+ ],
675
+ "dtype": "float16",
676
+ "format": "f32-to-bf16",
677
+ "nbytes": 42467328,
678
+ "byteOffset": 0
679
+ }
680
+ ],
681
+ "md5sum": "6875b5e39164f5eb0a3bead2f713e01b"
682
+ },
683
+ {
684
+ "dataPath": "params_shard_20.bin",
685
+ "format": "raw-shard",
686
+ "nbytes": 84934656,
687
+ "records": [
688
+ {
689
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
690
+ "shape": [
691
+ 18432,
692
+ 2304
693
+ ],
694
+ "dtype": "float16",
695
+ "format": "f32-to-bf16",
696
+ "nbytes": 84934656,
697
+ "byteOffset": 0
698
+ }
699
+ ],
700
+ "md5sum": "4cbb3e84e9b7d6be729bc2cb34e38827"
701
+ },
702
+ {
703
+ "dataPath": "params_shard_21.bin",
704
+ "format": "raw-shard",
705
+ "nbytes": 28329984,
706
+ "records": [
707
+ {
708
+ "name": "model.layers.13.self_attn.o_proj.weight",
709
+ "shape": [
710
+ 2304,
711
+ 2048
712
+ ],
713
+ "dtype": "float16",
714
+ "format": "f32-to-bf16",
715
+ "nbytes": 9437184,
716
+ "byteOffset": 0
717
+ },
718
+ {
719
+ "name": "model.layers.14.input_layernorm.weight",
720
+ "shape": [
721
+ 2304
722
+ ],
723
+ "dtype": "float16",
724
+ "format": "f32-to-bf16",
725
+ "nbytes": 4608,
726
+ "byteOffset": 9437184
727
+ },
728
+ {
729
+ "name": "model.layers.14.post_attention_layernorm.weight",
730
+ "shape": [
731
+ 2304
732
+ ],
733
+ "dtype": "float16",
734
+ "format": "f32-to-bf16",
735
+ "nbytes": 4608,
736
+ "byteOffset": 9441792
737
+ },
738
+ {
739
+ "name": "model.layers.14.post_feedforward_layernorm.weight",
740
+ "shape": [
741
+ 2304
742
+ ],
743
+ "dtype": "float16",
744
+ "format": "f32-to-bf16",
745
+ "nbytes": 4608,
746
+ "byteOffset": 9446400
747
+ },
748
+ {
749
+ "name": "model.layers.14.pre_feedforward_layernorm.weight",
750
+ "shape": [
751
+ 2304
752
+ ],
753
+ "dtype": "float16",
754
+ "format": "f32-to-bf16",
755
+ "nbytes": 4608,
756
+ "byteOffset": 9451008
757
+ },
758
+ {
759
+ "name": "model.layers.14.self_attn.qkv_proj.weight",
760
+ "shape": [
761
+ 4096,
762
+ 2304
763
+ ],
764
+ "dtype": "float16",
765
+ "format": "f32-to-bf16",
766
+ "nbytes": 18874368,
767
+ "byteOffset": 9455616
768
+ }
769
+ ],
770
+ "md5sum": "69932e69b4d572540147fb8d8daa52ea"
771
+ },
772
+ {
773
+ "dataPath": "params_shard_22.bin",
774
+ "format": "raw-shard",
775
+ "nbytes": 42467328,
776
+ "records": [
777
+ {
778
+ "name": "model.layers.15.mlp.down_proj.weight",
779
+ "shape": [
780
+ 2304,
781
+ 9216
782
+ ],
783
+ "dtype": "float16",
784
+ "format": "f32-to-bf16",
785
+ "nbytes": 42467328,
786
+ "byteOffset": 0
787
+ }
788
+ ],
789
+ "md5sum": "ab6f22a124667f8c39b09023ebee129e"
790
+ },
791
+ {
792
+ "dataPath": "params_shard_23.bin",
793
+ "format": "raw-shard",
794
+ "nbytes": 84934656,
795
+ "records": [
796
+ {
797
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
798
+ "shape": [
799
+ 18432,
800
+ 2304
801
+ ],
802
+ "dtype": "float16",
803
+ "format": "f32-to-bf16",
804
+ "nbytes": 84934656,
805
+ "byteOffset": 0
806
+ }
807
+ ],
808
+ "md5sum": "97d2d3288acada5a59e0e8796bedf659"
809
+ },
810
+ {
811
+ "dataPath": "params_shard_24.bin",
812
+ "format": "raw-shard",
813
+ "nbytes": 28329984,
814
+ "records": [
815
+ {
816
+ "name": "model.layers.14.self_attn.o_proj.weight",
817
+ "shape": [
818
+ 2304,
819
+ 2048
820
+ ],
821
+ "dtype": "float16",
822
+ "format": "f32-to-bf16",
823
+ "nbytes": 9437184,
824
+ "byteOffset": 0
825
+ },
826
+ {
827
+ "name": "model.layers.15.input_layernorm.weight",
828
+ "shape": [
829
+ 2304
830
+ ],
831
+ "dtype": "float16",
832
+ "format": "f32-to-bf16",
833
+ "nbytes": 4608,
834
+ "byteOffset": 9437184
835
+ },
836
+ {
837
+ "name": "model.layers.15.post_attention_layernorm.weight",
838
+ "shape": [
839
+ 2304
840
+ ],
841
+ "dtype": "float16",
842
+ "format": "f32-to-bf16",
843
+ "nbytes": 4608,
844
+ "byteOffset": 9441792
845
+ },
846
+ {
847
+ "name": "model.layers.15.post_feedforward_layernorm.weight",
848
+ "shape": [
849
+ 2304
850
+ ],
851
+ "dtype": "float16",
852
+ "format": "f32-to-bf16",
853
+ "nbytes": 4608,
854
+ "byteOffset": 9446400
855
+ },
856
+ {
857
+ "name": "model.layers.15.pre_feedforward_layernorm.weight",
858
+ "shape": [
859
+ 2304
860
+ ],
861
+ "dtype": "float16",
862
+ "format": "f32-to-bf16",
863
+ "nbytes": 4608,
864
+ "byteOffset": 9451008
865
+ },
866
+ {
867
+ "name": "model.layers.15.self_attn.qkv_proj.weight",
868
+ "shape": [
869
+ 4096,
870
+ 2304
871
+ ],
872
+ "dtype": "float16",
873
+ "format": "f32-to-bf16",
874
+ "nbytes": 18874368,
875
+ "byteOffset": 9455616
876
+ }
877
+ ],
878
+ "md5sum": "fa7092621e9e1bc30c07c2e2d84c4d2d"
879
+ },
880
+ {
881
+ "dataPath": "params_shard_25.bin",
882
+ "format": "raw-shard",
883
+ "nbytes": 42467328,
884
+ "records": [
885
+ {
886
+ "name": "model.layers.16.mlp.down_proj.weight",
887
+ "shape": [
888
+ 2304,
889
+ 9216
890
+ ],
891
+ "dtype": "float16",
892
+ "format": "f32-to-bf16",
893
+ "nbytes": 42467328,
894
+ "byteOffset": 0
895
+ }
896
+ ],
897
+ "md5sum": "8ca17fd3bd99a1b2f12030b5839695b4"
898
+ },
899
+ {
900
+ "dataPath": "params_shard_26.bin",
901
+ "format": "raw-shard",
902
+ "nbytes": 84934656,
903
+ "records": [
904
+ {
905
+ "name": "model.layers.16.mlp.gate_up_proj.weight",
906
+ "shape": [
907
+ 18432,
908
+ 2304
909
+ ],
910
+ "dtype": "float16",
911
+ "format": "f32-to-bf16",
912
+ "nbytes": 84934656,
913
+ "byteOffset": 0
914
+ }
915
+ ],
916
+ "md5sum": "79d58166414da574f561ba51e86f2fd7"
917
+ },
918
+ {
919
+ "dataPath": "params_shard_27.bin",
920
+ "format": "raw-shard",
921
+ "nbytes": 28329984,
922
+ "records": [
923
+ {
924
+ "name": "model.layers.15.self_attn.o_proj.weight",
925
+ "shape": [
926
+ 2304,
927
+ 2048
928
+ ],
929
+ "dtype": "float16",
930
+ "format": "f32-to-bf16",
931
+ "nbytes": 9437184,
932
+ "byteOffset": 0
933
+ },
934
+ {
935
+ "name": "model.layers.16.input_layernorm.weight",
936
+ "shape": [
937
+ 2304
938
+ ],
939
+ "dtype": "float16",
940
+ "format": "f32-to-bf16",
941
+ "nbytes": 4608,
942
+ "byteOffset": 9437184
943
+ },
944
+ {
945
+ "name": "model.layers.16.post_attention_layernorm.weight",
946
+ "shape": [
947
+ 2304
948
+ ],
949
+ "dtype": "float16",
950
+ "format": "f32-to-bf16",
951
+ "nbytes": 4608,
952
+ "byteOffset": 9441792
953
+ },
954
+ {
955
+ "name": "model.layers.16.post_feedforward_layernorm.weight",
956
+ "shape": [
957
+ 2304
958
+ ],
959
+ "dtype": "float16",
960
+ "format": "f32-to-bf16",
961
+ "nbytes": 4608,
962
+ "byteOffset": 9446400
963
+ },
964
+ {
965
+ "name": "model.layers.16.pre_feedforward_layernorm.weight",
966
+ "shape": [
967
+ 2304
968
+ ],
969
+ "dtype": "float16",
970
+ "format": "f32-to-bf16",
971
+ "nbytes": 4608,
972
+ "byteOffset": 9451008
973
+ },
974
+ {
975
+ "name": "model.layers.16.self_attn.qkv_proj.weight",
976
+ "shape": [
977
+ 4096,
978
+ 2304
979
+ ],
980
+ "dtype": "float16",
981
+ "format": "f32-to-bf16",
982
+ "nbytes": 18874368,
983
+ "byteOffset": 9455616
984
+ }
985
+ ],
986
+ "md5sum": "7e13eeae220123e0c1f3e9d843255485"
987
+ },
988
+ {
989
+ "dataPath": "params_shard_28.bin",
990
+ "format": "raw-shard",
991
+ "nbytes": 42467328,
992
+ "records": [
993
+ {
994
+ "name": "model.layers.17.mlp.down_proj.weight",
995
+ "shape": [
996
+ 2304,
997
+ 9216
998
+ ],
999
+ "dtype": "float16",
1000
+ "format": "f32-to-bf16",
1001
+ "nbytes": 42467328,
1002
+ "byteOffset": 0
1003
+ }
1004
+ ],
1005
+ "md5sum": "22e316ebb372e29f220796afdcd2ee90"
1006
+ },
1007
+ {
1008
+ "dataPath": "params_shard_29.bin",
1009
+ "format": "raw-shard",
1010
+ "nbytes": 84934656,
1011
+ "records": [
1012
+ {
1013
+ "name": "model.layers.17.mlp.gate_up_proj.weight",
1014
+ "shape": [
1015
+ 18432,
1016
+ 2304
1017
+ ],
1018
+ "dtype": "float16",
1019
+ "format": "f32-to-bf16",
1020
+ "nbytes": 84934656,
1021
+ "byteOffset": 0
1022
+ }
1023
+ ],
1024
+ "md5sum": "656dc7f41628463c8bfae102a733637b"
1025
+ },
1026
+ {
1027
+ "dataPath": "params_shard_30.bin",
1028
+ "format": "raw-shard",
1029
+ "nbytes": 28329984,
1030
+ "records": [
1031
+ {
1032
+ "name": "model.layers.16.self_attn.o_proj.weight",
1033
+ "shape": [
1034
+ 2304,
1035
+ 2048
1036
+ ],
1037
+ "dtype": "float16",
1038
+ "format": "f32-to-bf16",
1039
+ "nbytes": 9437184,
1040
+ "byteOffset": 0
1041
+ },
1042
+ {
1043
+ "name": "model.layers.17.input_layernorm.weight",
1044
+ "shape": [
1045
+ 2304
1046
+ ],
1047
+ "dtype": "float16",
1048
+ "format": "f32-to-bf16",
1049
+ "nbytes": 4608,
1050
+ "byteOffset": 9437184
1051
+ },
1052
+ {
1053
+ "name": "model.layers.17.post_attention_layernorm.weight",
1054
+ "shape": [
1055
+ 2304
1056
+ ],
1057
+ "dtype": "float16",
1058
+ "format": "f32-to-bf16",
1059
+ "nbytes": 4608,
1060
+ "byteOffset": 9441792
1061
+ },
1062
+ {
1063
+ "name": "model.layers.17.post_feedforward_layernorm.weight",
1064
+ "shape": [
1065
+ 2304
1066
+ ],
1067
+ "dtype": "float16",
1068
+ "format": "f32-to-bf16",
1069
+ "nbytes": 4608,
1070
+ "byteOffset": 9446400
1071
+ },
1072
+ {
1073
+ "name": "model.layers.17.pre_feedforward_layernorm.weight",
1074
+ "shape": [
1075
+ 2304
1076
+ ],
1077
+ "dtype": "float16",
1078
+ "format": "f32-to-bf16",
1079
+ "nbytes": 4608,
1080
+ "byteOffset": 9451008
1081
+ },
1082
+ {
1083
+ "name": "model.layers.17.self_attn.qkv_proj.weight",
1084
+ "shape": [
1085
+ 4096,
1086
+ 2304
1087
+ ],
1088
+ "dtype": "float16",
1089
+ "format": "f32-to-bf16",
1090
+ "nbytes": 18874368,
1091
+ "byteOffset": 9455616
1092
+ }
1093
+ ],
1094
+ "md5sum": "7717c1fb244f18ff11567ce399f43672"
1095
+ },
1096
+ {
1097
+ "dataPath": "params_shard_31.bin",
1098
+ "format": "raw-shard",
1099
+ "nbytes": 42467328,
1100
+ "records": [
1101
+ {
1102
+ "name": "model.layers.18.mlp.down_proj.weight",
1103
+ "shape": [
1104
+ 2304,
1105
+ 9216
1106
+ ],
1107
+ "dtype": "float16",
1108
+ "format": "f32-to-bf16",
1109
+ "nbytes": 42467328,
1110
+ "byteOffset": 0
1111
+ }
1112
+ ],
1113
+ "md5sum": "e689ecedbbd305c990a890ba7a512f3f"
1114
+ },
1115
+ {
1116
+ "dataPath": "params_shard_32.bin",
1117
+ "format": "raw-shard",
1118
+ "nbytes": 84934656,
1119
+ "records": [
1120
+ {
1121
+ "name": "model.layers.18.mlp.gate_up_proj.weight",
1122
+ "shape": [
1123
+ 18432,
1124
+ 2304
1125
+ ],
1126
+ "dtype": "float16",
1127
+ "format": "f32-to-bf16",
1128
+ "nbytes": 84934656,
1129
+ "byteOffset": 0
1130
+ }
1131
+ ],
1132
+ "md5sum": "d3b3716b7ebcf5cba77aab9b2626a8c5"
1133
+ },
1134
+ {
1135
+ "dataPath": "params_shard_33.bin",
1136
+ "format": "raw-shard",
1137
+ "nbytes": 28329984,
1138
+ "records": [
1139
+ {
1140
+ "name": "model.layers.17.self_attn.o_proj.weight",
1141
+ "shape": [
1142
+ 2304,
1143
+ 2048
1144
+ ],
1145
+ "dtype": "float16",
1146
+ "format": "f32-to-bf16",
1147
+ "nbytes": 9437184,
1148
+ "byteOffset": 0
1149
+ },
1150
+ {
1151
+ "name": "model.layers.18.input_layernorm.weight",
1152
+ "shape": [
1153
+ 2304
1154
+ ],
1155
+ "dtype": "float16",
1156
+ "format": "f32-to-bf16",
1157
+ "nbytes": 4608,
1158
+ "byteOffset": 9437184
1159
+ },
1160
+ {
1161
+ "name": "model.layers.18.post_attention_layernorm.weight",
1162
+ "shape": [
1163
+ 2304
1164
+ ],
1165
+ "dtype": "float16",
1166
+ "format": "f32-to-bf16",
1167
+ "nbytes": 4608,
1168
+ "byteOffset": 9441792
1169
+ },
1170
+ {
1171
+ "name": "model.layers.18.post_feedforward_layernorm.weight",
1172
+ "shape": [
1173
+ 2304
1174
+ ],
1175
+ "dtype": "float16",
1176
+ "format": "f32-to-bf16",
1177
+ "nbytes": 4608,
1178
+ "byteOffset": 9446400
1179
+ },
1180
+ {
1181
+ "name": "model.layers.18.pre_feedforward_layernorm.weight",
1182
+ "shape": [
1183
+ 2304
1184
+ ],
1185
+ "dtype": "float16",
1186
+ "format": "f32-to-bf16",
1187
+ "nbytes": 4608,
1188
+ "byteOffset": 9451008
1189
+ },
1190
+ {
1191
+ "name": "model.layers.18.self_attn.qkv_proj.weight",
1192
+ "shape": [
1193
+ 4096,
1194
+ 2304
1195
+ ],
1196
+ "dtype": "float16",
1197
+ "format": "f32-to-bf16",
1198
+ "nbytes": 18874368,
1199
+ "byteOffset": 9455616
1200
+ }
1201
+ ],
1202
+ "md5sum": "841c546d5ae8142476b339d199bb3b8f"
1203
+ },
1204
+ {
1205
+ "dataPath": "params_shard_34.bin",
1206
+ "format": "raw-shard",
1207
+ "nbytes": 42467328,
1208
+ "records": [
1209
+ {
1210
+ "name": "model.layers.19.mlp.down_proj.weight",
1211
+ "shape": [
1212
+ 2304,
1213
+ 9216
1214
+ ],
1215
+ "dtype": "float16",
1216
+ "format": "f32-to-bf16",
1217
+ "nbytes": 42467328,
1218
+ "byteOffset": 0
1219
+ }
1220
+ ],
1221
+ "md5sum": "726fd1886383ab5cf39ad8eed73c7877"
1222
+ },
1223
+ {
1224
+ "dataPath": "params_shard_35.bin",
1225
+ "format": "raw-shard",
1226
+ "nbytes": 84934656,
1227
+ "records": [
1228
+ {
1229
+ "name": "model.layers.19.mlp.gate_up_proj.weight",
1230
+ "shape": [
1231
+ 18432,
1232
+ 2304
1233
+ ],
1234
+ "dtype": "float16",
1235
+ "format": "f32-to-bf16",
1236
+ "nbytes": 84934656,
1237
+ "byteOffset": 0
1238
+ }
1239
+ ],
1240
+ "md5sum": "f7be1bd0e675520d79c615ccffff940b"
1241
+ },
1242
+ {
1243
+ "dataPath": "params_shard_36.bin",
1244
+ "format": "raw-shard",
1245
+ "nbytes": 28329984,
1246
+ "records": [
1247
+ {
1248
+ "name": "model.layers.18.self_attn.o_proj.weight",
1249
+ "shape": [
1250
+ 2304,
1251
+ 2048
1252
+ ],
1253
+ "dtype": "float16",
1254
+ "format": "f32-to-bf16",
1255
+ "nbytes": 9437184,
1256
+ "byteOffset": 0
1257
+ },
1258
+ {
1259
+ "name": "model.layers.19.input_layernorm.weight",
1260
+ "shape": [
1261
+ 2304
1262
+ ],
1263
+ "dtype": "float16",
1264
+ "format": "f32-to-bf16",
1265
+ "nbytes": 4608,
1266
+ "byteOffset": 9437184
1267
+ },
1268
+ {
1269
+ "name": "model.layers.19.post_attention_layernorm.weight",
1270
+ "shape": [
1271
+ 2304
1272
+ ],
1273
+ "dtype": "float16",
1274
+ "format": "f32-to-bf16",
1275
+ "nbytes": 4608,
1276
+ "byteOffset": 9441792
1277
+ },
1278
+ {
1279
+ "name": "model.layers.19.post_feedforward_layernorm.weight",
1280
+ "shape": [
1281
+ 2304
1282
+ ],
1283
+ "dtype": "float16",
1284
+ "format": "f32-to-bf16",
1285
+ "nbytes": 4608,
1286
+ "byteOffset": 9446400
1287
+ },
1288
+ {
1289
+ "name": "model.layers.19.pre_feedforward_layernorm.weight",
1290
+ "shape": [
1291
+ 2304
1292
+ ],
1293
+ "dtype": "float16",
1294
+ "format": "f32-to-bf16",
1295
+ "nbytes": 4608,
1296
+ "byteOffset": 9451008
1297
+ },
1298
+ {
1299
+ "name": "model.layers.19.self_attn.qkv_proj.weight",
1300
+ "shape": [
1301
+ 4096,
1302
+ 2304
1303
+ ],
1304
+ "dtype": "float16",
1305
+ "format": "f32-to-bf16",
1306
+ "nbytes": 18874368,
1307
+ "byteOffset": 9455616
1308
+ }
1309
+ ],
1310
+ "md5sum": "73c1238e23a0631ea0448ccfa3f0b711"
1311
+ },
1312
+ {
1313
+ "dataPath": "params_shard_37.bin",
1314
+ "format": "raw-shard",
1315
+ "nbytes": 42467328,
1316
+ "records": [
1317
+ {
1318
+ "name": "model.layers.2.mlp.down_proj.weight",
1319
+ "shape": [
1320
+ 2304,
1321
+ 9216
1322
+ ],
1323
+ "dtype": "float16",
1324
+ "format": "f32-to-bf16",
1325
+ "nbytes": 42467328,
1326
+ "byteOffset": 0
1327
+ }
1328
+ ],
1329
+ "md5sum": "87df5c0eba26b631961c2dd5996904ac"
1330
+ },
1331
+ {
1332
+ "dataPath": "params_shard_38.bin",
1333
+ "format": "raw-shard",
1334
+ "nbytes": 84934656,
1335
+ "records": [
1336
+ {
1337
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
1338
+ "shape": [
1339
+ 18432,
1340
+ 2304
1341
+ ],
1342
+ "dtype": "float16",
1343
+ "format": "f32-to-bf16",
1344
+ "nbytes": 84934656,
1345
+ "byteOffset": 0
1346
+ }
1347
+ ],
1348
+ "md5sum": "7abb06539525d99d286f23f2510a6a15"
1349
+ },
1350
+ {
1351
+ "dataPath": "params_shard_39.bin",
1352
+ "format": "raw-shard",
1353
+ "nbytes": 28329984,
1354
+ "records": [
1355
+ {
1356
+ "name": "model.layers.19.self_attn.o_proj.weight",
1357
+ "shape": [
1358
+ 2304,
1359
+ 2048
1360
+ ],
1361
+ "dtype": "float16",
1362
+ "format": "f32-to-bf16",
1363
+ "nbytes": 9437184,
1364
+ "byteOffset": 0
1365
+ },
1366
+ {
1367
+ "name": "model.layers.2.input_layernorm.weight",
1368
+ "shape": [
1369
+ 2304
1370
+ ],
1371
+ "dtype": "float16",
1372
+ "format": "f32-to-bf16",
1373
+ "nbytes": 4608,
1374
+ "byteOffset": 9437184
1375
+ },
1376
+ {
1377
+ "name": "model.layers.2.post_attention_layernorm.weight",
1378
+ "shape": [
1379
+ 2304
1380
+ ],
1381
+ "dtype": "float16",
1382
+ "format": "f32-to-bf16",
1383
+ "nbytes": 4608,
1384
+ "byteOffset": 9441792
1385
+ },
1386
+ {
1387
+ "name": "model.layers.2.post_feedforward_layernorm.weight",
1388
+ "shape": [
1389
+ 2304
1390
+ ],
1391
+ "dtype": "float16",
1392
+ "format": "f32-to-bf16",
1393
+ "nbytes": 4608,
1394
+ "byteOffset": 9446400
1395
+ },
1396
+ {
1397
+ "name": "model.layers.2.pre_feedforward_layernorm.weight",
1398
+ "shape": [
1399
+ 2304
1400
+ ],
1401
+ "dtype": "float16",
1402
+ "format": "f32-to-bf16",
1403
+ "nbytes": 4608,
1404
+ "byteOffset": 9451008
1405
+ },
1406
+ {
1407
+ "name": "model.layers.2.self_attn.qkv_proj.weight",
1408
+ "shape": [
1409
+ 4096,
1410
+ 2304
1411
+ ],
1412
+ "dtype": "float16",
1413
+ "format": "f32-to-bf16",
1414
+ "nbytes": 18874368,
1415
+ "byteOffset": 9455616
1416
+ }
1417
+ ],
1418
+ "md5sum": "8b3f6503f497847b70bef7b8fdcec329"
1419
+ },
1420
+ {
1421
+ "dataPath": "params_shard_40.bin",
1422
+ "format": "raw-shard",
1423
+ "nbytes": 42467328,
1424
+ "records": [
1425
+ {
1426
+ "name": "model.layers.20.mlp.down_proj.weight",
1427
+ "shape": [
1428
+ 2304,
1429
+ 9216
1430
+ ],
1431
+ "dtype": "float16",
1432
+ "format": "f32-to-bf16",
1433
+ "nbytes": 42467328,
1434
+ "byteOffset": 0
1435
+ }
1436
+ ],
1437
+ "md5sum": "8a6d2c22089f5697a6cf7ceac805ecb9"
1438
+ },
1439
+ {
1440
+ "dataPath": "params_shard_41.bin",
1441
+ "format": "raw-shard",
1442
+ "nbytes": 84934656,
1443
+ "records": [
1444
+ {
1445
+ "name": "model.layers.20.mlp.gate_up_proj.weight",
1446
+ "shape": [
1447
+ 18432,
1448
+ 2304
1449
+ ],
1450
+ "dtype": "float16",
1451
+ "format": "f32-to-bf16",
1452
+ "nbytes": 84934656,
1453
+ "byteOffset": 0
1454
+ }
1455
+ ],
1456
+ "md5sum": "76ab3741b10d638ffb2cbd0385f5c726"
1457
+ },
1458
+ {
1459
+ "dataPath": "params_shard_42.bin",
1460
+ "format": "raw-shard",
1461
+ "nbytes": 28329984,
1462
+ "records": [
1463
+ {
1464
+ "name": "model.layers.2.self_attn.o_proj.weight",
1465
+ "shape": [
1466
+ 2304,
1467
+ 2048
1468
+ ],
1469
+ "dtype": "float16",
1470
+ "format": "f32-to-bf16",
1471
+ "nbytes": 9437184,
1472
+ "byteOffset": 0
1473
+ },
1474
+ {
1475
+ "name": "model.layers.20.input_layernorm.weight",
1476
+ "shape": [
1477
+ 2304
1478
+ ],
1479
+ "dtype": "float16",
1480
+ "format": "f32-to-bf16",
1481
+ "nbytes": 4608,
1482
+ "byteOffset": 9437184
1483
+ },
1484
+ {
1485
+ "name": "model.layers.20.post_attention_layernorm.weight",
1486
+ "shape": [
1487
+ 2304
1488
+ ],
1489
+ "dtype": "float16",
1490
+ "format": "f32-to-bf16",
1491
+ "nbytes": 4608,
1492
+ "byteOffset": 9441792
1493
+ },
1494
+ {
1495
+ "name": "model.layers.20.post_feedforward_layernorm.weight",
1496
+ "shape": [
1497
+ 2304
1498
+ ],
1499
+ "dtype": "float16",
1500
+ "format": "f32-to-bf16",
1501
+ "nbytes": 4608,
1502
+ "byteOffset": 9446400
1503
+ },
1504
+ {
1505
+ "name": "model.layers.20.pre_feedforward_layernorm.weight",
1506
+ "shape": [
1507
+ 2304
1508
+ ],
1509
+ "dtype": "float16",
1510
+ "format": "f32-to-bf16",
1511
+ "nbytes": 4608,
1512
+ "byteOffset": 9451008
1513
+ },
1514
+ {
1515
+ "name": "model.layers.20.self_attn.qkv_proj.weight",
1516
+ "shape": [
1517
+ 4096,
1518
+ 2304
1519
+ ],
1520
+ "dtype": "float16",
1521
+ "format": "f32-to-bf16",
1522
+ "nbytes": 18874368,
1523
+ "byteOffset": 9455616
1524
+ }
1525
+ ],
1526
+ "md5sum": "be881a12c18c80ccf6543bc1abe3aa0e"
1527
+ },
1528
+ {
1529
+ "dataPath": "params_shard_43.bin",
1530
+ "format": "raw-shard",
1531
+ "nbytes": 42467328,
1532
+ "records": [
1533
+ {
1534
+ "name": "model.layers.21.mlp.down_proj.weight",
1535
+ "shape": [
1536
+ 2304,
1537
+ 9216
1538
+ ],
1539
+ "dtype": "float16",
1540
+ "format": "f32-to-bf16",
1541
+ "nbytes": 42467328,
1542
+ "byteOffset": 0
1543
+ }
1544
+ ],
1545
+ "md5sum": "8719df36a1f011f693a6f34addd7cdad"
1546
+ },
1547
+ {
1548
+ "dataPath": "params_shard_44.bin",
1549
+ "format": "raw-shard",
1550
+ "nbytes": 84934656,
1551
+ "records": [
1552
+ {
1553
+ "name": "model.layers.21.mlp.gate_up_proj.weight",
1554
+ "shape": [
1555
+ 18432,
1556
+ 2304
1557
+ ],
1558
+ "dtype": "float16",
1559
+ "format": "f32-to-bf16",
1560
+ "nbytes": 84934656,
1561
+ "byteOffset": 0
1562
+ }
1563
+ ],
1564
+ "md5sum": "3c5634e209687d6a6366acc5fe9429f4"
1565
+ },
1566
+ {
1567
+ "dataPath": "params_shard_45.bin",
1568
+ "format": "raw-shard",
1569
+ "nbytes": 28329984,
1570
+ "records": [
1571
+ {
1572
+ "name": "model.layers.20.self_attn.o_proj.weight",
1573
+ "shape": [
1574
+ 2304,
1575
+ 2048
1576
+ ],
1577
+ "dtype": "float16",
1578
+ "format": "f32-to-bf16",
1579
+ "nbytes": 9437184,
1580
+ "byteOffset": 0
1581
+ },
1582
+ {
1583
+ "name": "model.layers.21.input_layernorm.weight",
1584
+ "shape": [
1585
+ 2304
1586
+ ],
1587
+ "dtype": "float16",
1588
+ "format": "f32-to-bf16",
1589
+ "nbytes": 4608,
1590
+ "byteOffset": 9437184
1591
+ },
1592
+ {
1593
+ "name": "model.layers.21.post_attention_layernorm.weight",
1594
+ "shape": [
1595
+ 2304
1596
+ ],
1597
+ "dtype": "float16",
1598
+ "format": "f32-to-bf16",
1599
+ "nbytes": 4608,
1600
+ "byteOffset": 9441792
1601
+ },
1602
+ {
1603
+ "name": "model.layers.21.post_feedforward_layernorm.weight",
1604
+ "shape": [
1605
+ 2304
1606
+ ],
1607
+ "dtype": "float16",
1608
+ "format": "f32-to-bf16",
1609
+ "nbytes": 4608,
1610
+ "byteOffset": 9446400
1611
+ },
1612
+ {
1613
+ "name": "model.layers.21.pre_feedforward_layernorm.weight",
1614
+ "shape": [
1615
+ 2304
1616
+ ],
1617
+ "dtype": "float16",
1618
+ "format": "f32-to-bf16",
1619
+ "nbytes": 4608,
1620
+ "byteOffset": 9451008
1621
+ },
1622
+ {
1623
+ "name": "model.layers.21.self_attn.qkv_proj.weight",
1624
+ "shape": [
1625
+ 4096,
1626
+ 2304
1627
+ ],
1628
+ "dtype": "float16",
1629
+ "format": "f32-to-bf16",
1630
+ "nbytes": 18874368,
1631
+ "byteOffset": 9455616
1632
+ }
1633
+ ],
1634
+ "md5sum": "91877a5e7f27b0fb59bd8d0920a7b4bb"
1635
+ },
1636
+ {
1637
+ "dataPath": "params_shard_46.bin",
1638
+ "format": "raw-shard",
1639
+ "nbytes": 42467328,
1640
+ "records": [
1641
+ {
1642
+ "name": "model.layers.22.mlp.down_proj.weight",
1643
+ "shape": [
1644
+ 2304,
1645
+ 9216
1646
+ ],
1647
+ "dtype": "float16",
1648
+ "format": "f32-to-bf16",
1649
+ "nbytes": 42467328,
1650
+ "byteOffset": 0
1651
+ }
1652
+ ],
1653
+ "md5sum": "9934f4c2b69a4091dc3861bcfb836a2f"
1654
+ },
1655
+ {
1656
+ "dataPath": "params_shard_47.bin",
1657
+ "format": "raw-shard",
1658
+ "nbytes": 84934656,
1659
+ "records": [
1660
+ {
1661
+ "name": "model.layers.22.mlp.gate_up_proj.weight",
1662
+ "shape": [
1663
+ 18432,
1664
+ 2304
1665
+ ],
1666
+ "dtype": "float16",
1667
+ "format": "f32-to-bf16",
1668
+ "nbytes": 84934656,
1669
+ "byteOffset": 0
1670
+ }
1671
+ ],
1672
+ "md5sum": "3ec6e7ec45fa16cf40c46036c1719c1f"
1673
+ },
1674
+ {
1675
+ "dataPath": "params_shard_48.bin",
1676
+ "format": "raw-shard",
1677
+ "nbytes": 28329984,
1678
+ "records": [
1679
+ {
1680
+ "name": "model.layers.21.self_attn.o_proj.weight",
1681
+ "shape": [
1682
+ 2304,
1683
+ 2048
1684
+ ],
1685
+ "dtype": "float16",
1686
+ "format": "f32-to-bf16",
1687
+ "nbytes": 9437184,
1688
+ "byteOffset": 0
1689
+ },
1690
+ {
1691
+ "name": "model.layers.22.input_layernorm.weight",
1692
+ "shape": [
1693
+ 2304
1694
+ ],
1695
+ "dtype": "float16",
1696
+ "format": "f32-to-bf16",
1697
+ "nbytes": 4608,
1698
+ "byteOffset": 9437184
1699
+ },
1700
+ {
1701
+ "name": "model.layers.22.post_attention_layernorm.weight",
1702
+ "shape": [
1703
+ 2304
1704
+ ],
1705
+ "dtype": "float16",
1706
+ "format": "f32-to-bf16",
1707
+ "nbytes": 4608,
1708
+ "byteOffset": 9441792
1709
+ },
1710
+ {
1711
+ "name": "model.layers.22.post_feedforward_layernorm.weight",
1712
+ "shape": [
1713
+ 2304
1714
+ ],
1715
+ "dtype": "float16",
1716
+ "format": "f32-to-bf16",
1717
+ "nbytes": 4608,
1718
+ "byteOffset": 9446400
1719
+ },
1720
+ {
1721
+ "name": "model.layers.22.pre_feedforward_layernorm.weight",
1722
+ "shape": [
1723
+ 2304
1724
+ ],
1725
+ "dtype": "float16",
1726
+ "format": "f32-to-bf16",
1727
+ "nbytes": 4608,
1728
+ "byteOffset": 9451008
1729
+ },
1730
+ {
1731
+ "name": "model.layers.22.self_attn.qkv_proj.weight",
1732
+ "shape": [
1733
+ 4096,
1734
+ 2304
1735
+ ],
1736
+ "dtype": "float16",
1737
+ "format": "f32-to-bf16",
1738
+ "nbytes": 18874368,
1739
+ "byteOffset": 9455616
1740
+ }
1741
+ ],
1742
+ "md5sum": "640babc79070007be0aa91cdd22a80cf"
1743
+ },
1744
+ {
1745
+ "dataPath": "params_shard_49.bin",
1746
+ "format": "raw-shard",
1747
+ "nbytes": 42467328,
1748
+ "records": [
1749
+ {
1750
+ "name": "model.layers.23.mlp.down_proj.weight",
1751
+ "shape": [
1752
+ 2304,
1753
+ 9216
1754
+ ],
1755
+ "dtype": "float16",
1756
+ "format": "f32-to-bf16",
1757
+ "nbytes": 42467328,
1758
+ "byteOffset": 0
1759
+ }
1760
+ ],
1761
+ "md5sum": "446558eee16932ce64b00d7591b2ea79"
1762
+ },
1763
+ {
1764
+ "dataPath": "params_shard_50.bin",
1765
+ "format": "raw-shard",
1766
+ "nbytes": 84934656,
1767
+ "records": [
1768
+ {
1769
+ "name": "model.layers.23.mlp.gate_up_proj.weight",
1770
+ "shape": [
1771
+ 18432,
1772
+ 2304
1773
+ ],
1774
+ "dtype": "float16",
1775
+ "format": "f32-to-bf16",
1776
+ "nbytes": 84934656,
1777
+ "byteOffset": 0
1778
+ }
1779
+ ],
1780
+ "md5sum": "029005c06e461193cabd99d94578f76a"
1781
+ },
1782
+ {
1783
+ "dataPath": "params_shard_51.bin",
1784
+ "format": "raw-shard",
1785
+ "nbytes": 28329984,
1786
+ "records": [
1787
+ {
1788
+ "name": "model.layers.22.self_attn.o_proj.weight",
1789
+ "shape": [
1790
+ 2304,
1791
+ 2048
1792
+ ],
1793
+ "dtype": "float16",
1794
+ "format": "f32-to-bf16",
1795
+ "nbytes": 9437184,
1796
+ "byteOffset": 0
1797
+ },
1798
+ {
1799
+ "name": "model.layers.23.input_layernorm.weight",
1800
+ "shape": [
1801
+ 2304
1802
+ ],
1803
+ "dtype": "float16",
1804
+ "format": "f32-to-bf16",
1805
+ "nbytes": 4608,
1806
+ "byteOffset": 9437184
1807
+ },
1808
+ {
1809
+ "name": "model.layers.23.post_attention_layernorm.weight",
1810
+ "shape": [
1811
+ 2304
1812
+ ],
1813
+ "dtype": "float16",
1814
+ "format": "f32-to-bf16",
1815
+ "nbytes": 4608,
1816
+ "byteOffset": 9441792
1817
+ },
1818
+ {
1819
+ "name": "model.layers.23.post_feedforward_layernorm.weight",
1820
+ "shape": [
1821
+ 2304
1822
+ ],
1823
+ "dtype": "float16",
1824
+ "format": "f32-to-bf16",
1825
+ "nbytes": 4608,
1826
+ "byteOffset": 9446400
1827
+ },
1828
+ {
1829
+ "name": "model.layers.23.pre_feedforward_layernorm.weight",
1830
+ "shape": [
1831
+ 2304
1832
+ ],
1833
+ "dtype": "float16",
1834
+ "format": "f32-to-bf16",
1835
+ "nbytes": 4608,
1836
+ "byteOffset": 9451008
1837
+ },
1838
+ {
1839
+ "name": "model.layers.23.self_attn.qkv_proj.weight",
1840
+ "shape": [
1841
+ 4096,
1842
+ 2304
1843
+ ],
1844
+ "dtype": "float16",
1845
+ "format": "f32-to-bf16",
1846
+ "nbytes": 18874368,
1847
+ "byteOffset": 9455616
1848
+ }
1849
+ ],
1850
+ "md5sum": "f10d50ea8b06958a6179cdad2bfe63f1"
1851
+ },
1852
+ {
1853
+ "dataPath": "params_shard_52.bin",
1854
+ "format": "raw-shard",
1855
+ "nbytes": 84934656,
1856
+ "records": [
1857
+ {
1858
+ "name": "model.layers.24.mlp.gate_up_proj.weight",
1859
+ "shape": [
1860
+ 18432,
1861
+ 2304
1862
+ ],
1863
+ "dtype": "float16",
1864
+ "format": "f32-to-bf16",
1865
+ "nbytes": 84934656,
1866
+ "byteOffset": 0
1867
+ }
1868
+ ],
1869
+ "md5sum": "ce18a53950c7a65819790ac40f331460"
1870
+ },
1871
+ {
1872
+ "dataPath": "params_shard_53.bin",
1873
+ "format": "raw-shard",
1874
+ "nbytes": 28311552,
1875
+ "records": [
1876
+ {
1877
+ "name": "model.layers.23.self_attn.o_proj.weight",
1878
+ "shape": [
1879
+ 2304,
1880
+ 2048
1881
+ ],
1882
+ "dtype": "float16",
1883
+ "format": "f32-to-bf16",
1884
+ "nbytes": 9437184,
1885
+ "byteOffset": 0
1886
+ },
1887
+ {
1888
+ "name": "model.layers.24.self_attn.qkv_proj.weight",
1889
+ "shape": [
1890
+ 4096,
1891
+ 2304
1892
+ ],
1893
+ "dtype": "float16",
1894
+ "format": "f32-to-bf16",
1895
+ "nbytes": 18874368,
1896
+ "byteOffset": 9437184
1897
+ }
1898
+ ],
1899
+ "md5sum": "74a0b41986d05bcf56b5b848b2540032"
1900
+ },
1901
+ {
1902
+ "dataPath": "params_shard_54.bin",
1903
+ "format": "raw-shard",
1904
+ "nbytes": 42467328,
1905
+ "records": [
1906
+ {
1907
+ "name": "model.layers.3.mlp.down_proj.weight",
1908
+ "shape": [
1909
+ 2304,
1910
+ 9216
1911
+ ],
1912
+ "dtype": "float16",
1913
+ "format": "f32-to-bf16",
1914
+ "nbytes": 42467328,
1915
+ "byteOffset": 0
1916
+ }
1917
+ ],
1918
+ "md5sum": "1fa8a2ad0ac76df5408405c499185019"
1919
+ },
1920
+ {
1921
+ "dataPath": "params_shard_55.bin",
1922
+ "format": "raw-shard",
1923
+ "nbytes": 84934656,
1924
+ "records": [
1925
+ {
1926
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
1927
+ "shape": [
1928
+ 18432,
1929
+ 2304
1930
+ ],
1931
+ "dtype": "float16",
1932
+ "format": "f32-to-bf16",
1933
+ "nbytes": 84934656,
1934
+ "byteOffset": 0
1935
+ }
1936
+ ],
1937
+ "md5sum": "33157e305be4722b41078fb1e886437a"
1938
+ },
1939
+ {
1940
+ "dataPath": "params_shard_56.bin",
1941
+ "format": "raw-shard",
1942
+ "nbytes": 28329984,
1943
+ "records": [
1944
+ {
1945
+ "name": "model.layers.24.self_attn.o_proj.weight",
1946
+ "shape": [
1947
+ 2304,
1948
+ 2048
1949
+ ],
1950
+ "dtype": "float16",
1951
+ "format": "f32-to-bf16",
1952
+ "nbytes": 9437184,
1953
+ "byteOffset": 0
1954
+ },
1955
+ {
1956
+ "name": "model.layers.3.input_layernorm.weight",
1957
+ "shape": [
1958
+ 2304
1959
+ ],
1960
+ "dtype": "float16",
1961
+ "format": "f32-to-bf16",
1962
+ "nbytes": 4608,
1963
+ "byteOffset": 9437184
1964
+ },
1965
+ {
1966
+ "name": "model.layers.3.post_attention_layernorm.weight",
1967
+ "shape": [
1968
+ 2304
1969
+ ],
1970
+ "dtype": "float16",
1971
+ "format": "f32-to-bf16",
1972
+ "nbytes": 4608,
1973
+ "byteOffset": 9441792
1974
+ },
1975
+ {
1976
+ "name": "model.layers.3.post_feedforward_layernorm.weight",
1977
+ "shape": [
1978
+ 2304
1979
+ ],
1980
+ "dtype": "float16",
1981
+ "format": "f32-to-bf16",
1982
+ "nbytes": 4608,
1983
+ "byteOffset": 9446400
1984
+ },
1985
+ {
1986
+ "name": "model.layers.3.pre_feedforward_layernorm.weight",
1987
+ "shape": [
1988
+ 2304
1989
+ ],
1990
+ "dtype": "float16",
1991
+ "format": "f32-to-bf16",
1992
+ "nbytes": 4608,
1993
+ "byteOffset": 9451008
1994
+ },
1995
+ {
1996
+ "name": "model.layers.3.self_attn.qkv_proj.weight",
1997
+ "shape": [
1998
+ 4096,
1999
+ 2304
2000
+ ],
2001
+ "dtype": "float16",
2002
+ "format": "f32-to-bf16",
2003
+ "nbytes": 18874368,
2004
+ "byteOffset": 9455616
2005
+ }
2006
+ ],
2007
+ "md5sum": "7dc6a94ae1e14a228cc4f529f3721f73"
2008
+ },
2009
+ {
2010
+ "dataPath": "params_shard_57.bin",
2011
+ "format": "raw-shard",
2012
+ "nbytes": 42467328,
2013
+ "records": [
2014
+ {
2015
+ "name": "model.layers.4.mlp.down_proj.weight",
2016
+ "shape": [
2017
+ 2304,
2018
+ 9216
2019
+ ],
2020
+ "dtype": "float16",
2021
+ "format": "f32-to-bf16",
2022
+ "nbytes": 42467328,
2023
+ "byteOffset": 0
2024
+ }
2025
+ ],
2026
+ "md5sum": "ebf0dffa00892d933edbdfd9ccb4a7d9"
2027
+ },
2028
+ {
2029
+ "dataPath": "params_shard_58.bin",
2030
+ "format": "raw-shard",
2031
+ "nbytes": 84934656,
2032
+ "records": [
2033
+ {
2034
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
2035
+ "shape": [
2036
+ 18432,
2037
+ 2304
2038
+ ],
2039
+ "dtype": "float16",
2040
+ "format": "f32-to-bf16",
2041
+ "nbytes": 84934656,
2042
+ "byteOffset": 0
2043
+ }
2044
+ ],
2045
+ "md5sum": "e056b67a5781c92a0ddc6609770c93a6"
2046
+ },
2047
+ {
2048
+ "dataPath": "params_shard_59.bin",
2049
+ "format": "raw-shard",
2050
+ "nbytes": 28329984,
2051
+ "records": [
2052
+ {
2053
+ "name": "model.layers.3.self_attn.o_proj.weight",
2054
+ "shape": [
2055
+ 2304,
2056
+ 2048
2057
+ ],
2058
+ "dtype": "float16",
2059
+ "format": "f32-to-bf16",
2060
+ "nbytes": 9437184,
2061
+ "byteOffset": 0
2062
+ },
2063
+ {
2064
+ "name": "model.layers.4.input_layernorm.weight",
2065
+ "shape": [
2066
+ 2304
2067
+ ],
2068
+ "dtype": "float16",
2069
+ "format": "f32-to-bf16",
2070
+ "nbytes": 4608,
2071
+ "byteOffset": 9437184
2072
+ },
2073
+ {
2074
+ "name": "model.layers.4.post_attention_layernorm.weight",
2075
+ "shape": [
2076
+ 2304
2077
+ ],
2078
+ "dtype": "float16",
2079
+ "format": "f32-to-bf16",
2080
+ "nbytes": 4608,
2081
+ "byteOffset": 9441792
2082
+ },
2083
+ {
2084
+ "name": "model.layers.4.post_feedforward_layernorm.weight",
2085
+ "shape": [
2086
+ 2304
2087
+ ],
2088
+ "dtype": "float16",
2089
+ "format": "f32-to-bf16",
2090
+ "nbytes": 4608,
2091
+ "byteOffset": 9446400
2092
+ },
2093
+ {
2094
+ "name": "model.layers.4.pre_feedforward_layernorm.weight",
2095
+ "shape": [
2096
+ 2304
2097
+ ],
2098
+ "dtype": "float16",
2099
+ "format": "f32-to-bf16",
2100
+ "nbytes": 4608,
2101
+ "byteOffset": 9451008
2102
+ },
2103
+ {
2104
+ "name": "model.layers.4.self_attn.qkv_proj.weight",
2105
+ "shape": [
2106
+ 4096,
2107
+ 2304
2108
+ ],
2109
+ "dtype": "float16",
2110
+ "format": "f32-to-bf16",
2111
+ "nbytes": 18874368,
2112
+ "byteOffset": 9455616
2113
+ }
2114
+ ],
2115
+ "md5sum": "bc00e8fb6d0ea2f03defa30fb0c0f1cf"
2116
+ },
2117
+ {
2118
+ "dataPath": "params_shard_60.bin",
2119
+ "format": "raw-shard",
2120
+ "nbytes": 42467328,
2121
+ "records": [
2122
+ {
2123
+ "name": "model.layers.5.mlp.down_proj.weight",
2124
+ "shape": [
2125
+ 2304,
2126
+ 9216
2127
+ ],
2128
+ "dtype": "float16",
2129
+ "format": "f32-to-bf16",
2130
+ "nbytes": 42467328,
2131
+ "byteOffset": 0
2132
+ }
2133
+ ],
2134
+ "md5sum": "5fca07b60463941528c09b6ba6e78c5d"
2135
+ },
2136
+ {
2137
+ "dataPath": "params_shard_61.bin",
2138
+ "format": "raw-shard",
2139
+ "nbytes": 84934656,
2140
+ "records": [
2141
+ {
2142
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
2143
+ "shape": [
2144
+ 18432,
2145
+ 2304
2146
+ ],
2147
+ "dtype": "float16",
2148
+ "format": "f32-to-bf16",
2149
+ "nbytes": 84934656,
2150
+ "byteOffset": 0
2151
+ }
2152
+ ],
2153
+ "md5sum": "0693937fe395959db6116fb3bf0714d4"
2154
+ },
2155
+ {
2156
+ "dataPath": "params_shard_62.bin",
2157
+ "format": "raw-shard",
2158
+ "nbytes": 28329984,
2159
+ "records": [
2160
+ {
2161
+ "name": "model.layers.4.self_attn.o_proj.weight",
2162
+ "shape": [
2163
+ 2304,
2164
+ 2048
2165
+ ],
2166
+ "dtype": "float16",
2167
+ "format": "f32-to-bf16",
2168
+ "nbytes": 9437184,
2169
+ "byteOffset": 0
2170
+ },
2171
+ {
2172
+ "name": "model.layers.5.input_layernorm.weight",
2173
+ "shape": [
2174
+ 2304
2175
+ ],
2176
+ "dtype": "float16",
2177
+ "format": "f32-to-bf16",
2178
+ "nbytes": 4608,
2179
+ "byteOffset": 9437184
2180
+ },
2181
+ {
2182
+ "name": "model.layers.5.post_attention_layernorm.weight",
2183
+ "shape": [
2184
+ 2304
2185
+ ],
2186
+ "dtype": "float16",
2187
+ "format": "f32-to-bf16",
2188
+ "nbytes": 4608,
2189
+ "byteOffset": 9441792
2190
+ },
2191
+ {
2192
+ "name": "model.layers.5.post_feedforward_layernorm.weight",
2193
+ "shape": [
2194
+ 2304
2195
+ ],
2196
+ "dtype": "float16",
2197
+ "format": "f32-to-bf16",
2198
+ "nbytes": 4608,
2199
+ "byteOffset": 9446400
2200
+ },
2201
+ {
2202
+ "name": "model.layers.5.pre_feedforward_layernorm.weight",
2203
+ "shape": [
2204
+ 2304
2205
+ ],
2206
+ "dtype": "float16",
2207
+ "format": "f32-to-bf16",
2208
+ "nbytes": 4608,
2209
+ "byteOffset": 9451008
2210
+ },
2211
+ {
2212
+ "name": "model.layers.5.self_attn.qkv_proj.weight",
2213
+ "shape": [
2214
+ 4096,
2215
+ 2304
2216
+ ],
2217
+ "dtype": "float16",
2218
+ "format": "f32-to-bf16",
2219
+ "nbytes": 18874368,
2220
+ "byteOffset": 9455616
2221
+ }
2222
+ ],
2223
+ "md5sum": "09a7c0a5c60b343993bbc19a564cd35b"
2224
+ },
2225
+ {
2226
+ "dataPath": "params_shard_63.bin",
2227
+ "format": "raw-shard",
2228
+ "nbytes": 42467328,
2229
+ "records": [
2230
+ {
2231
+ "name": "model.layers.6.mlp.down_proj.weight",
2232
+ "shape": [
2233
+ 2304,
2234
+ 9216
2235
+ ],
2236
+ "dtype": "float16",
2237
+ "format": "f32-to-bf16",
2238
+ "nbytes": 42467328,
2239
+ "byteOffset": 0
2240
+ }
2241
+ ],
2242
+ "md5sum": "75d6548187e3bf06464b603fca7ec22e"
2243
+ },
2244
+ {
2245
+ "dataPath": "params_shard_64.bin",
2246
+ "format": "raw-shard",
2247
+ "nbytes": 84934656,
2248
+ "records": [
2249
+ {
2250
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
2251
+ "shape": [
2252
+ 18432,
2253
+ 2304
2254
+ ],
2255
+ "dtype": "float16",
2256
+ "format": "f32-to-bf16",
2257
+ "nbytes": 84934656,
2258
+ "byteOffset": 0
2259
+ }
2260
+ ],
2261
+ "md5sum": "a444e56b6147a9658bd9b27f2dce58ff"
2262
+ },
2263
+ {
2264
+ "dataPath": "params_shard_65.bin",
2265
+ "format": "raw-shard",
2266
+ "nbytes": 28329984,
2267
+ "records": [
2268
+ {
2269
+ "name": "model.layers.5.self_attn.o_proj.weight",
2270
+ "shape": [
2271
+ 2304,
2272
+ 2048
2273
+ ],
2274
+ "dtype": "float16",
2275
+ "format": "f32-to-bf16",
2276
+ "nbytes": 9437184,
2277
+ "byteOffset": 0
2278
+ },
2279
+ {
2280
+ "name": "model.layers.6.input_layernorm.weight",
2281
+ "shape": [
2282
+ 2304
2283
+ ],
2284
+ "dtype": "float16",
2285
+ "format": "f32-to-bf16",
2286
+ "nbytes": 4608,
2287
+ "byteOffset": 9437184
2288
+ },
2289
+ {
2290
+ "name": "model.layers.6.post_attention_layernorm.weight",
2291
+ "shape": [
2292
+ 2304
2293
+ ],
2294
+ "dtype": "float16",
2295
+ "format": "f32-to-bf16",
2296
+ "nbytes": 4608,
2297
+ "byteOffset": 9441792
2298
+ },
2299
+ {
2300
+ "name": "model.layers.6.post_feedforward_layernorm.weight",
2301
+ "shape": [
2302
+ 2304
2303
+ ],
2304
+ "dtype": "float16",
2305
+ "format": "f32-to-bf16",
2306
+ "nbytes": 4608,
2307
+ "byteOffset": 9446400
2308
+ },
2309
+ {
2310
+ "name": "model.layers.6.pre_feedforward_layernorm.weight",
2311
+ "shape": [
2312
+ 2304
2313
+ ],
2314
+ "dtype": "float16",
2315
+ "format": "f32-to-bf16",
2316
+ "nbytes": 4608,
2317
+ "byteOffset": 9451008
2318
+ },
2319
+ {
2320
+ "name": "model.layers.6.self_attn.qkv_proj.weight",
2321
+ "shape": [
2322
+ 4096,
2323
+ 2304
2324
+ ],
2325
+ "dtype": "float16",
2326
+ "format": "f32-to-bf16",
2327
+ "nbytes": 18874368,
2328
+ "byteOffset": 9455616
2329
+ }
2330
+ ],
2331
+ "md5sum": "1eab15611ac12561bdee2869ce01cd04"
2332
+ },
2333
+ {
2334
+ "dataPath": "params_shard_66.bin",
2335
+ "format": "raw-shard",
2336
+ "nbytes": 42467328,
2337
+ "records": [
2338
+ {
2339
+ "name": "model.layers.7.mlp.down_proj.weight",
2340
+ "shape": [
2341
+ 2304,
2342
+ 9216
2343
+ ],
2344
+ "dtype": "float16",
2345
+ "format": "f32-to-bf16",
2346
+ "nbytes": 42467328,
2347
+ "byteOffset": 0
2348
+ }
2349
+ ],
2350
+ "md5sum": "cd255cc3726f542b8ab8791b02b951af"
2351
+ },
2352
+ {
2353
+ "dataPath": "params_shard_67.bin",
2354
+ "format": "raw-shard",
2355
+ "nbytes": 84934656,
2356
+ "records": [
2357
+ {
2358
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
2359
+ "shape": [
2360
+ 18432,
2361
+ 2304
2362
+ ],
2363
+ "dtype": "float16",
2364
+ "format": "f32-to-bf16",
2365
+ "nbytes": 84934656,
2366
+ "byteOffset": 0
2367
+ }
2368
+ ],
2369
+ "md5sum": "9282b473f3de1d9729d9649d6fac10ee"
2370
+ },
2371
+ {
2372
+ "dataPath": "params_shard_68.bin",
2373
+ "format": "raw-shard",
2374
+ "nbytes": 28329984,
2375
+ "records": [
2376
+ {
2377
+ "name": "model.layers.6.self_attn.o_proj.weight",
2378
+ "shape": [
2379
+ 2304,
2380
+ 2048
2381
+ ],
2382
+ "dtype": "float16",
2383
+ "format": "f32-to-bf16",
2384
+ "nbytes": 9437184,
2385
+ "byteOffset": 0
2386
+ },
2387
+ {
2388
+ "name": "model.layers.7.input_layernorm.weight",
2389
+ "shape": [
2390
+ 2304
2391
+ ],
2392
+ "dtype": "float16",
2393
+ "format": "f32-to-bf16",
2394
+ "nbytes": 4608,
2395
+ "byteOffset": 9437184
2396
+ },
2397
+ {
2398
+ "name": "model.layers.7.post_attention_layernorm.weight",
2399
+ "shape": [
2400
+ 2304
2401
+ ],
2402
+ "dtype": "float16",
2403
+ "format": "f32-to-bf16",
2404
+ "nbytes": 4608,
2405
+ "byteOffset": 9441792
2406
+ },
2407
+ {
2408
+ "name": "model.layers.7.post_feedforward_layernorm.weight",
2409
+ "shape": [
2410
+ 2304
2411
+ ],
2412
+ "dtype": "float16",
2413
+ "format": "f32-to-bf16",
2414
+ "nbytes": 4608,
2415
+ "byteOffset": 9446400
2416
+ },
2417
+ {
2418
+ "name": "model.layers.7.pre_feedforward_layernorm.weight",
2419
+ "shape": [
2420
+ 2304
2421
+ ],
2422
+ "dtype": "float16",
2423
+ "format": "f32-to-bf16",
2424
+ "nbytes": 4608,
2425
+ "byteOffset": 9451008
2426
+ },
2427
+ {
2428
+ "name": "model.layers.7.self_attn.qkv_proj.weight",
2429
+ "shape": [
2430
+ 4096,
2431
+ 2304
2432
+ ],
2433
+ "dtype": "float16",
2434
+ "format": "f32-to-bf16",
2435
+ "nbytes": 18874368,
2436
+ "byteOffset": 9455616
2437
+ }
2438
+ ],
2439
+ "md5sum": "20cd72177436a84015feb09ada96beb9"
2440
+ },
2441
+ {
2442
+ "dataPath": "params_shard_69.bin",
2443
+ "format": "raw-shard",
2444
+ "nbytes": 42467328,
2445
+ "records": [
2446
+ {
2447
+ "name": "model.layers.8.mlp.down_proj.weight",
2448
+ "shape": [
2449
+ 2304,
2450
+ 9216
2451
+ ],
2452
+ "dtype": "float16",
2453
+ "format": "f32-to-bf16",
2454
+ "nbytes": 42467328,
2455
+ "byteOffset": 0
2456
+ }
2457
+ ],
2458
+ "md5sum": "c2ddb40da3ce09337472d346b81f1b1f"
2459
+ },
2460
+ {
2461
+ "dataPath": "params_shard_70.bin",
2462
+ "format": "raw-shard",
2463
+ "nbytes": 84934656,
2464
+ "records": [
2465
+ {
2466
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
2467
+ "shape": [
2468
+ 18432,
2469
+ 2304
2470
+ ],
2471
+ "dtype": "float16",
2472
+ "format": "f32-to-bf16",
2473
+ "nbytes": 84934656,
2474
+ "byteOffset": 0
2475
+ }
2476
+ ],
2477
+ "md5sum": "90369159e754aa884ceafe344d5c651b"
2478
+ },
2479
+ {
2480
+ "dataPath": "params_shard_71.bin",
2481
+ "format": "raw-shard",
2482
+ "nbytes": 28329984,
2483
+ "records": [
2484
+ {
2485
+ "name": "model.layers.7.self_attn.o_proj.weight",
2486
+ "shape": [
2487
+ 2304,
2488
+ 2048
2489
+ ],
2490
+ "dtype": "float16",
2491
+ "format": "f32-to-bf16",
2492
+ "nbytes": 9437184,
2493
+ "byteOffset": 0
2494
+ },
2495
+ {
2496
+ "name": "model.layers.8.input_layernorm.weight",
2497
+ "shape": [
2498
+ 2304
2499
+ ],
2500
+ "dtype": "float16",
2501
+ "format": "f32-to-bf16",
2502
+ "nbytes": 4608,
2503
+ "byteOffset": 9437184
2504
+ },
2505
+ {
2506
+ "name": "model.layers.8.post_attention_layernorm.weight",
2507
+ "shape": [
2508
+ 2304
2509
+ ],
2510
+ "dtype": "float16",
2511
+ "format": "f32-to-bf16",
2512
+ "nbytes": 4608,
2513
+ "byteOffset": 9441792
2514
+ },
2515
+ {
2516
+ "name": "model.layers.8.post_feedforward_layernorm.weight",
2517
+ "shape": [
2518
+ 2304
2519
+ ],
2520
+ "dtype": "float16",
2521
+ "format": "f32-to-bf16",
2522
+ "nbytes": 4608,
2523
+ "byteOffset": 9446400
2524
+ },
2525
+ {
2526
+ "name": "model.layers.8.pre_feedforward_layernorm.weight",
2527
+ "shape": [
2528
+ 2304
2529
+ ],
2530
+ "dtype": "float16",
2531
+ "format": "f32-to-bf16",
2532
+ "nbytes": 4608,
2533
+ "byteOffset": 9451008
2534
+ },
2535
+ {
2536
+ "name": "model.layers.8.self_attn.qkv_proj.weight",
2537
+ "shape": [
2538
+ 4096,
2539
+ 2304
2540
+ ],
2541
+ "dtype": "float16",
2542
+ "format": "f32-to-bf16",
2543
+ "nbytes": 18874368,
2544
+ "byteOffset": 9455616
2545
+ }
2546
+ ],
2547
+ "md5sum": "e8277fa029e6bf6a5d7a3b54ea17faef"
2548
+ },
2549
+ {
2550
+ "dataPath": "params_shard_72.bin",
2551
+ "format": "raw-shard",
2552
+ "nbytes": 42467328,
2553
+ "records": [
2554
+ {
2555
+ "name": "model.layers.9.mlp.down_proj.weight",
2556
+ "shape": [
2557
+ 2304,
2558
+ 9216
2559
+ ],
2560
+ "dtype": "float16",
2561
+ "format": "f32-to-bf16",
2562
+ "nbytes": 42467328,
2563
+ "byteOffset": 0
2564
+ }
2565
+ ],
2566
+ "md5sum": "fd7ba589c0c05f01cf741093a6a8ad87"
2567
+ },
2568
+ {
2569
+ "dataPath": "params_shard_73.bin",
2570
+ "format": "raw-shard",
2571
+ "nbytes": 84934656,
2572
+ "records": [
2573
+ {
2574
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
2575
+ "shape": [
2576
+ 18432,
2577
+ 2304
2578
+ ],
2579
+ "dtype": "float16",
2580
+ "format": "f32-to-bf16",
2581
+ "nbytes": 84934656,
2582
+ "byteOffset": 0
2583
+ }
2584
+ ],
2585
+ "md5sum": "6deb54d82c855a1df96e50051743bc86"
2586
+ },
2587
+ {
2588
+ "dataPath": "params_shard_74.bin",
2589
+ "format": "raw-shard",
2590
+ "nbytes": 28329984,
2591
+ "records": [
2592
+ {
2593
+ "name": "model.layers.8.self_attn.o_proj.weight",
2594
+ "shape": [
2595
+ 2304,
2596
+ 2048
2597
+ ],
2598
+ "dtype": "float16",
2599
+ "format": "f32-to-bf16",
2600
+ "nbytes": 9437184,
2601
+ "byteOffset": 0
2602
+ },
2603
+ {
2604
+ "name": "model.layers.9.input_layernorm.weight",
2605
+ "shape": [
2606
+ 2304
2607
+ ],
2608
+ "dtype": "float16",
2609
+ "format": "f32-to-bf16",
2610
+ "nbytes": 4608,
2611
+ "byteOffset": 9437184
2612
+ },
2613
+ {
2614
+ "name": "model.layers.9.post_attention_layernorm.weight",
2615
+ "shape": [
2616
+ 2304
2617
+ ],
2618
+ "dtype": "float16",
2619
+ "format": "f32-to-bf16",
2620
+ "nbytes": 4608,
2621
+ "byteOffset": 9441792
2622
+ },
2623
+ {
2624
+ "name": "model.layers.9.post_feedforward_layernorm.weight",
2625
+ "shape": [
2626
+ 2304
2627
+ ],
2628
+ "dtype": "float16",
2629
+ "format": "f32-to-bf16",
2630
+ "nbytes": 4608,
2631
+ "byteOffset": 9446400
2632
+ },
2633
+ {
2634
+ "name": "model.layers.9.pre_feedforward_layernorm.weight",
2635
+ "shape": [
2636
+ 2304
2637
+ ],
2638
+ "dtype": "float16",
2639
+ "format": "f32-to-bf16",
2640
+ "nbytes": 4608,
2641
+ "byteOffset": 9451008
2642
+ },
2643
+ {
2644
+ "name": "model.layers.9.self_attn.qkv_proj.weight",
2645
+ "shape": [
2646
+ 4096,
2647
+ 2304
2648
+ ],
2649
+ "dtype": "float16",
2650
+ "format": "f32-to-bf16",
2651
+ "nbytes": 18874368,
2652
+ "byteOffset": 9455616
2653
+ }
2654
+ ],
2655
+ "md5sum": "ae01dc53fec55e10cff4c54c7cb5fbf8"
2656
+ },
2657
+ {
2658
+ "dataPath": "params_shard_75.bin",
2659
+ "format": "raw-shard",
2660
+ "nbytes": 42467328,
2661
+ "records": [
2662
+ {
2663
+ "name": "model.layers.24.mlp.down_proj.weight",
2664
+ "shape": [
2665
+ 2304,
2666
+ 9216
2667
+ ],
2668
+ "dtype": "float16",
2669
+ "format": "f32-to-bf16",
2670
+ "nbytes": 42467328,
2671
+ "byteOffset": 0
2672
+ }
2673
+ ],
2674
+ "md5sum": "e62d5779311e4b1e79d5542e97bf9215"
2675
+ },
2676
+ {
2677
+ "dataPath": "params_shard_76.bin",
2678
+ "format": "raw-shard",
2679
+ "nbytes": 42467328,
2680
+ "records": [
2681
+ {
2682
+ "name": "model.layers.25.mlp.down_proj.weight",
2683
+ "shape": [
2684
+ 2304,
2685
+ 9216
2686
+ ],
2687
+ "dtype": "float16",
2688
+ "format": "f32-to-bf16",
2689
+ "nbytes": 42467328,
2690
+ "byteOffset": 0
2691
+ }
2692
+ ],
2693
+ "md5sum": "dadde1e874a9f29f03da804b70baebd7"
2694
+ },
2695
+ {
2696
+ "dataPath": "params_shard_77.bin",
2697
+ "format": "raw-shard",
2698
+ "nbytes": 84934656,
2699
+ "records": [
2700
+ {
2701
+ "name": "model.layers.25.mlp.gate_up_proj.weight",
2702
+ "shape": [
2703
+ 18432,
2704
+ 2304
2705
+ ],
2706
+ "dtype": "float16",
2707
+ "format": "f32-to-bf16",
2708
+ "nbytes": 84934656,
2709
+ "byteOffset": 0
2710
+ }
2711
+ ],
2712
+ "md5sum": "8d88737e379b8e2a75e9d3140c54bf60"
2713
+ },
2714
+ {
2715
+ "dataPath": "params_shard_78.bin",
2716
+ "format": "raw-shard",
2717
+ "nbytes": 28348416,
2718
+ "records": [
2719
+ {
2720
+ "name": "model.layers.9.self_attn.o_proj.weight",
2721
+ "shape": [
2722
+ 2304,
2723
+ 2048
2724
+ ],
2725
+ "dtype": "float16",
2726
+ "format": "f32-to-bf16",
2727
+ "nbytes": 9437184,
2728
+ "byteOffset": 0
2729
+ },
2730
+ {
2731
+ "name": "model.layers.24.input_layernorm.weight",
2732
+ "shape": [
2733
+ 2304
2734
+ ],
2735
+ "dtype": "float16",
2736
+ "format": "f32-to-bf16",
2737
+ "nbytes": 4608,
2738
+ "byteOffset": 9437184
2739
+ },
2740
+ {
2741
+ "name": "model.layers.24.post_attention_layernorm.weight",
2742
+ "shape": [
2743
+ 2304
2744
+ ],
2745
+ "dtype": "float16",
2746
+ "format": "f32-to-bf16",
2747
+ "nbytes": 4608,
2748
+ "byteOffset": 9441792
2749
+ },
2750
+ {
2751
+ "name": "model.layers.24.post_feedforward_layernorm.weight",
2752
+ "shape": [
2753
+ 2304
2754
+ ],
2755
+ "dtype": "float16",
2756
+ "format": "f32-to-bf16",
2757
+ "nbytes": 4608,
2758
+ "byteOffset": 9446400
2759
+ },
2760
+ {
2761
+ "name": "model.layers.24.pre_feedforward_layernorm.weight",
2762
+ "shape": [
2763
+ 2304
2764
+ ],
2765
+ "dtype": "float16",
2766
+ "format": "f32-to-bf16",
2767
+ "nbytes": 4608,
2768
+ "byteOffset": 9451008
2769
+ },
2770
+ {
2771
+ "name": "model.layers.25.input_layernorm.weight",
2772
+ "shape": [
2773
+ 2304
2774
+ ],
2775
+ "dtype": "float16",
2776
+ "format": "f32-to-bf16",
2777
+ "nbytes": 4608,
2778
+ "byteOffset": 9455616
2779
+ },
2780
+ {
2781
+ "name": "model.layers.25.post_attention_layernorm.weight",
2782
+ "shape": [
2783
+ 2304
2784
+ ],
2785
+ "dtype": "float16",
2786
+ "format": "f32-to-bf16",
2787
+ "nbytes": 4608,
2788
+ "byteOffset": 9460224
2789
+ },
2790
+ {
2791
+ "name": "model.layers.25.post_feedforward_layernorm.weight",
2792
+ "shape": [
2793
+ 2304
2794
+ ],
2795
+ "dtype": "float16",
2796
+ "format": "f32-to-bf16",
2797
+ "nbytes": 4608,
2798
+ "byteOffset": 9464832
2799
+ },
2800
+ {
2801
+ "name": "model.layers.25.pre_feedforward_layernorm.weight",
2802
+ "shape": [
2803
+ 2304
2804
+ ],
2805
+ "dtype": "float16",
2806
+ "format": "f32-to-bf16",
2807
+ "nbytes": 4608,
2808
+ "byteOffset": 9469440
2809
+ },
2810
+ {
2811
+ "name": "model.layers.25.self_attn.qkv_proj.weight",
2812
+ "shape": [
2813
+ 4096,
2814
+ 2304
2815
+ ],
2816
+ "dtype": "float16",
2817
+ "format": "f32-to-bf16",
2818
+ "nbytes": 18874368,
2819
+ "byteOffset": 9474048
2820
+ }
2821
+ ],
2822
+ "md5sum": "811a932b1be1b38f1095c9e505a59ff1"
2823
+ },
2824
+ {
2825
+ "dataPath": "params_shard_79.bin",
2826
+ "format": "raw-shard",
2827
+ "nbytes": 9441792,
2828
+ "records": [
2829
+ {
2830
+ "name": "model.layers.25.self_attn.o_proj.weight",
2831
+ "shape": [
2832
+ 2304,
2833
+ 2048
2834
+ ],
2835
+ "dtype": "float16",
2836
+ "format": "f32-to-bf16",
2837
+ "nbytes": 9437184,
2838
+ "byteOffset": 0
2839
+ },
2840
+ {
2841
+ "name": "model.norm.weight",
2842
+ "shape": [
2843
+ 2304
2844
+ ],
2845
+ "dtype": "float16",
2846
+ "format": "f32-to-bf16",
2847
+ "nbytes": 4608,
2848
+ "byteOffset": 9437184
2849
+ }
2850
+ ],
2851
+ "md5sum": "cf0010eca37346feab454f386195a617"
2852
+ }
2853
+ ]
2854
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a9d30707e659e2e673656d71f5aef7a9fb9fd83bb9a77558dfc5abbe218a05
3
+ size 1179648000
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51eb4d962189e945a84e94e0dc1aad3f8f90cc1a11e18029670afcd0ea0acb1b
3
+ size 42467328
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b86902f4e36868421e5228b9445051f8290b292df22a6d1af836dcecc1f25c3
3
+ size 42467328
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ddf38a72fa5caafb544ac458fb82ea9911a0c04a4144560c26052d5ffd6729b
3
+ size 84934656
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:729bd9b0aeb21c20ed7e158d6257ac6689bbe202bc989423ac0ccc655eb1cd8a
3
+ size 28329984
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:578f39f8f9fc2f09138afc884a952d7cc3a9a31de4216acd10e88e19e0b75f8c
3
+ size 42467328
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7160079ce3edf07e98fb2fc1e341e00833f17297df013bd33949d4c9cfbbc537
3
+ size 84934656
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26c0cdccf78c75753be8d382f127bd2a83cd3f6f4052457a941a10ca163e0b99
3
+ size 28329984
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5514e0c8e7b3ed1cbcc1605eb5be1733b6ab3514cf8a0508fc72f7d05ed8bcb
3
+ size 42467328
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78dd6aa0956e528bd824a0c1c66d1f83c22b57923650bd4dd09a77e20fff13dd
3
+ size 84934656
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff1e49052aaac05e170b32164631be481bf7f545bfd971a127c43cfb65b0b49e
3
+ size 28329984
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e29960965b384ae5ab3d898a4dbaa8fddd28fa0e477ac28bcac49dec12a5ac67
3
+ size 42467328
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b860ae6eb64c9ff97f4a4f0827ae4004da7d1db7ea6b5113bb9000f8966142aa
3
+ size 84934656
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfaaee1a95354241b3d0ee19b6c2ff83d159e5381a541f842c897404120ef0e5
3
+ size 84934656
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56d1cd33738b0e058dd30136025f5e6e038b5f68cd7f755900d74cc9eaccd197
3
+ size 28329984
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc1f97a65dde6fa2c1e5397afb612266944b343f2eaa868b635ddd25829f8a42
3
+ size 42467328
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7f3e807e071f87059fada1bf3c52ea1ee164a3de3bf03a69b2f60f2cb9eadb
3
+ size 84934656
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2862ddc67ee91737032cc23defd79e554f3959b495a3448d3b23242755f08adc
3
+ size 28329984
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc1c813eb5e7da3d6194569d6cb21602fc6eff2dc8e1b0eb753f2d5df148189c
3
+ size 42467328
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73d90e644138b0e76f1fd37059fcaad2fd7906a56c37d414e645bc1e18c927ce
3
+ size 84934656
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc17063d5daf5299e2ce360b7f60855d6016af1dda2154db8c55201d1a9ab955
3
+ size 28329984
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff9a7cccaa3776434a9d895aae4fb5c36c736bf2ec98784226b4c234940fbb0
3
+ size 42467328
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e7f9eead173e1da3a7f8b33c245618d2fbd6ec0a899f0042ffa1b2d3e965330
3
+ size 84934656
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66f30763a8bbbcaea609a0087ed75fadb5e771c06378dd2cea94cf17e492e8cf
3
+ size 42467328
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:778078026e97540cfb252443bcd187498460ac184abb723536b9968205b63775
3
+ size 28329984
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e9d09b13a33525e14bdaee6efc65c551ac7cf7680e534b940ab122a3a7c1ac9
3
+ size 42467328
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fff8053b595fe88109bda0af664ada2247db59e66b17955a1bf6000ddf8ddd7a
3
+ size 84934656
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf88756bc62968b1bc9a6df58c9a690fbd04fff68e8a0a73692af51cdb929001
3
+ size 28329984
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc7f2d7827ee01c2dd41401c7b3b1700ad3a4ff620e8bb734f92630d342dcc7f
3
+ size 42467328
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6728cd9d15f8317b5d2f531e8c9880a0700bf105dbe508cd53b63528f0bf638
3
+ size 84934656
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05df72ab189498465949abc5b018e61df06db1933a95a9e6f067a7856d0676a8
3
+ size 28329984
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea204fd04e0d2fc728a9861a459216bbfec629c152004ba625f52cd8837bd51e
3
+ size 42467328
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4944be99943438af00e1591faf33affd3c60b4004c8316ef7a3c539283cb26d
3
+ size 84934656
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efcaad0057bbea1479562622b205be7919205eff51f3d7a73ea8745a1a83b45f
3
+ size 28329984
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6d1f8d138a69ae44e339b0098c094917a7bc7e250d101135640ab7aac609ecb
3
+ size 84934656
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97efb608ac44cc804198faec3ee66eafe56ced6b7ca5359700c6f1df75b7205e
3
+ size 42467328
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:accc13d34cbe37cbe6563161fb90470b5d376e8d7d3650980b1a10cec2bbf9d4
3
+ size 84934656
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61be0d51fd43724e4b270a79901549bb4c23b7269ba099fbc0d0fd813633e99d
3
+ size 28329984
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fd106556fb721b1c28ae3f4026bc83eb1b08ed910f2ba5f466c6b5f327d91cb
3
+ size 42467328
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df30196f56b19a5bce54a9510d8eca6157457436bc4fa5a7057ff284daeee452
3
+ size 84934656
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51d573c85277a481c98e513525d32fe77a1cea04d1dbe3fba2231f42c24d0399
3
+ size 28329984
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00cb8939f03e5817d6d412de8cf2c923c9568d5493e382cec7faf5718fb034eb
3
+ size 42467328
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6d60bee8a526a133c83f6a8dd7b618dcb2cd0dc3dc95ce6ff1289646fc11479
3
+ size 84934656
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81f6c55eee3b0cb0cb3974a8736926b5810e5189536f21afbf81d9676bb418b0
3
+ size 28329984
params_shard_49.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf547eccb1b82aa64f208cee9682d7f558ca84e0aead7d9d3d1420d90f3d992
3
+ size 42467328
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:373acf6cd141e5f31e1b1509866c88a5acf91f22bd66f3ee787b33c1882cfe8a
3
+ size 18874368
params_shard_50.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdb7a7ebded7dee460b85b3f84ae4d210b05aacac9313320e36c57c39f5994f8
3
+ size 84934656