Qwen3-4B-Thinking-2507-q4f32_1-MLC / ndarray-cache-b16.json
vladbuinceanu's picture
Upload Qwen3-4B-Thinking-2507-q4f32_1-MLC model
151dcd3 verified
{
"metadata": {
"ParamSize": 435,
"ParamBytes": 2514704384.0,
"BitsPerParam": 5.00131625456651
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 194478080,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
151936,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 194478080,
"byteOffset": 0
}
],
"md5sum": "e004de5abf2491b3521a7bb043b9f7e6"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 24314880,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
151936,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 24309760,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 24309760
}
],
"md5sum": "06ed0337079bad2c9322996d1829d6d5"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "a77c0fbd6fd6c340af17e4281264f61e"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.0.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.0.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "5f838637667c7f8ba3c6433f02402814"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "1e15cd2dfc07dd2e37bd523280be8b0f"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.1.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.1.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "960ff50f39d1b91b2d7895bdb6bceeeb"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "5f43b4141dd86539c285b7287b7f6355"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.10.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.10.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "02ef8a8d548f5386be079248de9187e3"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "722d4b7cece53c9081c82fe0537de646"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.11.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.11.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "562eed898e6792499f87629b4e39ca40"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "7648854ef78b64f802de7852662a4db0"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.12.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.12.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "df63c8c2a01dfeb072d30fa0e8ed3041"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "436d7b40dced3b05e38081b5b62b5d87"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.13.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.13.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "08eea3ae328d5e166407a9f310d4fe07"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "0b73ad4ab968397c45937bf9e65b1698"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "ed53f86df872420591c7923f6548e28c"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 31872512,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.14.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.14.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
}
],
"md5sum": "df5b7bf56662a6cfe21a42ad2a30a3dd"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "0a1b6b99e5542e19268518acf8607a48"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 31872512,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 3112960
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3113216
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 10977536
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11960576
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 17203456
},
{
"name": "model.layers.15.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17858816
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17859072
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17864192
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 30316032
}
],
"md5sum": "38d22cf335c41a64282355f8b95c29ed"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "7a52796dbaaf7bf3441c7756cdcd5fbb"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.2.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.2.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "27a68e2d13a1d78069e18420d08fb362"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "a233b9442e36ea13eaee91fc19ed7dbf"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.3.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.3.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "1e8ba81acf20d1163d7f155d524b583b"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "c1c2b03a7609fa94b05022da0ccd1dfd"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.4.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.4.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "2306cffde5b97f50d6c8668efa087ec9"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "416278ad1d8fd99bd80a79118518e998"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.5.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.5.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "644e488088d1f4c676a61b23056ee62f"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "a1c05446c097ee867f88605d4a2a8e0a"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.6.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.6.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "d104a0280624988486ae21f246580e72"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "2b94f7444bad3c3586769329dcb95ab0"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.7.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.7.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "6cfdc386277b521499bd620ab9589bcc"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "763df4f11dfc00be977dfec5281c42f0"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.8.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.8.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "538b74c3dc55b68d7b48cbc5f7d985f9"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 31887872,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.9.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.9.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 30321152
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31877632
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31882752
}
],
"md5sum": "5678cf6f79c61604ff3fefe0fc02ffb2"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "615b51e487d519e19e311c8bd9e43487"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.16.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.16.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "77883c2d13f3aa1444e025d36bdc3de3"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "f714cf9f1830f924d231a181f4a174d3"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.17.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.17.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "636fa238cd1ce89f9a1afcc91f69ffac"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "49b687c6214a65a5c5f2735498db9669"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.18.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.18.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "ed3e55282c6e4b743fa1dc80cf388d77"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "1554b39e461ee97d1a6cfe97394ee2e8"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.19.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.19.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "b02ceff2aa138cf13f20e3a9eeb78aa2"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "502ef5630c557a24e76b2fa7841c0931"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.20.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.20.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "00ccfaa0a6ae67ea7798923d8ffe74b4"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "0132e891e284d391b5acf41a5af5ef20"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.21.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.21.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "cda36846a72ebdb4a300939ee31cbf46"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "974341f96eedd701ddfe58f35808a9e0"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.22.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.22.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "714dc1f70dc41e00d6194f923bcb6dc5"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "c5ccd2131593dd9eb89b64ce9c99c06e"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.23.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.23.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "66639cf7c41ea68d25a4ecf185e06b03"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "b64810776170649c20c2f1a20e6de42f"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.24.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.24.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "efb5d7772481ebb74c49f7f263838a1e"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "341c80a185ec952dc6471d027aa64032"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.25.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.25.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "7c56ff574dba11b3ce60588912c87960"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "d285c486bdf03714d5c1ffd989596ad9"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.26.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.26.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "5ab172380d500a5c6cfc730a812da7b4"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "c63166b5f418b0472af6e52da2d60aeb"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.27.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.27.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "d2d8ef2cf6d181fbf4309c0345f60db8"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "05e1293ebb71929498ccc423433eac2b"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.28.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.28.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.28.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.28.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "99041d6977cc28725dbecf56ece5bd13"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "f02cc7026a061d1828283a852df720b3"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.29.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.29.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.29.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.29.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "d0cbf6279461a1f462b222d58f515cc7"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "f0dea03fcd61243fe96d65f42e1210f7"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.30.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.30.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.30.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.30.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "b243b590fcc66adc3c69b314679c2bcf"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "6c338a39a69307fe054c6e8163c6ad14"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.31.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.31.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.31.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.31.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "7cb3e6f4f072c45917d83e4658b59121"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "6131175d804b5531bc85dd971e4e09e8"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.32.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.32.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.32.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.32.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "a673c7f9f5643f1ae3a8c81f4c3119d4"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "9bf56aa689a85c855d253fb1134cf52d"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.33.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.33.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.33.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.33.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "9614bc361c55100c0c12ba34f8e730c5"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "80ddb084bd2d09ea0866f7242e07e50f"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "6c2c417da897b8375c3a246f8cc1af1a"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 31872512,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.34.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.34.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.34.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.34.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 31872256
}
],
"md5sum": "cd5447ab6e2d5fcf7cc81a281b0a47a5"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 31882752,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 3112960
},
{
"name": "model.layers.35.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3113216
},
{
"name": "model.layers.35.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 983040,
"byteOffset": 10977536
},
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11960576
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 655360,
"byteOffset": 17203456
},
{
"name": "model.layers.35.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 256,
"byteOffset": 17858816
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 17859072
},
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17864192
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1556480,
"byteOffset": 30316032
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31872512
},
{
"name": "model.norm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31877632
}
],
"md5sum": "0048ee4def0352ba9e72789f94d71639"
}
]
}