{ "metadata": { "ParamSize": 283, "ParamBytes": 1807423488.0, "BitsPerParam": 4.500626782697164 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 197001216, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 197001216, "byteOffset": 0 } ], "md5sum": "7e77bcae2f042139e5607f67c99af5a1" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 24631296, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24625152, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 24625152 } ], "md5sum": "5ac94bd3abfff44fc23c41e36d66afaa" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c96ba4d7b3101e06cbb22dfa01d35c66" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "07349b52666756a409032744362831a4" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "50b1df46041bf15408bef1eb66213343" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "9fefe8f5d682d78785203bd289a92eb3" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f92b2d911ea3321528522b25f6429b31" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "10fec9b4c29a1491975dfad6677e6234" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9de71b776ff9b823fd936e6e1c8ba9cc" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "550b834c66228a6377e8f66ee0a4f4d4" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "eca63c4770b97f9aefe33341e54a66f0" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "20530871e6bbb4de991c16ae6af4e1c2" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "da2ddea73bf9a08b7d3f6ed412ebcdd7" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "49fc4fede1e326b0efc9df13afe3197d" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5bf15a126abdd727e2a4c595b4ca52be" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "e22ce0f793e50be6b5084be54661efbd" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bb5a180ef65a127b3dbeed3e2016d76b" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "fd06ef3e6a50bff5fb44c83d36ab8610" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f74e7c5897c6b9c690669dc837903b54" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "96a8305213773433dbac3114750ed2ba" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "74e45abb0f933302ba31f0de681e1051" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "41780a2c8f86ecd0e1799575e95908bb" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c00675d578e02c751cbbfa82b213b796" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "84838295299e603a0308d1040940d954" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "eff515aac146cac12f99b3beb00998ef" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "6f248c3a6e31dbe5845c2bba43b665ff" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ada81113ce2733bdfe1c67ab19e85e9c" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "14b8f657d06b29591c1316fbfd0a367b" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 31463424, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 } ], "md5sum": "bed77e094bc505081a7c3b15b75c1912" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8e1e087790dd1e34bf55dddef6c5951f" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 31463424, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3145728 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11010048 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11993088 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16711680 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17307648 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29890560 } ], "md5sum": "5588936c99fd53bfcef820ed6f80c24c" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8617ee34392ee736c71b272190ac3358" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 } ], "md5sum": "1bbd1013a36e95fd7c0ae1129fc12f37" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "651912f91cabae237862d970db372d3f" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 } ], "md5sum": "987e558ee6c270535ebf4dca2d97ef67" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "02670d8630104ce84b7f3e41e62531a3" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 } ], "md5sum": "e68200d8e036ce031d99ad6ce8c835a8" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0b131466afe36d1d315457252af36af4" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 } ], "md5sum": "22b1c32a5faa81346dbbb718fdb8194f" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ff3f877ca2d5b8b9fe89a5e418b88352" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 } ], "md5sum": "915f6a8c5f0ed678d82b7144126e7a6b" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "da8f5b9051662247c3a409053e4384f7" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 } ], "md5sum": "fd54de57a9fde97781eb3e765b2464f1" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 31481856, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3145728 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 3151872 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 11016192 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11999232 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16717824 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17307648 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 17313792 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 29896704 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31469568 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31475712 } ], "md5sum": "4174c0219f14e5ec71f5a3b2f73a6977" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "054e94cbbec412570d12cfc8acf2c891" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "d03768b2d106213a918bb8e03abdc66b" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "227bc07af02185cec6503b0a73bc1a29" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "c2b4ba4b5ff89375f1a82b5727519bf7" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "26328e19d0ad9554576b81d0c58d8cd8" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "fef3794b536e67ec9e06deda81a11a23" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7d7c20d16c4d5802d25893042c4b7913" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "7011ce62421183a44e89ca9d7ecf6e2a" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4270c0a3f8ea192c8ae62cfc6d502f27" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "d1e1fd6cd1aa0903d1b79b9cf1b261a5" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cd71562d87f5d99378d1bb2e73026bda" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "a3360688b8eaf9b9d3628f457ad036bb" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8b2c34e66e18fe933e3c1241dd3600dc" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 17307648 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 96, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 25171968 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26155008 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 30873600 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31463424 } ], "md5sum": "345f6bb2c0de8dd3a728bda35a60ff69" } ] }