sh2orc committed (verified)
Commit 9ee1aef
1 Parent(s): a8d5bc3

Upload folder using huggingface_hub

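The commit message indicates the files were pushed with huggingface_hub's folder upload. A minimal sketch of that kind of call is below; the repository id and local path are placeholders, not values taken from this commit.

    from huggingface_hub import HfApi

    api = HfApi()

    # Upload every file in a local directory as a single commit on the Hub.
    # repo_id and folder_path are placeholders; the real values are not part
    # of the diff shown on this page.
    api.upload_folder(
        repo_id="your-username/your-model",
        folder_path="./local-model-folder",
        repo_type="model",
        commit_message="Upload folder using huggingface_hub",
    )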
config.json CHANGED
@@ -52,168 +52,6 @@
  "format": "float-quantized",
  "global_compression_ratio": null,
  "ignore": [
- "visual.blocks.0.attn.qkv",
- "visual.blocks.0.attn.proj",
- "visual.blocks.0.mlp.gate_proj",
- "visual.blocks.0.mlp.up_proj",
- "visual.blocks.0.mlp.down_proj",
- "visual.blocks.1.attn.qkv",
- "visual.blocks.1.attn.proj",
- "visual.blocks.1.mlp.gate_proj",
- "visual.blocks.1.mlp.up_proj",
- "visual.blocks.1.mlp.down_proj",
- "visual.blocks.2.attn.qkv",
- "visual.blocks.2.attn.proj",
- "visual.blocks.2.mlp.gate_proj",
- "visual.blocks.2.mlp.up_proj",
- "visual.blocks.2.mlp.down_proj",
- "visual.blocks.3.attn.qkv",
- "visual.blocks.3.attn.proj",
- "visual.blocks.3.mlp.gate_proj",
- "visual.blocks.3.mlp.up_proj",
- "visual.blocks.3.mlp.down_proj",
- "visual.blocks.4.attn.qkv",
- "visual.blocks.4.attn.proj",
- "visual.blocks.4.mlp.gate_proj",
- "visual.blocks.4.mlp.up_proj",
- "visual.blocks.4.mlp.down_proj",
- "visual.blocks.5.attn.qkv",
- "visual.blocks.5.attn.proj",
- "visual.blocks.5.mlp.gate_proj",
- "visual.blocks.5.mlp.up_proj",
- "visual.blocks.5.mlp.down_proj",
- "visual.blocks.6.attn.qkv",
- "visual.blocks.6.attn.proj",
- "visual.blocks.6.mlp.gate_proj",
- "visual.blocks.6.mlp.up_proj",
- "visual.blocks.6.mlp.down_proj",
- "visual.blocks.7.attn.qkv",
- "visual.blocks.7.attn.proj",
- "visual.blocks.7.mlp.gate_proj",
- "visual.blocks.7.mlp.up_proj",
- "visual.blocks.7.mlp.down_proj",
- "visual.blocks.8.attn.qkv",
- "visual.blocks.8.attn.proj",
- "visual.blocks.8.mlp.gate_proj",
- "visual.blocks.8.mlp.up_proj",
- "visual.blocks.8.mlp.down_proj",
- "visual.blocks.9.attn.qkv",
- "visual.blocks.9.attn.proj",
- "visual.blocks.9.mlp.gate_proj",
- "visual.blocks.9.mlp.up_proj",
- "visual.blocks.9.mlp.down_proj",
- "visual.blocks.10.attn.qkv",
- "visual.blocks.10.attn.proj",
- "visual.blocks.10.mlp.gate_proj",
- "visual.blocks.10.mlp.up_proj",
- "visual.blocks.10.mlp.down_proj",
- "visual.blocks.11.attn.qkv",
- "visual.blocks.11.attn.proj",
- "visual.blocks.11.mlp.gate_proj",
- "visual.blocks.11.mlp.up_proj",
- "visual.blocks.11.mlp.down_proj",
- "visual.blocks.12.attn.qkv",
- "visual.blocks.12.attn.proj",
- "visual.blocks.12.mlp.gate_proj",
- "visual.blocks.12.mlp.up_proj",
- "visual.blocks.12.mlp.down_proj",
- "visual.blocks.13.attn.qkv",
- "visual.blocks.13.attn.proj",
- "visual.blocks.13.mlp.gate_proj",
- "visual.blocks.13.mlp.up_proj",
- "visual.blocks.13.mlp.down_proj",
- "visual.blocks.14.attn.qkv",
- "visual.blocks.14.attn.proj",
- "visual.blocks.14.mlp.gate_proj",
- "visual.blocks.14.mlp.up_proj",
- "visual.blocks.14.mlp.down_proj",
- "visual.blocks.15.attn.qkv",
- "visual.blocks.15.attn.proj",
- "visual.blocks.15.mlp.gate_proj",
- "visual.blocks.15.mlp.up_proj",
- "visual.blocks.15.mlp.down_proj",
- "visual.blocks.16.attn.qkv",
- "visual.blocks.16.attn.proj",
- "visual.blocks.16.mlp.gate_proj",
- "visual.blocks.16.mlp.up_proj",
- "visual.blocks.16.mlp.down_proj",
- "visual.blocks.17.attn.qkv",
- "visual.blocks.17.attn.proj",
- "visual.blocks.17.mlp.gate_proj",
- "visual.blocks.17.mlp.up_proj",
- "visual.blocks.17.mlp.down_proj",
- "visual.blocks.18.attn.qkv",
- "visual.blocks.18.attn.proj",
- "visual.blocks.18.mlp.gate_proj",
- "visual.blocks.18.mlp.up_proj",
- "visual.blocks.18.mlp.down_proj",
- "visual.blocks.19.attn.qkv",
- "visual.blocks.19.attn.proj",
- "visual.blocks.19.mlp.gate_proj",
- "visual.blocks.19.mlp.up_proj",
- "visual.blocks.19.mlp.down_proj",
- "visual.blocks.20.attn.qkv",
- "visual.blocks.20.attn.proj",
- "visual.blocks.20.mlp.gate_proj",
- "visual.blocks.20.mlp.up_proj",
- "visual.blocks.20.mlp.down_proj",
- "visual.blocks.21.attn.qkv",
- "visual.blocks.21.attn.proj",
- "visual.blocks.21.mlp.gate_proj",
- "visual.blocks.21.mlp.up_proj",
- "visual.blocks.21.mlp.down_proj",
- "visual.blocks.22.attn.qkv",
- "visual.blocks.22.attn.proj",
- "visual.blocks.22.mlp.gate_proj",
- "visual.blocks.22.mlp.up_proj",
- "visual.blocks.22.mlp.down_proj",
- "visual.blocks.23.attn.qkv",
- "visual.blocks.23.attn.proj",
- "visual.blocks.23.mlp.gate_proj",
- "visual.blocks.23.mlp.up_proj",
- "visual.blocks.23.mlp.down_proj",
- "visual.blocks.24.attn.qkv",
- "visual.blocks.24.attn.proj",
- "visual.blocks.24.mlp.gate_proj",
- "visual.blocks.24.mlp.up_proj",
- "visual.blocks.24.mlp.down_proj",
- "visual.blocks.25.attn.qkv",
- "visual.blocks.25.attn.proj",
- "visual.blocks.25.mlp.gate_proj",
- "visual.blocks.25.mlp.up_proj",
- "visual.blocks.25.mlp.down_proj",
- "visual.blocks.26.attn.qkv",
- "visual.blocks.26.attn.proj",
- "visual.blocks.26.mlp.gate_proj",
- "visual.blocks.26.mlp.up_proj",
- "visual.blocks.26.mlp.down_proj",
- "visual.blocks.27.attn.qkv",
- "visual.blocks.27.attn.proj",
- "visual.blocks.27.mlp.gate_proj",
- "visual.blocks.27.mlp.up_proj",
- "visual.blocks.27.mlp.down_proj",
- "visual.blocks.28.attn.qkv",
- "visual.blocks.28.attn.proj",
- "visual.blocks.28.mlp.gate_proj",
- "visual.blocks.28.mlp.up_proj",
- "visual.blocks.28.mlp.down_proj",
- "visual.blocks.29.attn.qkv",
- "visual.blocks.29.attn.proj",
- "visual.blocks.29.mlp.gate_proj",
- "visual.blocks.29.mlp.up_proj",
- "visual.blocks.29.mlp.down_proj",
- "visual.blocks.30.attn.qkv",
- "visual.blocks.30.attn.proj",
- "visual.blocks.30.mlp.gate_proj",
- "visual.blocks.30.mlp.up_proj",
- "visual.blocks.30.mlp.down_proj",
- "visual.blocks.31.attn.qkv",
- "visual.blocks.31.attn.proj",
- "visual.blocks.31.mlp.gate_proj",
- "visual.blocks.31.mlp.up_proj",
- "visual.blocks.31.mlp.down_proj",
- "visual.merger.mlp.0",
- "visual.merger.mlp.2",
  "lm_head"
  ],
  "kv_cache_scheme": null,
model-00001-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a6b117abae89ab9be3d722f163819124dd20fd83ca9ff26f4f798fdaaddc3492
- size 4948897768
+ oid sha256:3531038d3ab1495875f5f036edcc59016fa42bb56e65f832c43ee793b5155372
+ size 4892355856
model-00002-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:55f5407681a8df33db666b4fb7fc6ca626870cf56aa2197019c7b133ed646ce9
- size 4877702120
+ oid sha256:4487f44ec59f9008edbac2871f4f4daeb85f81f9eb6a4860343b96523bb78e4c
+ size 4877702136
model-00003-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:66c6713d607f9103288abc2243b1120579080aa85ffdbf2555b1147408191ee6
- size 4877702248
+ oid sha256:726d4573ba37e78117f99ec61b23787b20df8d8ed4dd498cbd030879749ce35b
+ size 4877702224
model-00004-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b08fa688c3e7d49c3ac13a7d5bfccd50a2d5349491ee732fb8078a9a7c3a54b8
- size 4877702248
+ oid sha256:1bebc333febad4b94a9a358735ef62143e6f013245dcb792f50736a20a300e6d
+ size 4877702224
model-00005-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bbafcbb791038b1150102ac042976763a3cf0edb936f5e1708e098531081035c
- size 4877702248
+ oid sha256:b79dfd105f75b72744321a18e373052828983663ca3a04e9b8ce37793af66b14
+ size 4877702224
model-00006-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d4dbd7027154098bedcbe006df1e37aa0fdf1a8709df7fac54b8906ac422a3fb
- size 4877702248
+ oid sha256:026fa97c113a3dd9479517f6873938f33c7055dbeb8f9c48fc0547f0f4c3e4ac
+ size 4877702224
model-00007-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:00dedc2e879f8a3ae6490fb2797d651227325f43df48aba17be92ea8d5f857e4
- size 4814757840
+ oid sha256:d68e4c7628a12d706b467220083cc8b51d5fb1c5aaec855d8eec9c76bbb0e711
+ size 4185374296
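The .safetensors entries above are Git LFS pointer files: each records the sha256 digest (oid) and byte size of the actual shard. A sketch for checking a downloaded shard against its pointer is below; the local path is a placeholder, and the expected digest is the new oid of model-00001-of-00008.safetensors from this commit.

    import hashlib
    from pathlib import Path

    def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
        # Stream the file so multi-GB shards do not need to fit in memory.
        digest = hashlib.sha256()
        with path.open("rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    expected = "3531038d3ab1495875f5f036edcc59016fa42bb56e65f832c43ee793b5155372"
    shard = Path("model-00001-of-00008.safetensors")  # placeholder local path
    print(sha256_of(shard) == expected)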
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
  {
  "metadata": {
- "total_size": 35709121024
  },
  "weight_map": {
  "lm_head.weight": "model-00008-of-00008.safetensors",
@@ -119,14 +119,14 @@
  "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00008.safetensors",
  "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
  "model.layers.13.self_attn.v_proj.weight_scale": "model-00002-of-00008.safetensors",
- "model.layers.14.input_layernorm.weight": "model-00003-of-00008.safetensors",
- "model.layers.14.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.14.mlp.down_proj.weight_scale": "model-00003-of-00008.safetensors",
- "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.14.mlp.gate_proj.weight_scale": "model-00003-of-00008.safetensors",
- "model.layers.14.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.14.mlp.up_proj.weight_scale": "model-00003-of-00008.safetensors",
- "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
  "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00008.safetensors",
  "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
  "model.layers.14.self_attn.k_proj.weight_scale": "model-00002-of-00008.safetensors",
@@ -141,22 +141,22 @@
  "model.layers.15.input_layernorm.weight": "model-00003-of-00008.safetensors",
  "model.layers.15.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.15.mlp.down_proj.weight_scale": "model-00003-of-00008.safetensors",
- "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.15.mlp.gate_proj.weight_scale": "model-00003-of-00008.safetensors",
  "model.layers.15.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.15.mlp.up_proj.weight_scale": "model-00003-of-00008.safetensors",
  "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
- "model.layers.15.self_attn.k_proj.bias": "model-00003-of-00008.safetensors",
- "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.15.self_attn.k_proj.weight_scale": "model-00003-of-00008.safetensors",
- "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.15.self_attn.o_proj.weight_scale": "model-00003-of-00008.safetensors",
- "model.layers.15.self_attn.q_proj.bias": "model-00003-of-00008.safetensors",
- "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.15.self_attn.q_proj.weight_scale": "model-00003-of-00008.safetensors",
- "model.layers.15.self_attn.v_proj.bias": "model-00003-of-00008.safetensors",
- "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.15.self_attn.v_proj.weight_scale": "model-00003-of-00008.safetensors",
  "model.layers.16.input_layernorm.weight": "model-00003-of-00008.safetensors",
  "model.layers.16.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.16.mlp.down_proj.weight_scale": "model-00003-of-00008.safetensors",
@@ -328,14 +328,14 @@
  "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00008.safetensors",
  "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.23.self_attn.v_proj.weight_scale": "model-00003-of-00008.safetensors",
- "model.layers.24.input_layernorm.weight": "model-00004-of-00008.safetensors",
- "model.layers.24.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.24.mlp.down_proj.weight_scale": "model-00004-of-00008.safetensors",
- "model.layers.24.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.24.mlp.gate_proj.weight_scale": "model-00004-of-00008.safetensors",
- "model.layers.24.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.24.mlp.up_proj.weight_scale": "model-00004-of-00008.safetensors",
- "model.layers.24.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
  "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00008.safetensors",
  "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.24.self_attn.k_proj.weight_scale": "model-00003-of-00008.safetensors",
@@ -350,22 +350,22 @@
  "model.layers.25.input_layernorm.weight": "model-00004-of-00008.safetensors",
  "model.layers.25.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.25.mlp.down_proj.weight_scale": "model-00004-of-00008.safetensors",
- "model.layers.25.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.25.mlp.gate_proj.weight_scale": "model-00004-of-00008.safetensors",
  "model.layers.25.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.25.mlp.up_proj.weight_scale": "model-00004-of-00008.safetensors",
  "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
- "model.layers.25.self_attn.k_proj.bias": "model-00004-of-00008.safetensors",
- "model.layers.25.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.25.self_attn.k_proj.weight_scale": "model-00004-of-00008.safetensors",
- "model.layers.25.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.25.self_attn.o_proj.weight_scale": "model-00004-of-00008.safetensors",
- "model.layers.25.self_attn.q_proj.bias": "model-00004-of-00008.safetensors",
- "model.layers.25.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.25.self_attn.q_proj.weight_scale": "model-00004-of-00008.safetensors",
- "model.layers.25.self_attn.v_proj.bias": "model-00004-of-00008.safetensors",
- "model.layers.25.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.25.self_attn.v_proj.weight_scale": "model-00004-of-00008.safetensors",
  "model.layers.26.input_layernorm.weight": "model-00004-of-00008.safetensors",
  "model.layers.26.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.26.mlp.down_proj.weight_scale": "model-00004-of-00008.safetensors",
@@ -537,14 +537,14 @@
  "model.layers.33.self_attn.v_proj.bias": "model-00004-of-00008.safetensors",
  "model.layers.33.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.33.self_attn.v_proj.weight_scale": "model-00004-of-00008.safetensors",
- "model.layers.34.input_layernorm.weight": "model-00005-of-00008.safetensors",
- "model.layers.34.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.34.mlp.down_proj.weight_scale": "model-00005-of-00008.safetensors",
- "model.layers.34.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.34.mlp.gate_proj.weight_scale": "model-00005-of-00008.safetensors",
- "model.layers.34.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.34.mlp.up_proj.weight_scale": "model-00005-of-00008.safetensors",
- "model.layers.34.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
  "model.layers.34.self_attn.k_proj.bias": "model-00004-of-00008.safetensors",
  "model.layers.34.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.34.self_attn.k_proj.weight_scale": "model-00004-of-00008.safetensors",
@@ -559,22 +559,22 @@
  "model.layers.35.input_layernorm.weight": "model-00005-of-00008.safetensors",
  "model.layers.35.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.35.mlp.down_proj.weight_scale": "model-00005-of-00008.safetensors",
- "model.layers.35.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.35.mlp.gate_proj.weight_scale": "model-00005-of-00008.safetensors",
  "model.layers.35.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.35.mlp.up_proj.weight_scale": "model-00005-of-00008.safetensors",
  "model.layers.35.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
- "model.layers.35.self_attn.k_proj.bias": "model-00005-of-00008.safetensors",
- "model.layers.35.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.35.self_attn.k_proj.weight_scale": "model-00005-of-00008.safetensors",
- "model.layers.35.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.35.self_attn.o_proj.weight_scale": "model-00005-of-00008.safetensors",
- "model.layers.35.self_attn.q_proj.bias": "model-00005-of-00008.safetensors",
- "model.layers.35.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.35.self_attn.q_proj.weight_scale": "model-00005-of-00008.safetensors",
- "model.layers.35.self_attn.v_proj.bias": "model-00005-of-00008.safetensors",
- "model.layers.35.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.35.self_attn.v_proj.weight_scale": "model-00005-of-00008.safetensors",
  "model.layers.36.input_layernorm.weight": "model-00005-of-00008.safetensors",
  "model.layers.36.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.36.mlp.down_proj.weight_scale": "model-00005-of-00008.safetensors",
@@ -651,14 +651,14 @@
  "model.layers.39.self_attn.v_proj.bias": "model-00005-of-00008.safetensors",
  "model.layers.39.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.39.self_attn.v_proj.weight_scale": "model-00005-of-00008.safetensors",
- "model.layers.4.input_layernorm.weight": "model-00002-of-00008.safetensors",
- "model.layers.4.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.4.mlp.down_proj.weight_scale": "model-00002-of-00008.safetensors",
- "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.4.mlp.gate_proj.weight_scale": "model-00002-of-00008.safetensors",
- "model.layers.4.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.4.mlp.up_proj.weight_scale": "model-00002-of-00008.safetensors",
- "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
  "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00008.safetensors",
  "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
  "model.layers.4.self_attn.k_proj.weight_scale": "model-00001-of-00008.safetensors",
@@ -746,14 +746,14 @@
  "model.layers.43.self_attn.v_proj.bias": "model-00005-of-00008.safetensors",
  "model.layers.43.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.43.self_attn.v_proj.weight_scale": "model-00005-of-00008.safetensors",
- "model.layers.44.input_layernorm.weight": "model-00006-of-00008.safetensors",
- "model.layers.44.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.44.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
- "model.layers.44.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.44.mlp.gate_proj.weight_scale": "model-00006-of-00008.safetensors",
- "model.layers.44.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.44.mlp.up_proj.weight_scale": "model-00006-of-00008.safetensors",
- "model.layers.44.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
  "model.layers.44.self_attn.k_proj.bias": "model-00005-of-00008.safetensors",
  "model.layers.44.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.44.self_attn.k_proj.weight_scale": "model-00005-of-00008.safetensors",
@@ -768,22 +768,22 @@
  "model.layers.45.input_layernorm.weight": "model-00006-of-00008.safetensors",
  "model.layers.45.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.45.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
- "model.layers.45.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.45.mlp.gate_proj.weight_scale": "model-00006-of-00008.safetensors",
  "model.layers.45.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.45.mlp.up_proj.weight_scale": "model-00006-of-00008.safetensors",
  "model.layers.45.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
- "model.layers.45.self_attn.k_proj.bias": "model-00006-of-00008.safetensors",
- "model.layers.45.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.45.self_attn.k_proj.weight_scale": "model-00006-of-00008.safetensors",
- "model.layers.45.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.45.self_attn.o_proj.weight_scale": "model-00006-of-00008.safetensors",
- "model.layers.45.self_attn.q_proj.bias": "model-00006-of-00008.safetensors",
- "model.layers.45.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.45.self_attn.q_proj.weight_scale": "model-00006-of-00008.safetensors",
- "model.layers.45.self_attn.v_proj.bias": "model-00006-of-00008.safetensors",
- "model.layers.45.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.45.self_attn.v_proj.weight_scale": "model-00006-of-00008.safetensors",
  "model.layers.46.input_layernorm.weight": "model-00006-of-00008.safetensors",
  "model.layers.46.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.46.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
@@ -863,22 +863,22 @@
  "model.layers.5.input_layernorm.weight": "model-00002-of-00008.safetensors",
  "model.layers.5.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
  "model.layers.5.mlp.down_proj.weight_scale": "model-00002-of-00008.safetensors",
- "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.5.mlp.gate_proj.weight_scale": "model-00002-of-00008.safetensors",
  "model.layers.5.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
  "model.layers.5.mlp.up_proj.weight_scale": "model-00002-of-00008.safetensors",
  "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
- "model.layers.5.self_attn.k_proj.bias": "model-00002-of-00008.safetensors",
- "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.5.self_attn.k_proj.weight_scale": "model-00002-of-00008.safetensors",
- "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.5.self_attn.o_proj.weight_scale": "model-00002-of-00008.safetensors",
- "model.layers.5.self_attn.q_proj.bias": "model-00002-of-00008.safetensors",
- "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.5.self_attn.q_proj.weight_scale": "model-00002-of-00008.safetensors",
- "model.layers.5.self_attn.v_proj.bias": "model-00002-of-00008.safetensors",
- "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.5.self_attn.v_proj.weight_scale": "model-00002-of-00008.safetensors",
  "model.layers.50.input_layernorm.weight": "model-00006-of-00008.safetensors",
  "model.layers.50.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.50.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
@@ -955,14 +955,14 @@
  "model.layers.53.self_attn.v_proj.bias": "model-00006-of-00008.safetensors",
  "model.layers.53.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.53.self_attn.v_proj.weight_scale": "model-00006-of-00008.safetensors",
- "model.layers.54.input_layernorm.weight": "model-00007-of-00008.safetensors",
- "model.layers.54.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.54.mlp.down_proj.weight_scale": "model-00007-of-00008.safetensors",
- "model.layers.54.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.54.mlp.gate_proj.weight_scale": "model-00007-of-00008.safetensors",
- "model.layers.54.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.54.mlp.up_proj.weight_scale": "model-00007-of-00008.safetensors",
- "model.layers.54.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
  "model.layers.54.self_attn.k_proj.bias": "model-00006-of-00008.safetensors",
  "model.layers.54.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.54.self_attn.k_proj.weight_scale": "model-00006-of-00008.safetensors",
@@ -977,22 +977,22 @@
  "model.layers.55.input_layernorm.weight": "model-00007-of-00008.safetensors",
  "model.layers.55.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
  "model.layers.55.mlp.down_proj.weight_scale": "model-00007-of-00008.safetensors",
- "model.layers.55.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.55.mlp.gate_proj.weight_scale": "model-00007-of-00008.safetensors",
  "model.layers.55.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
  "model.layers.55.mlp.up_proj.weight_scale": "model-00007-of-00008.safetensors",
  "model.layers.55.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
- "model.layers.55.self_attn.k_proj.bias": "model-00007-of-00008.safetensors",
- "model.layers.55.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.55.self_attn.k_proj.weight_scale": "model-00007-of-00008.safetensors",
- "model.layers.55.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.55.self_attn.o_proj.weight_scale": "model-00007-of-00008.safetensors",
- "model.layers.55.self_attn.q_proj.bias": "model-00007-of-00008.safetensors",
- "model.layers.55.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.55.self_attn.q_proj.weight_scale": "model-00007-of-00008.safetensors",
- "model.layers.55.self_attn.v_proj.bias": "model-00007-of-00008.safetensors",
- "model.layers.55.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.55.self_attn.v_proj.weight_scale": "model-00007-of-00008.safetensors",
  "model.layers.56.input_layernorm.weight": "model-00007-of-00008.safetensors",
  "model.layers.56.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
  "model.layers.56.mlp.down_proj.weight_scale": "model-00007-of-00008.safetensors",
@@ -1224,393 +1224,555 @@
  "model.norm.weight": "model-00007-of-00008.safetensors",
  [... unchanged "visual.blocks.0" through "visual.blocks.31" attention/MLP/norm entries, "visual.merger.*" and "visual.patch_embed.proj.weight", all mapped to "model-00001-of-00008.safetensors" ...]
  }
  }
 
1
  {
2
  "metadata": {
3
+ "total_size": 35023176192
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00008-of-00008.safetensors",
 
119
  "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00008.safetensors",
120
  "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
121
  "model.layers.13.self_attn.v_proj.weight_scale": "model-00002-of-00008.safetensors",
122
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00008.safetensors",
123
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
124
+ "model.layers.14.mlp.down_proj.weight_scale": "model-00002-of-00008.safetensors",
125
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
126
+ "model.layers.14.mlp.gate_proj.weight_scale": "model-00002-of-00008.safetensors",
127
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
128
+ "model.layers.14.mlp.up_proj.weight_scale": "model-00002-of-00008.safetensors",
129
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
130
  "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00008.safetensors",
131
  "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
132
  "model.layers.14.self_attn.k_proj.weight_scale": "model-00002-of-00008.safetensors",
 
141
  "model.layers.15.input_layernorm.weight": "model-00003-of-00008.safetensors",
142
  "model.layers.15.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
143
  "model.layers.15.mlp.down_proj.weight_scale": "model-00003-of-00008.safetensors",
144
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
145
+ "model.layers.15.mlp.gate_proj.weight_scale": "model-00002-of-00008.safetensors",
146
  "model.layers.15.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
147
  "model.layers.15.mlp.up_proj.weight_scale": "model-00003-of-00008.safetensors",
148
  "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
149
+ "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00008.safetensors",
150
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
151
+ "model.layers.15.self_attn.k_proj.weight_scale": "model-00002-of-00008.safetensors",
152
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
153
+ "model.layers.15.self_attn.o_proj.weight_scale": "model-00002-of-00008.safetensors",
154
+ "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00008.safetensors",
155
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
156
+ "model.layers.15.self_attn.q_proj.weight_scale": "model-00002-of-00008.safetensors",
157
+ "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00008.safetensors",
158
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
159
+ "model.layers.15.self_attn.v_proj.weight_scale": "model-00002-of-00008.safetensors",
160
  "model.layers.16.input_layernorm.weight": "model-00003-of-00008.safetensors",
161
  "model.layers.16.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
162
  "model.layers.16.mlp.down_proj.weight_scale": "model-00003-of-00008.safetensors",
 
328
  "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00008.safetensors",
329
  "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
330
  "model.layers.23.self_attn.v_proj.weight_scale": "model-00003-of-00008.safetensors",
331
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00008.safetensors",
332
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
333
+ "model.layers.24.mlp.down_proj.weight_scale": "model-00003-of-00008.safetensors",
334
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
335
+ "model.layers.24.mlp.gate_proj.weight_scale": "model-00003-of-00008.safetensors",
336
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
337
+ "model.layers.24.mlp.up_proj.weight_scale": "model-00003-of-00008.safetensors",
338
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
339
  "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00008.safetensors",
340
  "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
341
  "model.layers.24.self_attn.k_proj.weight_scale": "model-00003-of-00008.safetensors",
 
350
  "model.layers.25.input_layernorm.weight": "model-00004-of-00008.safetensors",
351
  "model.layers.25.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
352
  "model.layers.25.mlp.down_proj.weight_scale": "model-00004-of-00008.safetensors",
353
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
354
+ "model.layers.25.mlp.gate_proj.weight_scale": "model-00003-of-00008.safetensors",
355
  "model.layers.25.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
356
  "model.layers.25.mlp.up_proj.weight_scale": "model-00004-of-00008.safetensors",
357
  "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
358
+ "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00008.safetensors",
359
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
360
+ "model.layers.25.self_attn.k_proj.weight_scale": "model-00003-of-00008.safetensors",
361
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
362
+ "model.layers.25.self_attn.o_proj.weight_scale": "model-00003-of-00008.safetensors",
363
+ "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00008.safetensors",
364
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
365
+ "model.layers.25.self_attn.q_proj.weight_scale": "model-00003-of-00008.safetensors",
366
+ "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00008.safetensors",
367
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
368
+ "model.layers.25.self_attn.v_proj.weight_scale": "model-00003-of-00008.safetensors",
369
  "model.layers.26.input_layernorm.weight": "model-00004-of-00008.safetensors",
370
  "model.layers.26.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
371
  "model.layers.26.mlp.down_proj.weight_scale": "model-00004-of-00008.safetensors",
 
537
  "model.layers.33.self_attn.v_proj.bias": "model-00004-of-00008.safetensors",
538
  "model.layers.33.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
539
  "model.layers.33.self_attn.v_proj.weight_scale": "model-00004-of-00008.safetensors",
540
+ "model.layers.34.input_layernorm.weight": "model-00004-of-00008.safetensors",
541
+ "model.layers.34.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
542
+ "model.layers.34.mlp.down_proj.weight_scale": "model-00004-of-00008.safetensors",
543
+ "model.layers.34.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
544
+ "model.layers.34.mlp.gate_proj.weight_scale": "model-00004-of-00008.safetensors",
545
+ "model.layers.34.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
546
+ "model.layers.34.mlp.up_proj.weight_scale": "model-00004-of-00008.safetensors",
547
+ "model.layers.34.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
548
  "model.layers.34.self_attn.k_proj.bias": "model-00004-of-00008.safetensors",
549
  "model.layers.34.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
550
  "model.layers.34.self_attn.k_proj.weight_scale": "model-00004-of-00008.safetensors",
 
559
  "model.layers.35.input_layernorm.weight": "model-00005-of-00008.safetensors",
560
  "model.layers.35.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
561
  "model.layers.35.mlp.down_proj.weight_scale": "model-00005-of-00008.safetensors",
562
+ "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
563
+ "model.layers.35.mlp.gate_proj.weight_scale": "model-00004-of-00008.safetensors",
564
  "model.layers.35.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
565
  "model.layers.35.mlp.up_proj.weight_scale": "model-00005-of-00008.safetensors",
566
  "model.layers.35.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
567
+ "model.layers.35.self_attn.k_proj.bias": "model-00004-of-00008.safetensors",
568
+ "model.layers.35.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
569
+ "model.layers.35.self_attn.k_proj.weight_scale": "model-00004-of-00008.safetensors",
570
+ "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
571
+ "model.layers.35.self_attn.o_proj.weight_scale": "model-00004-of-00008.safetensors",
572
+ "model.layers.35.self_attn.q_proj.bias": "model-00004-of-00008.safetensors",
573
+ "model.layers.35.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
574
+ "model.layers.35.self_attn.q_proj.weight_scale": "model-00004-of-00008.safetensors",
575
+ "model.layers.35.self_attn.v_proj.bias": "model-00004-of-00008.safetensors",
576
+ "model.layers.35.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
577
+ "model.layers.35.self_attn.v_proj.weight_scale": "model-00004-of-00008.safetensors",
578
  "model.layers.36.input_layernorm.weight": "model-00005-of-00008.safetensors",
579
  "model.layers.36.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
580
  "model.layers.36.mlp.down_proj.weight_scale": "model-00005-of-00008.safetensors",
 
651
  "model.layers.39.self_attn.v_proj.bias": "model-00005-of-00008.safetensors",
652
  "model.layers.39.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
653
  "model.layers.39.self_attn.v_proj.weight_scale": "model-00005-of-00008.safetensors",
654
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00008.safetensors",
655
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
656
+ "model.layers.4.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
657
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
658
+ "model.layers.4.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
659
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
660
+ "model.layers.4.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
661
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
662
  "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00008.safetensors",
663
  "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
664
  "model.layers.4.self_attn.k_proj.weight_scale": "model-00001-of-00008.safetensors",
 
746
  "model.layers.43.self_attn.v_proj.bias": "model-00005-of-00008.safetensors",
747
  "model.layers.43.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
748
  "model.layers.43.self_attn.v_proj.weight_scale": "model-00005-of-00008.safetensors",
749
+ "model.layers.44.input_layernorm.weight": "model-00005-of-00008.safetensors",
750
+ "model.layers.44.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
751
+ "model.layers.44.mlp.down_proj.weight_scale": "model-00005-of-00008.safetensors",
752
+ "model.layers.44.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
753
+ "model.layers.44.mlp.gate_proj.weight_scale": "model-00005-of-00008.safetensors",
754
+ "model.layers.44.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
755
+ "model.layers.44.mlp.up_proj.weight_scale": "model-00005-of-00008.safetensors",
756
+ "model.layers.44.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
757
  "model.layers.44.self_attn.k_proj.bias": "model-00005-of-00008.safetensors",
758
  "model.layers.44.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
759
  "model.layers.44.self_attn.k_proj.weight_scale": "model-00005-of-00008.safetensors",
 
768
  "model.layers.45.input_layernorm.weight": "model-00006-of-00008.safetensors",
769
  "model.layers.45.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
770
  "model.layers.45.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
771
+ "model.layers.45.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
772
+ "model.layers.45.mlp.gate_proj.weight_scale": "model-00005-of-00008.safetensors",
773
  "model.layers.45.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
774
  "model.layers.45.mlp.up_proj.weight_scale": "model-00006-of-00008.safetensors",
775
  "model.layers.45.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
776
+ "model.layers.45.self_attn.k_proj.bias": "model-00005-of-00008.safetensors",
777
+ "model.layers.45.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
778
+ "model.layers.45.self_attn.k_proj.weight_scale": "model-00005-of-00008.safetensors",
779
+ "model.layers.45.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
780
+ "model.layers.45.self_attn.o_proj.weight_scale": "model-00005-of-00008.safetensors",
781
+ "model.layers.45.self_attn.q_proj.bias": "model-00005-of-00008.safetensors",
782
+ "model.layers.45.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
783
+ "model.layers.45.self_attn.q_proj.weight_scale": "model-00005-of-00008.safetensors",
784
+ "model.layers.45.self_attn.v_proj.bias": "model-00005-of-00008.safetensors",
785
+ "model.layers.45.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
786
+ "model.layers.45.self_attn.v_proj.weight_scale": "model-00005-of-00008.safetensors",
787
  "model.layers.46.input_layernorm.weight": "model-00006-of-00008.safetensors",
788
  "model.layers.46.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
789
  "model.layers.46.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
 
863
  "model.layers.5.input_layernorm.weight": "model-00002-of-00008.safetensors",
864
  "model.layers.5.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
865
  "model.layers.5.mlp.down_proj.weight_scale": "model-00002-of-00008.safetensors",
866
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
867
+ "model.layers.5.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
868
  "model.layers.5.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
869
  "model.layers.5.mlp.up_proj.weight_scale": "model-00002-of-00008.safetensors",
870
  "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
871
+ "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00008.safetensors",
872
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
873
+ "model.layers.5.self_attn.k_proj.weight_scale": "model-00001-of-00008.safetensors",
874
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
875
+ "model.layers.5.self_attn.o_proj.weight_scale": "model-00001-of-00008.safetensors",
876
+ "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00008.safetensors",
877
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
878
+ "model.layers.5.self_attn.q_proj.weight_scale": "model-00001-of-00008.safetensors",
879
+ "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00008.safetensors",
880
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
881
+ "model.layers.5.self_attn.v_proj.weight_scale": "model-00001-of-00008.safetensors",
882
  "model.layers.50.input_layernorm.weight": "model-00006-of-00008.safetensors",
883
  "model.layers.50.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
884
  "model.layers.50.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
 
955
  "model.layers.53.self_attn.v_proj.bias": "model-00006-of-00008.safetensors",
956
  "model.layers.53.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
957
  "model.layers.53.self_attn.v_proj.weight_scale": "model-00006-of-00008.safetensors",
958
+ "model.layers.54.input_layernorm.weight": "model-00006-of-00008.safetensors",
959
+ "model.layers.54.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
960
+ "model.layers.54.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
961
+ "model.layers.54.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
962
+ "model.layers.54.mlp.gate_proj.weight_scale": "model-00006-of-00008.safetensors",
963
+ "model.layers.54.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
964
+ "model.layers.54.mlp.up_proj.weight_scale": "model-00006-of-00008.safetensors",
965
+ "model.layers.54.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
966
  "model.layers.54.self_attn.k_proj.bias": "model-00006-of-00008.safetensors",
967
  "model.layers.54.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
968
  "model.layers.54.self_attn.k_proj.weight_scale": "model-00006-of-00008.safetensors",
 
977
  "model.layers.55.input_layernorm.weight": "model-00007-of-00008.safetensors",
978
  "model.layers.55.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
979
  "model.layers.55.mlp.down_proj.weight_scale": "model-00007-of-00008.safetensors",
980
+ "model.layers.55.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
981
+ "model.layers.55.mlp.gate_proj.weight_scale": "model-00006-of-00008.safetensors",
982
  "model.layers.55.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
983
  "model.layers.55.mlp.up_proj.weight_scale": "model-00007-of-00008.safetensors",
984
  "model.layers.55.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
985
+ "model.layers.55.self_attn.k_proj.bias": "model-00006-of-00008.safetensors",
986
+ "model.layers.55.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
987
+ "model.layers.55.self_attn.k_proj.weight_scale": "model-00006-of-00008.safetensors",
988
+ "model.layers.55.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
989
+ "model.layers.55.self_attn.o_proj.weight_scale": "model-00006-of-00008.safetensors",
990
+ "model.layers.55.self_attn.q_proj.bias": "model-00006-of-00008.safetensors",
991
+ "model.layers.55.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
992
+ "model.layers.55.self_attn.q_proj.weight_scale": "model-00006-of-00008.safetensors",
993
+ "model.layers.55.self_attn.v_proj.bias": "model-00006-of-00008.safetensors",
994
+ "model.layers.55.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
995
+ "model.layers.55.self_attn.v_proj.weight_scale": "model-00006-of-00008.safetensors",
996
  "model.layers.56.input_layernorm.weight": "model-00007-of-00008.safetensors",
997
  "model.layers.56.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
998
  "model.layers.56.mlp.down_proj.weight_scale": "model-00007-of-00008.safetensors",
 
1224
  "model.norm.weight": "model-00007-of-00008.safetensors",
1225
  "visual.blocks.0.attn.proj.bias": "model-00001-of-00008.safetensors",
1226
  "visual.blocks.0.attn.proj.weight": "model-00001-of-00008.safetensors",
1227
+ "visual.blocks.0.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1228
  "visual.blocks.0.attn.qkv.bias": "model-00001-of-00008.safetensors",
1229
  "visual.blocks.0.attn.qkv.weight": "model-00001-of-00008.safetensors",
1230
+ "visual.blocks.0.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1231
  "visual.blocks.0.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1232
  "visual.blocks.0.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1233
+ "visual.blocks.0.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1234
  "visual.blocks.0.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1235
  "visual.blocks.0.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1236
+ "visual.blocks.0.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1237
  "visual.blocks.0.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1238
  "visual.blocks.0.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1239
+ "visual.blocks.0.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1240
  "visual.blocks.0.norm1.weight": "model-00001-of-00008.safetensors",
1241
  "visual.blocks.0.norm2.weight": "model-00001-of-00008.safetensors",
1242
  "visual.blocks.1.attn.proj.bias": "model-00001-of-00008.safetensors",
1243
  "visual.blocks.1.attn.proj.weight": "model-00001-of-00008.safetensors",
1244
+ "visual.blocks.1.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1245
  "visual.blocks.1.attn.qkv.bias": "model-00001-of-00008.safetensors",
1246
  "visual.blocks.1.attn.qkv.weight": "model-00001-of-00008.safetensors",
1247
+ "visual.blocks.1.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1248
  "visual.blocks.1.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1249
  "visual.blocks.1.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1250
+ "visual.blocks.1.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1251
  "visual.blocks.1.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1252
  "visual.blocks.1.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1253
+ "visual.blocks.1.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1254
  "visual.blocks.1.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1255
  "visual.blocks.1.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1256
+ "visual.blocks.1.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1257
  "visual.blocks.1.norm1.weight": "model-00001-of-00008.safetensors",
1258
  "visual.blocks.1.norm2.weight": "model-00001-of-00008.safetensors",
1259
  "visual.blocks.10.attn.proj.bias": "model-00001-of-00008.safetensors",
1260
  "visual.blocks.10.attn.proj.weight": "model-00001-of-00008.safetensors",
1261
+ "visual.blocks.10.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1262
  "visual.blocks.10.attn.qkv.bias": "model-00001-of-00008.safetensors",
1263
  "visual.blocks.10.attn.qkv.weight": "model-00001-of-00008.safetensors",
1264
+ "visual.blocks.10.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1265
  "visual.blocks.10.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1266
  "visual.blocks.10.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1267
+ "visual.blocks.10.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1268
  "visual.blocks.10.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1269
  "visual.blocks.10.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1270
+ "visual.blocks.10.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1271
  "visual.blocks.10.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1272
  "visual.blocks.10.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1273
+ "visual.blocks.10.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1274
  "visual.blocks.10.norm1.weight": "model-00001-of-00008.safetensors",
1275
  "visual.blocks.10.norm2.weight": "model-00001-of-00008.safetensors",
1276
  "visual.blocks.11.attn.proj.bias": "model-00001-of-00008.safetensors",
1277
  "visual.blocks.11.attn.proj.weight": "model-00001-of-00008.safetensors",
1278
+ "visual.blocks.11.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1279
  "visual.blocks.11.attn.qkv.bias": "model-00001-of-00008.safetensors",
1280
  "visual.blocks.11.attn.qkv.weight": "model-00001-of-00008.safetensors",
1281
+ "visual.blocks.11.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1282
  "visual.blocks.11.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1283
  "visual.blocks.11.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1284
+ "visual.blocks.11.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1285
  "visual.blocks.11.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1286
  "visual.blocks.11.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1287
+ "visual.blocks.11.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1288
  "visual.blocks.11.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1289
  "visual.blocks.11.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1290
+ "visual.blocks.11.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1291
  "visual.blocks.11.norm1.weight": "model-00001-of-00008.safetensors",
1292
  "visual.blocks.11.norm2.weight": "model-00001-of-00008.safetensors",
1293
  "visual.blocks.12.attn.proj.bias": "model-00001-of-00008.safetensors",
1294
  "visual.blocks.12.attn.proj.weight": "model-00001-of-00008.safetensors",
1295
+ "visual.blocks.12.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1296
  "visual.blocks.12.attn.qkv.bias": "model-00001-of-00008.safetensors",
1297
  "visual.blocks.12.attn.qkv.weight": "model-00001-of-00008.safetensors",
1298
+ "visual.blocks.12.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1299
  "visual.blocks.12.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1300
  "visual.blocks.12.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1301
+ "visual.blocks.12.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1302
  "visual.blocks.12.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1303
  "visual.blocks.12.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1304
+ "visual.blocks.12.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1305
  "visual.blocks.12.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1306
  "visual.blocks.12.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1307
+ "visual.blocks.12.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1308
  "visual.blocks.12.norm1.weight": "model-00001-of-00008.safetensors",
1309
  "visual.blocks.12.norm2.weight": "model-00001-of-00008.safetensors",
1310
  "visual.blocks.13.attn.proj.bias": "model-00001-of-00008.safetensors",
1311
  "visual.blocks.13.attn.proj.weight": "model-00001-of-00008.safetensors",
1312
+ "visual.blocks.13.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1313
  "visual.blocks.13.attn.qkv.bias": "model-00001-of-00008.safetensors",
1314
  "visual.blocks.13.attn.qkv.weight": "model-00001-of-00008.safetensors",
1315
+ "visual.blocks.13.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1316
  "visual.blocks.13.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1317
  "visual.blocks.13.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1318
+ "visual.blocks.13.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1319
  "visual.blocks.13.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1320
  "visual.blocks.13.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1321
+ "visual.blocks.13.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1322
  "visual.blocks.13.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1323
  "visual.blocks.13.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1324
+ "visual.blocks.13.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1325
  "visual.blocks.13.norm1.weight": "model-00001-of-00008.safetensors",
1326
  "visual.blocks.13.norm2.weight": "model-00001-of-00008.safetensors",
1327
  "visual.blocks.14.attn.proj.bias": "model-00001-of-00008.safetensors",
1328
  "visual.blocks.14.attn.proj.weight": "model-00001-of-00008.safetensors",
1329
+ "visual.blocks.14.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1330
  "visual.blocks.14.attn.qkv.bias": "model-00001-of-00008.safetensors",
1331
  "visual.blocks.14.attn.qkv.weight": "model-00001-of-00008.safetensors",
1332
+ "visual.blocks.14.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1333
  "visual.blocks.14.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1334
  "visual.blocks.14.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1335
+ "visual.blocks.14.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1336
  "visual.blocks.14.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1337
  "visual.blocks.14.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1338
+ "visual.blocks.14.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1339
  "visual.blocks.14.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1340
  "visual.blocks.14.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1341
+ "visual.blocks.14.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1342
  "visual.blocks.14.norm1.weight": "model-00001-of-00008.safetensors",
1343
  "visual.blocks.14.norm2.weight": "model-00001-of-00008.safetensors",
1344
  "visual.blocks.15.attn.proj.bias": "model-00001-of-00008.safetensors",
1345
  "visual.blocks.15.attn.proj.weight": "model-00001-of-00008.safetensors",
1346
+ "visual.blocks.15.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1347
  "visual.blocks.15.attn.qkv.bias": "model-00001-of-00008.safetensors",
1348
  "visual.blocks.15.attn.qkv.weight": "model-00001-of-00008.safetensors",
1349
+ "visual.blocks.15.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1350
  "visual.blocks.15.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1351
  "visual.blocks.15.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1352
+ "visual.blocks.15.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1353
  "visual.blocks.15.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1354
  "visual.blocks.15.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1355
+ "visual.blocks.15.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1356
  "visual.blocks.15.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1357
  "visual.blocks.15.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1358
+ "visual.blocks.15.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1359
  "visual.blocks.15.norm1.weight": "model-00001-of-00008.safetensors",
1360
  "visual.blocks.15.norm2.weight": "model-00001-of-00008.safetensors",
1361
  "visual.blocks.16.attn.proj.bias": "model-00001-of-00008.safetensors",
1362
  "visual.blocks.16.attn.proj.weight": "model-00001-of-00008.safetensors",
1363
+ "visual.blocks.16.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1364
  "visual.blocks.16.attn.qkv.bias": "model-00001-of-00008.safetensors",
1365
  "visual.blocks.16.attn.qkv.weight": "model-00001-of-00008.safetensors",
1366
+ "visual.blocks.16.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1367
  "visual.blocks.16.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1368
  "visual.blocks.16.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1369
+ "visual.blocks.16.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1370
  "visual.blocks.16.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1371
  "visual.blocks.16.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1372
+ "visual.blocks.16.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1373
  "visual.blocks.16.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1374
  "visual.blocks.16.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1375
+ "visual.blocks.16.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1376
  "visual.blocks.16.norm1.weight": "model-00001-of-00008.safetensors",
1377
  "visual.blocks.16.norm2.weight": "model-00001-of-00008.safetensors",
1378
  "visual.blocks.17.attn.proj.bias": "model-00001-of-00008.safetensors",
1379
  "visual.blocks.17.attn.proj.weight": "model-00001-of-00008.safetensors",
1380
+ "visual.blocks.17.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1381
  "visual.blocks.17.attn.qkv.bias": "model-00001-of-00008.safetensors",
1382
  "visual.blocks.17.attn.qkv.weight": "model-00001-of-00008.safetensors",
1383
+ "visual.blocks.17.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1384
  "visual.blocks.17.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1385
  "visual.blocks.17.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1386
+ "visual.blocks.17.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1387
  "visual.blocks.17.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1388
  "visual.blocks.17.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1389
+ "visual.blocks.17.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1390
  "visual.blocks.17.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1391
  "visual.blocks.17.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1392
+ "visual.blocks.17.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1393
  "visual.blocks.17.norm1.weight": "model-00001-of-00008.safetensors",
1394
  "visual.blocks.17.norm2.weight": "model-00001-of-00008.safetensors",
1395
  "visual.blocks.18.attn.proj.bias": "model-00001-of-00008.safetensors",
1396
  "visual.blocks.18.attn.proj.weight": "model-00001-of-00008.safetensors",
1397
+ "visual.blocks.18.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1398
  "visual.blocks.18.attn.qkv.bias": "model-00001-of-00008.safetensors",
1399
  "visual.blocks.18.attn.qkv.weight": "model-00001-of-00008.safetensors",
1400
+ "visual.blocks.18.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1401
  "visual.blocks.18.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1402
  "visual.blocks.18.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1403
+ "visual.blocks.18.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1404
  "visual.blocks.18.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1405
  "visual.blocks.18.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1406
+ "visual.blocks.18.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1407
  "visual.blocks.18.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1408
  "visual.blocks.18.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1409
+ "visual.blocks.18.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1410
  "visual.blocks.18.norm1.weight": "model-00001-of-00008.safetensors",
1411
  "visual.blocks.18.norm2.weight": "model-00001-of-00008.safetensors",
1412
  "visual.blocks.19.attn.proj.bias": "model-00001-of-00008.safetensors",
1413
  "visual.blocks.19.attn.proj.weight": "model-00001-of-00008.safetensors",
1414
+ "visual.blocks.19.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1415
  "visual.blocks.19.attn.qkv.bias": "model-00001-of-00008.safetensors",
1416
  "visual.blocks.19.attn.qkv.weight": "model-00001-of-00008.safetensors",
1417
+ "visual.blocks.19.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1418
  "visual.blocks.19.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1419
  "visual.blocks.19.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1420
+ "visual.blocks.19.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1421
  "visual.blocks.19.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1422
  "visual.blocks.19.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1423
+ "visual.blocks.19.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1424
  "visual.blocks.19.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1425
  "visual.blocks.19.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1426
+ "visual.blocks.19.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1427
  "visual.blocks.19.norm1.weight": "model-00001-of-00008.safetensors",
1428
  "visual.blocks.19.norm2.weight": "model-00001-of-00008.safetensors",
1429
  "visual.blocks.2.attn.proj.bias": "model-00001-of-00008.safetensors",
1430
  "visual.blocks.2.attn.proj.weight": "model-00001-of-00008.safetensors",
1431
+ "visual.blocks.2.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1432
  "visual.blocks.2.attn.qkv.bias": "model-00001-of-00008.safetensors",
1433
  "visual.blocks.2.attn.qkv.weight": "model-00001-of-00008.safetensors",
1434
+ "visual.blocks.2.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1435
  "visual.blocks.2.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1436
  "visual.blocks.2.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1437
+ "visual.blocks.2.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1438
  "visual.blocks.2.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1439
  "visual.blocks.2.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1440
+ "visual.blocks.2.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1441
  "visual.blocks.2.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1442
  "visual.blocks.2.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1443
+ "visual.blocks.2.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1444
  "visual.blocks.2.norm1.weight": "model-00001-of-00008.safetensors",
1445
  "visual.blocks.2.norm2.weight": "model-00001-of-00008.safetensors",
1446
  "visual.blocks.20.attn.proj.bias": "model-00001-of-00008.safetensors",
1447
  "visual.blocks.20.attn.proj.weight": "model-00001-of-00008.safetensors",
1448
+ "visual.blocks.20.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1449
  "visual.blocks.20.attn.qkv.bias": "model-00001-of-00008.safetensors",
1450
  "visual.blocks.20.attn.qkv.weight": "model-00001-of-00008.safetensors",
1451
+ "visual.blocks.20.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1452
  "visual.blocks.20.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1453
  "visual.blocks.20.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1454
+ "visual.blocks.20.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1455
  "visual.blocks.20.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1456
  "visual.blocks.20.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1457
+ "visual.blocks.20.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1458
  "visual.blocks.20.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1459
  "visual.blocks.20.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1460
+ "visual.blocks.20.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1461
  "visual.blocks.20.norm1.weight": "model-00001-of-00008.safetensors",
1462
  "visual.blocks.20.norm2.weight": "model-00001-of-00008.safetensors",
1463
  "visual.blocks.21.attn.proj.bias": "model-00001-of-00008.safetensors",
1464
  "visual.blocks.21.attn.proj.weight": "model-00001-of-00008.safetensors",
1465
+ "visual.blocks.21.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1466
  "visual.blocks.21.attn.qkv.bias": "model-00001-of-00008.safetensors",
1467
  "visual.blocks.21.attn.qkv.weight": "model-00001-of-00008.safetensors",
1468
+ "visual.blocks.21.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1469
  "visual.blocks.21.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1470
  "visual.blocks.21.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1471
+ "visual.blocks.21.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1472
  "visual.blocks.21.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1473
  "visual.blocks.21.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1474
+ "visual.blocks.21.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1475
  "visual.blocks.21.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1476
  "visual.blocks.21.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1477
+ "visual.blocks.21.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1478
  "visual.blocks.21.norm1.weight": "model-00001-of-00008.safetensors",
1479
  "visual.blocks.21.norm2.weight": "model-00001-of-00008.safetensors",
1480
  "visual.blocks.22.attn.proj.bias": "model-00001-of-00008.safetensors",
1481
  "visual.blocks.22.attn.proj.weight": "model-00001-of-00008.safetensors",
1482
+ "visual.blocks.22.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1483
  "visual.blocks.22.attn.qkv.bias": "model-00001-of-00008.safetensors",
1484
  "visual.blocks.22.attn.qkv.weight": "model-00001-of-00008.safetensors",
1485
+ "visual.blocks.22.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1486
  "visual.blocks.22.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1487
  "visual.blocks.22.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1488
+ "visual.blocks.22.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1489
  "visual.blocks.22.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1490
  "visual.blocks.22.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1491
+ "visual.blocks.22.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1492
  "visual.blocks.22.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1493
  "visual.blocks.22.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1494
+ "visual.blocks.22.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1495
  "visual.blocks.22.norm1.weight": "model-00001-of-00008.safetensors",
1496
  "visual.blocks.22.norm2.weight": "model-00001-of-00008.safetensors",
1497
  "visual.blocks.23.attn.proj.bias": "model-00001-of-00008.safetensors",
1498
  "visual.blocks.23.attn.proj.weight": "model-00001-of-00008.safetensors",
1499
+ "visual.blocks.23.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1500
  "visual.blocks.23.attn.qkv.bias": "model-00001-of-00008.safetensors",
1501
  "visual.blocks.23.attn.qkv.weight": "model-00001-of-00008.safetensors",
1502
+ "visual.blocks.23.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1503
  "visual.blocks.23.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1504
  "visual.blocks.23.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1505
+ "visual.blocks.23.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1506
  "visual.blocks.23.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1507
  "visual.blocks.23.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1508
+ "visual.blocks.23.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1509
  "visual.blocks.23.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1510
  "visual.blocks.23.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1511
+ "visual.blocks.23.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1512
  "visual.blocks.23.norm1.weight": "model-00001-of-00008.safetensors",
1513
  "visual.blocks.23.norm2.weight": "model-00001-of-00008.safetensors",
1514
  "visual.blocks.24.attn.proj.bias": "model-00001-of-00008.safetensors",
1515
  "visual.blocks.24.attn.proj.weight": "model-00001-of-00008.safetensors",
1516
+ "visual.blocks.24.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1517
  "visual.blocks.24.attn.qkv.bias": "model-00001-of-00008.safetensors",
1518
  "visual.blocks.24.attn.qkv.weight": "model-00001-of-00008.safetensors",
1519
+ "visual.blocks.24.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1520
  "visual.blocks.24.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1521
  "visual.blocks.24.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1522
+ "visual.blocks.24.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1523
  "visual.blocks.24.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1524
  "visual.blocks.24.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1525
+ "visual.blocks.24.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1526
  "visual.blocks.24.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1527
  "visual.blocks.24.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1528
+ "visual.blocks.24.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1529
  "visual.blocks.24.norm1.weight": "model-00001-of-00008.safetensors",
1530
  "visual.blocks.24.norm2.weight": "model-00001-of-00008.safetensors",
1531
  "visual.blocks.25.attn.proj.bias": "model-00001-of-00008.safetensors",
1532
  "visual.blocks.25.attn.proj.weight": "model-00001-of-00008.safetensors",
1533
+ "visual.blocks.25.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1534
  "visual.blocks.25.attn.qkv.bias": "model-00001-of-00008.safetensors",
1535
  "visual.blocks.25.attn.qkv.weight": "model-00001-of-00008.safetensors",
1536
+ "visual.blocks.25.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1537
  "visual.blocks.25.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1538
  "visual.blocks.25.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1539
+ "visual.blocks.25.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1540
  "visual.blocks.25.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1541
  "visual.blocks.25.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1542
+ "visual.blocks.25.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1543
  "visual.blocks.25.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1544
  "visual.blocks.25.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1545
+ "visual.blocks.25.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1546
  "visual.blocks.25.norm1.weight": "model-00001-of-00008.safetensors",
1547
  "visual.blocks.25.norm2.weight": "model-00001-of-00008.safetensors",
1548
  "visual.blocks.26.attn.proj.bias": "model-00001-of-00008.safetensors",
1549
  "visual.blocks.26.attn.proj.weight": "model-00001-of-00008.safetensors",
1550
+ "visual.blocks.26.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1551
  "visual.blocks.26.attn.qkv.bias": "model-00001-of-00008.safetensors",
1552
  "visual.blocks.26.attn.qkv.weight": "model-00001-of-00008.safetensors",
1553
+ "visual.blocks.26.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1554
  "visual.blocks.26.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1555
  "visual.blocks.26.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1556
+ "visual.blocks.26.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1557
  "visual.blocks.26.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1558
  "visual.blocks.26.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1559
+ "visual.blocks.26.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1560
  "visual.blocks.26.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1561
  "visual.blocks.26.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1562
+ "visual.blocks.26.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1563
  "visual.blocks.26.norm1.weight": "model-00001-of-00008.safetensors",
1564
  "visual.blocks.26.norm2.weight": "model-00001-of-00008.safetensors",
1565
  "visual.blocks.27.attn.proj.bias": "model-00001-of-00008.safetensors",
1566
  "visual.blocks.27.attn.proj.weight": "model-00001-of-00008.safetensors",
1567
+ "visual.blocks.27.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1568
  "visual.blocks.27.attn.qkv.bias": "model-00001-of-00008.safetensors",
1569
  "visual.blocks.27.attn.qkv.weight": "model-00001-of-00008.safetensors",
1570
+ "visual.blocks.27.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1571
  "visual.blocks.27.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1572
  "visual.blocks.27.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1573
+ "visual.blocks.27.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1574
  "visual.blocks.27.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1575
  "visual.blocks.27.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1576
+ "visual.blocks.27.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1577
  "visual.blocks.27.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1578
  "visual.blocks.27.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1579
+ "visual.blocks.27.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1580
  "visual.blocks.27.norm1.weight": "model-00001-of-00008.safetensors",
1581
  "visual.blocks.27.norm2.weight": "model-00001-of-00008.safetensors",
1582
  "visual.blocks.28.attn.proj.bias": "model-00001-of-00008.safetensors",
1583
  "visual.blocks.28.attn.proj.weight": "model-00001-of-00008.safetensors",
1584
+ "visual.blocks.28.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1585
  "visual.blocks.28.attn.qkv.bias": "model-00001-of-00008.safetensors",
1586
  "visual.blocks.28.attn.qkv.weight": "model-00001-of-00008.safetensors",
1587
+ "visual.blocks.28.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1588
  "visual.blocks.28.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1589
  "visual.blocks.28.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1590
+ "visual.blocks.28.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1591
  "visual.blocks.28.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1592
  "visual.blocks.28.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1593
+ "visual.blocks.28.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1594
  "visual.blocks.28.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1595
  "visual.blocks.28.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1596
+ "visual.blocks.28.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1597
  "visual.blocks.28.norm1.weight": "model-00001-of-00008.safetensors",
1598
  "visual.blocks.28.norm2.weight": "model-00001-of-00008.safetensors",
1599
  "visual.blocks.29.attn.proj.bias": "model-00001-of-00008.safetensors",
1600
  "visual.blocks.29.attn.proj.weight": "model-00001-of-00008.safetensors",
1601
+ "visual.blocks.29.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1602
  "visual.blocks.29.attn.qkv.bias": "model-00001-of-00008.safetensors",
1603
  "visual.blocks.29.attn.qkv.weight": "model-00001-of-00008.safetensors",
1604
+ "visual.blocks.29.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1605
  "visual.blocks.29.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1606
  "visual.blocks.29.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1607
+ "visual.blocks.29.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1608
  "visual.blocks.29.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1609
  "visual.blocks.29.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1610
+ "visual.blocks.29.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1611
  "visual.blocks.29.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1612
  "visual.blocks.29.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1613
+ "visual.blocks.29.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1614
  "visual.blocks.29.norm1.weight": "model-00001-of-00008.safetensors",
1615
  "visual.blocks.29.norm2.weight": "model-00001-of-00008.safetensors",
1616
  "visual.blocks.3.attn.proj.bias": "model-00001-of-00008.safetensors",
1617
  "visual.blocks.3.attn.proj.weight": "model-00001-of-00008.safetensors",
1618
+ "visual.blocks.3.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1619
  "visual.blocks.3.attn.qkv.bias": "model-00001-of-00008.safetensors",
1620
  "visual.blocks.3.attn.qkv.weight": "model-00001-of-00008.safetensors",
1621
+ "visual.blocks.3.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1622
  "visual.blocks.3.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1623
  "visual.blocks.3.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1624
+ "visual.blocks.3.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1625
  "visual.blocks.3.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1626
  "visual.blocks.3.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1627
+ "visual.blocks.3.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1628
  "visual.blocks.3.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1629
  "visual.blocks.3.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1630
+ "visual.blocks.3.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1631
  "visual.blocks.3.norm1.weight": "model-00001-of-00008.safetensors",
1632
  "visual.blocks.3.norm2.weight": "model-00001-of-00008.safetensors",
1633
  "visual.blocks.30.attn.proj.bias": "model-00001-of-00008.safetensors",
1634
  "visual.blocks.30.attn.proj.weight": "model-00001-of-00008.safetensors",
1635
+ "visual.blocks.30.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1636
  "visual.blocks.30.attn.qkv.bias": "model-00001-of-00008.safetensors",
1637
  "visual.blocks.30.attn.qkv.weight": "model-00001-of-00008.safetensors",
1638
+ "visual.blocks.30.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1639
  "visual.blocks.30.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1640
  "visual.blocks.30.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1641
+ "visual.blocks.30.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1642
  "visual.blocks.30.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1643
  "visual.blocks.30.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1644
+ "visual.blocks.30.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1645
  "visual.blocks.30.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1646
  "visual.blocks.30.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1647
+ "visual.blocks.30.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1648
  "visual.blocks.30.norm1.weight": "model-00001-of-00008.safetensors",
1649
  "visual.blocks.30.norm2.weight": "model-00001-of-00008.safetensors",
1650
  "visual.blocks.31.attn.proj.bias": "model-00001-of-00008.safetensors",
1651
  "visual.blocks.31.attn.proj.weight": "model-00001-of-00008.safetensors",
1652
+ "visual.blocks.31.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1653
  "visual.blocks.31.attn.qkv.bias": "model-00001-of-00008.safetensors",
1654
  "visual.blocks.31.attn.qkv.weight": "model-00001-of-00008.safetensors",
1655
+ "visual.blocks.31.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1656
  "visual.blocks.31.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1657
  "visual.blocks.31.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1658
+ "visual.blocks.31.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1659
  "visual.blocks.31.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1660
  "visual.blocks.31.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1661
+ "visual.blocks.31.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1662
  "visual.blocks.31.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1663
  "visual.blocks.31.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1664
+ "visual.blocks.31.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1665
  "visual.blocks.31.norm1.weight": "model-00001-of-00008.safetensors",
1666
  "visual.blocks.31.norm2.weight": "model-00001-of-00008.safetensors",
1667
  "visual.blocks.4.attn.proj.bias": "model-00001-of-00008.safetensors",
1668
  "visual.blocks.4.attn.proj.weight": "model-00001-of-00008.safetensors",
1669
+ "visual.blocks.4.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1670
  "visual.blocks.4.attn.qkv.bias": "model-00001-of-00008.safetensors",
1671
  "visual.blocks.4.attn.qkv.weight": "model-00001-of-00008.safetensors",
1672
+ "visual.blocks.4.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1673
  "visual.blocks.4.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1674
  "visual.blocks.4.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1675
+ "visual.blocks.4.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1676
  "visual.blocks.4.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1677
  "visual.blocks.4.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1678
+ "visual.blocks.4.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1679
  "visual.blocks.4.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1680
  "visual.blocks.4.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1681
+ "visual.blocks.4.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1682
  "visual.blocks.4.norm1.weight": "model-00001-of-00008.safetensors",
1683
  "visual.blocks.4.norm2.weight": "model-00001-of-00008.safetensors",
1684
  "visual.blocks.5.attn.proj.bias": "model-00001-of-00008.safetensors",
1685
  "visual.blocks.5.attn.proj.weight": "model-00001-of-00008.safetensors",
1686
+ "visual.blocks.5.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1687
  "visual.blocks.5.attn.qkv.bias": "model-00001-of-00008.safetensors",
1688
  "visual.blocks.5.attn.qkv.weight": "model-00001-of-00008.safetensors",
1689
+ "visual.blocks.5.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1690
  "visual.blocks.5.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1691
  "visual.blocks.5.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1692
+ "visual.blocks.5.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1693
  "visual.blocks.5.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1694
  "visual.blocks.5.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1695
+ "visual.blocks.5.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1696
  "visual.blocks.5.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1697
  "visual.blocks.5.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1698
+ "visual.blocks.5.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1699
  "visual.blocks.5.norm1.weight": "model-00001-of-00008.safetensors",
1700
  "visual.blocks.5.norm2.weight": "model-00001-of-00008.safetensors",
1701
  "visual.blocks.6.attn.proj.bias": "model-00001-of-00008.safetensors",
1702
  "visual.blocks.6.attn.proj.weight": "model-00001-of-00008.safetensors",
1703
+ "visual.blocks.6.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1704
  "visual.blocks.6.attn.qkv.bias": "model-00001-of-00008.safetensors",
1705
  "visual.blocks.6.attn.qkv.weight": "model-00001-of-00008.safetensors",
1706
+ "visual.blocks.6.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1707
  "visual.blocks.6.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1708
  "visual.blocks.6.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1709
+ "visual.blocks.6.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1710
  "visual.blocks.6.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1711
  "visual.blocks.6.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1712
+ "visual.blocks.6.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1713
  "visual.blocks.6.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1714
  "visual.blocks.6.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1715
+ "visual.blocks.6.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1716
  "visual.blocks.6.norm1.weight": "model-00001-of-00008.safetensors",
1717
  "visual.blocks.6.norm2.weight": "model-00001-of-00008.safetensors",
1718
  "visual.blocks.7.attn.proj.bias": "model-00001-of-00008.safetensors",
1719
  "visual.blocks.7.attn.proj.weight": "model-00001-of-00008.safetensors",
1720
+ "visual.blocks.7.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1721
  "visual.blocks.7.attn.qkv.bias": "model-00001-of-00008.safetensors",
1722
  "visual.blocks.7.attn.qkv.weight": "model-00001-of-00008.safetensors",
1723
+ "visual.blocks.7.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1724
  "visual.blocks.7.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1725
  "visual.blocks.7.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1726
+ "visual.blocks.7.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1727
  "visual.blocks.7.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1728
  "visual.blocks.7.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1729
+ "visual.blocks.7.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1730
  "visual.blocks.7.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1731
  "visual.blocks.7.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1732
+ "visual.blocks.7.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1733
  "visual.blocks.7.norm1.weight": "model-00001-of-00008.safetensors",
1734
  "visual.blocks.7.norm2.weight": "model-00001-of-00008.safetensors",
1735
  "visual.blocks.8.attn.proj.bias": "model-00001-of-00008.safetensors",
1736
  "visual.blocks.8.attn.proj.weight": "model-00001-of-00008.safetensors",
1737
+ "visual.blocks.8.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1738
  "visual.blocks.8.attn.qkv.bias": "model-00001-of-00008.safetensors",
1739
  "visual.blocks.8.attn.qkv.weight": "model-00001-of-00008.safetensors",
1740
+ "visual.blocks.8.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1741
  "visual.blocks.8.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1742
  "visual.blocks.8.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1743
+ "visual.blocks.8.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1744
  "visual.blocks.8.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1745
  "visual.blocks.8.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1746
+ "visual.blocks.8.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1747
  "visual.blocks.8.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1748
  "visual.blocks.8.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1749
+ "visual.blocks.8.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1750
  "visual.blocks.8.norm1.weight": "model-00001-of-00008.safetensors",
1751
  "visual.blocks.8.norm2.weight": "model-00001-of-00008.safetensors",
1752
  "visual.blocks.9.attn.proj.bias": "model-00001-of-00008.safetensors",
1753
  "visual.blocks.9.attn.proj.weight": "model-00001-of-00008.safetensors",
1754
+ "visual.blocks.9.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1755
  "visual.blocks.9.attn.qkv.bias": "model-00001-of-00008.safetensors",
1756
  "visual.blocks.9.attn.qkv.weight": "model-00001-of-00008.safetensors",
1757
+ "visual.blocks.9.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1758
  "visual.blocks.9.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1759
  "visual.blocks.9.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1760
+ "visual.blocks.9.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1761
  "visual.blocks.9.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1762
  "visual.blocks.9.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1763
+ "visual.blocks.9.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1764
  "visual.blocks.9.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1765
  "visual.blocks.9.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1766
+ "visual.blocks.9.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1767
  "visual.blocks.9.norm1.weight": "model-00001-of-00008.safetensors",
1768
  "visual.blocks.9.norm2.weight": "model-00001-of-00008.safetensors",
1769
  "visual.merger.ln_q.weight": "model-00001-of-00008.safetensors",
1770
  "visual.merger.mlp.0.bias": "model-00001-of-00008.safetensors",
1771
  "visual.merger.mlp.0.weight": "model-00001-of-00008.safetensors",
1772
+ "visual.merger.mlp.0.weight_scale": "model-00001-of-00008.safetensors",
1773
  "visual.merger.mlp.2.bias": "model-00001-of-00008.safetensors",
1774
  "visual.merger.mlp.2.weight": "model-00001-of-00008.safetensors",
1775
+ "visual.merger.mlp.2.weight_scale": "model-00001-of-00008.safetensors",
1776
  "visual.patch_embed.proj.weight": "model-00001-of-00008.safetensors"
1777
  }
1778
  }
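The weight_scale entries added above are the quantization scales that an FP8 (float-quantized) checkpoint stores alongside each quantized Linear weight; the index file itself only maps tensor names to shard files. A minimal Python sketch for inspecting such an index, assuming the standard sharded-checkpoint file name and layout (nothing here is specific to this repo):

    import json
    from collections import Counter

    # model.safetensors.index.json holds {"metadata": {...}, "weight_map": {tensor_name: shard_file}}
    with open("model.safetensors.index.json") as f:
        index = json.load(f)

    weight_map = index["weight_map"]

    # FP8 checkpoints keep a companion "weight_scale" tensor beside each quantized weight.
    scale_names = [name for name in weight_map if name.endswith(".weight_scale")]
    print(f"{len(scale_names)} weight_scale tensors listed in the index")

    # How many tensors each shard file holds.
    print(Counter(weight_map.values()))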
recipe.yaml CHANGED
@@ -1,6 +1,6 @@
1
  default_stage:
2
  default_modifiers:
3
  QuantizationModifier:
4
- ignore: [lm_head, 're:visual.*']
5
  targets: [Linear]
6
  scheme: FP8_DYNAMIC
 
1
  default_stage:
2
  default_modifiers:
3
  QuantizationModifier:
4
+ ignore: ['re:.*lm_head', 're:vision_tower.*', 're:multi_modal_projector.*']
5
  targets: [Linear]
6
  scheme: FP8_DYNAMIC
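The updated recipe keeps the FP8_DYNAMIC scheme for Linear layers and swaps in the new ignore patterns shown above. A hedged sketch of how such a recipe is commonly applied with llm-compressor; import paths, the model class, and the placeholder IDs ("BASE_MODEL_ID", "OUTPUT_DIR") are illustrative assumptions rather than this repo's actual script:

    from transformers import AutoModelForCausalLM
    from llmcompressor.modifiers.quantization import QuantizationModifier
    from llmcompressor.transformers import oneshot

    # Load the base model in high precision (a vision-language model would use its own class).
    model = AutoModelForCausalLM.from_pretrained("BASE_MODEL_ID", torch_dtype="auto")

    # Mirrors recipe.yaml: quantize Linear layers with dynamic FP8,
    # ignore patterns copied verbatim from the updated recipe above.
    recipe = QuantizationModifier(
        targets="Linear",
        scheme="FP8_DYNAMIC",
        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
    )

    # FP8_DYNAMIC needs no calibration data; oneshot also accepts recipe="recipe.yaml".
    oneshot(model=model, recipe=recipe)

    # Per llm-compressor examples, save_compressed writes the compressed-tensors checkpoint.
    model.save_pretrained("OUTPUT_DIR", save_compressed=True)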