Upload folder using huggingface_hub

Changed files:
- config.json (+0 -162)
- model-00001-of-00008.safetensors (+2 -2)
- model-00002-of-00008.safetensors (+2 -2)
- model-00003-of-00008.safetensors (+2 -2)
- model-00004-of-00008.safetensors (+2 -2)
- model-00005-of-00008.safetensors (+2 -2)
- model-00006-of-00008.safetensors (+2 -2)
- model-00007-of-00008.safetensors (+2 -2)
- model.safetensors.index.json (+289 -127)
- recipe.yaml (+1 -1)
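The commit title is the default message that `huggingface_hub`'s `upload_folder` helper writes when no message is given, so the push was most likely done with something like the sketch below. The repo id and local folder path are placeholders, not values taken from this commit:

```python
from huggingface_hub import upload_folder

# Push every file in a local directory to a model repo on the Hub.
# Large files such as the *.safetensors shards go through Git LFS
# automatically; the commit message below is the library default.
upload_folder(
    repo_id="your-org/your-quantized-model",   # placeholder repo id
    folder_path="./quantized-model",           # placeholder local path
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```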
config.json
CHANGED
@@ -52,168 +52,6 @@
     "format": "float-quantized",
     "global_compression_ratio": null,
     "ignore": [
-      "visual.blocks.0.attn.qkv",
-      "visual.blocks.0.attn.proj",
-      "visual.blocks.0.mlp.gate_proj",
-      "visual.blocks.0.mlp.up_proj",
-      "visual.blocks.0.mlp.down_proj",
-      "visual.blocks.1.attn.qkv",
-      "visual.blocks.1.attn.proj",
-      "visual.blocks.1.mlp.gate_proj",
-      "visual.blocks.1.mlp.up_proj",
-      "visual.blocks.1.mlp.down_proj",
-      "visual.blocks.2.attn.qkv",
-      "visual.blocks.2.attn.proj",
-      "visual.blocks.2.mlp.gate_proj",
-      "visual.blocks.2.mlp.up_proj",
-      "visual.blocks.2.mlp.down_proj",
-      "visual.blocks.3.attn.qkv",
-      "visual.blocks.3.attn.proj",
-      "visual.blocks.3.mlp.gate_proj",
-      "visual.blocks.3.mlp.up_proj",
-      "visual.blocks.3.mlp.down_proj",
-      "visual.blocks.4.attn.qkv",
-      "visual.blocks.4.attn.proj",
-      "visual.blocks.4.mlp.gate_proj",
-      "visual.blocks.4.mlp.up_proj",
-      "visual.blocks.4.mlp.down_proj",
-      "visual.blocks.5.attn.qkv",
-      "visual.blocks.5.attn.proj",
-      "visual.blocks.5.mlp.gate_proj",
-      "visual.blocks.5.mlp.up_proj",
-      "visual.blocks.5.mlp.down_proj",
-      "visual.blocks.6.attn.qkv",
-      "visual.blocks.6.attn.proj",
-      "visual.blocks.6.mlp.gate_proj",
-      "visual.blocks.6.mlp.up_proj",
-      "visual.blocks.6.mlp.down_proj",
-      "visual.blocks.7.attn.qkv",
-      "visual.blocks.7.attn.proj",
-      "visual.blocks.7.mlp.gate_proj",
-      "visual.blocks.7.mlp.up_proj",
-      "visual.blocks.7.mlp.down_proj",
-      "visual.blocks.8.attn.qkv",
-      "visual.blocks.8.attn.proj",
-      "visual.blocks.8.mlp.gate_proj",
-      "visual.blocks.8.mlp.up_proj",
-      "visual.blocks.8.mlp.down_proj",
-      "visual.blocks.9.attn.qkv",
-      "visual.blocks.9.attn.proj",
-      "visual.blocks.9.mlp.gate_proj",
-      "visual.blocks.9.mlp.up_proj",
-      "visual.blocks.9.mlp.down_proj",
-      "visual.blocks.10.attn.qkv",
-      "visual.blocks.10.attn.proj",
-      "visual.blocks.10.mlp.gate_proj",
-      "visual.blocks.10.mlp.up_proj",
-      "visual.blocks.10.mlp.down_proj",
-      "visual.blocks.11.attn.qkv",
-      "visual.blocks.11.attn.proj",
-      "visual.blocks.11.mlp.gate_proj",
-      "visual.blocks.11.mlp.up_proj",
-      "visual.blocks.11.mlp.down_proj",
-      "visual.blocks.12.attn.qkv",
-      "visual.blocks.12.attn.proj",
-      "visual.blocks.12.mlp.gate_proj",
-      "visual.blocks.12.mlp.up_proj",
-      "visual.blocks.12.mlp.down_proj",
-      "visual.blocks.13.attn.qkv",
-      "visual.blocks.13.attn.proj",
-      "visual.blocks.13.mlp.gate_proj",
-      "visual.blocks.13.mlp.up_proj",
-      "visual.blocks.13.mlp.down_proj",
-      "visual.blocks.14.attn.qkv",
-      "visual.blocks.14.attn.proj",
-      "visual.blocks.14.mlp.gate_proj",
-      "visual.blocks.14.mlp.up_proj",
-      "visual.blocks.14.mlp.down_proj",
-      "visual.blocks.15.attn.qkv",
-      "visual.blocks.15.attn.proj",
-      "visual.blocks.15.mlp.gate_proj",
-      "visual.blocks.15.mlp.up_proj",
-      "visual.blocks.15.mlp.down_proj",
-      "visual.blocks.16.attn.qkv",
-      "visual.blocks.16.attn.proj",
-      "visual.blocks.16.mlp.gate_proj",
-      "visual.blocks.16.mlp.up_proj",
-      "visual.blocks.16.mlp.down_proj",
-      "visual.blocks.17.attn.qkv",
-      "visual.blocks.17.attn.proj",
-      "visual.blocks.17.mlp.gate_proj",
-      "visual.blocks.17.mlp.up_proj",
-      "visual.blocks.17.mlp.down_proj",
-      "visual.blocks.18.attn.qkv",
-      "visual.blocks.18.attn.proj",
-      "visual.blocks.18.mlp.gate_proj",
-      "visual.blocks.18.mlp.up_proj",
-      "visual.blocks.18.mlp.down_proj",
-      "visual.blocks.19.attn.qkv",
-      "visual.blocks.19.attn.proj",
-      "visual.blocks.19.mlp.gate_proj",
-      "visual.blocks.19.mlp.up_proj",
-      "visual.blocks.19.mlp.down_proj",
-      "visual.blocks.20.attn.qkv",
-      "visual.blocks.20.attn.proj",
-      "visual.blocks.20.mlp.gate_proj",
-      "visual.blocks.20.mlp.up_proj",
-      "visual.blocks.20.mlp.down_proj",
-      "visual.blocks.21.attn.qkv",
-      "visual.blocks.21.attn.proj",
-      "visual.blocks.21.mlp.gate_proj",
-      "visual.blocks.21.mlp.up_proj",
-      "visual.blocks.21.mlp.down_proj",
-      "visual.blocks.22.attn.qkv",
-      "visual.blocks.22.attn.proj",
-      "visual.blocks.22.mlp.gate_proj",
-      "visual.blocks.22.mlp.up_proj",
-      "visual.blocks.22.mlp.down_proj",
-      "visual.blocks.23.attn.qkv",
-      "visual.blocks.23.attn.proj",
-      "visual.blocks.23.mlp.gate_proj",
-      "visual.blocks.23.mlp.up_proj",
-      "visual.blocks.23.mlp.down_proj",
-      "visual.blocks.24.attn.qkv",
-      "visual.blocks.24.attn.proj",
-      "visual.blocks.24.mlp.gate_proj",
-      "visual.blocks.24.mlp.up_proj",
-      "visual.blocks.24.mlp.down_proj",
-      "visual.blocks.25.attn.qkv",
-      "visual.blocks.25.attn.proj",
-      "visual.blocks.25.mlp.gate_proj",
-      "visual.blocks.25.mlp.up_proj",
-      "visual.blocks.25.mlp.down_proj",
-      "visual.blocks.26.attn.qkv",
-      "visual.blocks.26.attn.proj",
-      "visual.blocks.26.mlp.gate_proj",
-      "visual.blocks.26.mlp.up_proj",
-      "visual.blocks.26.mlp.down_proj",
-      "visual.blocks.27.attn.qkv",
-      "visual.blocks.27.attn.proj",
-      "visual.blocks.27.mlp.gate_proj",
-      "visual.blocks.27.mlp.up_proj",
-      "visual.blocks.27.mlp.down_proj",
-      "visual.blocks.28.attn.qkv",
-      "visual.blocks.28.attn.proj",
-      "visual.blocks.28.mlp.gate_proj",
-      "visual.blocks.28.mlp.up_proj",
-      "visual.blocks.28.mlp.down_proj",
-      "visual.blocks.29.attn.qkv",
-      "visual.blocks.29.attn.proj",
-      "visual.blocks.29.mlp.gate_proj",
-      "visual.blocks.29.mlp.up_proj",
-      "visual.blocks.29.mlp.down_proj",
-      "visual.blocks.30.attn.qkv",
-      "visual.blocks.30.attn.proj",
-      "visual.blocks.30.mlp.gate_proj",
-      "visual.blocks.30.mlp.up_proj",
-      "visual.blocks.30.mlp.down_proj",
-      "visual.blocks.31.attn.qkv",
-      "visual.blocks.31.attn.proj",
-      "visual.blocks.31.mlp.gate_proj",
-      "visual.blocks.31.mlp.up_proj",
-      "visual.blocks.31.mlp.down_proj",
-      "visual.merger.mlp.0",
-      "visual.merger.mlp.2",
       "lm_head"
     ],
     "kv_cache_scheme": null,
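The change removes every vision-tower projection from the quantization "ignore" list, leaving only "lm_head" excluded. The snippet below is a minimal sketch of how to check which modules a compressed-tensors-style config skips; it assumes config.json is in the working directory and that the ignore list sits under a "quantization_config" key, as the line numbers above suggest (adjust the keys if the real file nests them differently):

```python
import json

# Load the model config and pull out the quantization ignore list.
with open("config.json") as f:
    config = json.load(f)

quant_cfg = config.get("quantization_config", {})  # assumed key name
ignore = set(quant_cfg.get("ignore", []))

def is_quantization_ignored(module_name: str) -> bool:
    """True if the module is excluded from quantization (exact-match check only)."""
    return module_name in ignore

# After this commit only "lm_head" should remain in the ignore list, so the
# vision-tower modules removed above should report False here.
print(is_quantization_ignored("lm_head"))
print(is_quantization_ignored("visual.blocks.0.attn.qkv"))
```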
model-00001-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:3531038d3ab1495875f5f036edcc59016fa42bb56e65f832c43ee793b5155372
+size 4892355856
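Each shard entry above is a Git LFS pointer: the `oid` is the SHA-256 of the shard's contents and `size` is its byte count. The sketch below verifies a downloaded shard against its pointer; it assumes the first shard has been downloaded into the working directory:

```python
import hashlib

# Expected values taken from the model-00001-of-00008 pointer shown above.
EXPECTED_OID = "3531038d3ab1495875f5f036edcc59016fa42bb56e65f832c43ee793b5155372"
SHARD_PATH = "model-00001-of-00008.safetensors"  # assumed local download

sha = hashlib.sha256()
with open(SHARD_PATH, "rb") as f:
    # Hash in 1 MiB chunks so the ~4.9 GB shard never has to fit in memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

assert sha.hexdigest() == EXPECTED_OID, "shard does not match its LFS pointer"
print("ok:", SHARD_PATH)
```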
model-00002-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:4487f44ec59f9008edbac2871f4f4daeb85f81f9eb6a4860343b96523bb78e4c
+size 4877702136
model-00003-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:726d4573ba37e78117f99ec61b23787b20df8d8ed4dd498cbd030879749ce35b
+size 4877702224
model-00004-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1bebc333febad4b94a9a358735ef62143e6f013245dcb792f50736a20a300e6d
+size 4877702224
model-00005-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b79dfd105f75b72744321a18e373052828983663ca3a04e9b8ce37793af66b14
+size 4877702224
model-00006-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:026fa97c113a3dd9479517f6873938f33c7055dbeb8f9c48fc0547f0f4c3e4ac
+size 4877702224
model-00007-of-00008.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:d68e4c7628a12d706b467220083cc8b51d5fb1c5aaec855d8eec9c76bbb0e711
+size 4185374296
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 35023176192
   },
   "weight_map": {
     "lm_head.weight": "model-00008-of-00008.safetensors",
@@ -119,14 +119,14 @@
     "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00008.safetensors",
     "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
     "model.layers.13.self_attn.v_proj.weight_scale": "model-00002-of-00008.safetensors",
-    "model.layers.14.input_layernorm.weight": "model-
-    "model.layers.14.mlp.down_proj.weight": "model-
-    "model.layers.14.mlp.down_proj.weight_scale": "model-
-    "model.layers.14.mlp.gate_proj.weight": "model-
-    "model.layers.14.mlp.gate_proj.weight_scale": "model-
-    "model.layers.14.mlp.up_proj.weight": "model-
-    "model.layers.14.mlp.up_proj.weight_scale": "model-
-    "model.layers.14.post_attention_layernorm.weight": "model-
+    "model.layers.14.input_layernorm.weight": "model-00002-of-00008.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
+    "model.layers.14.mlp.down_proj.weight_scale": "model-00002-of-00008.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
+    "model.layers.14.mlp.gate_proj.weight_scale": "model-00002-of-00008.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
+    "model.layers.14.mlp.up_proj.weight_scale": "model-00002-of-00008.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
     "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00008.safetensors",
     "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
     "model.layers.14.self_attn.k_proj.weight_scale": "model-00002-of-00008.safetensors",
@@ -141,22 +141,22 @@
     "model.layers.15.input_layernorm.weight": "model-00003-of-00008.safetensors",
     "model.layers.15.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
     "model.layers.15.mlp.down_proj.weight_scale": "model-00003-of-00008.safetensors",
-    "model.layers.15.mlp.gate_proj.weight": "model-
-    "model.layers.15.mlp.gate_proj.weight_scale": "model-
     "model.layers.15.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
     "model.layers.15.mlp.up_proj.weight_scale": "model-00003-of-00008.safetensors",
     "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
-    "model.layers.15.self_attn.k_proj.bias": "model-
-    "model.layers.15.self_attn.k_proj.weight": "model-
-    "model.layers.15.self_attn.k_proj.weight_scale": "model-
-    "model.layers.15.self_attn.o_proj.weight": "model-
-    "model.layers.15.self_attn.o_proj.weight_scale": "model-
-    "model.layers.15.self_attn.q_proj.bias": "model-
-    "model.layers.15.self_attn.q_proj.weight": "model-
-    "model.layers.15.self_attn.q_proj.weight_scale": "model-
-    "model.layers.15.self_attn.v_proj.bias": "model-
-    "model.layers.15.self_attn.v_proj.weight": "model-
-    "model.layers.15.self_attn.v_proj.weight_scale": "model-
     "model.layers.16.input_layernorm.weight": "model-00003-of-00008.safetensors",
     "model.layers.16.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
     "model.layers.16.mlp.down_proj.weight_scale": "model-00003-of-00008.safetensors",
@@ -328,14 +328,14 @@
     "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00008.safetensors",
     "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
     "model.layers.23.self_attn.v_proj.weight_scale": "model-00003-of-00008.safetensors",
-    "model.layers.24.input_layernorm.weight": "model-
-    "model.layers.24.mlp.down_proj.weight": "model-
-    "model.layers.24.mlp.down_proj.weight_scale": "model-
-    "model.layers.24.mlp.gate_proj.weight": "model-
-    "model.layers.24.mlp.gate_proj.weight_scale": "model-
-    "model.layers.24.mlp.up_proj.weight": "model-
-    "model.layers.24.mlp.up_proj.weight_scale": "model-
-    "model.layers.24.post_attention_layernorm.weight": "model-
     "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00008.safetensors",
     "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
     "model.layers.24.self_attn.k_proj.weight_scale": "model-00003-of-00008.safetensors",
@@ -350,22 +350,22 @@
     "model.layers.25.input_layernorm.weight": "model-00004-of-00008.safetensors",
     "model.layers.25.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
     "model.layers.25.mlp.down_proj.weight_scale": "model-00004-of-00008.safetensors",
-    "model.layers.25.mlp.gate_proj.weight": "model-
-    "model.layers.25.mlp.gate_proj.weight_scale": "model-
     "model.layers.25.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
     "model.layers.25.mlp.up_proj.weight_scale": "model-00004-of-00008.safetensors",
     "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
-    "model.layers.25.self_attn.k_proj.bias": "model-
-    "model.layers.25.self_attn.k_proj.weight": "model-
-    "model.layers.25.self_attn.k_proj.weight_scale": "model-
-    "model.layers.25.self_attn.o_proj.weight": "model-
-    "model.layers.25.self_attn.o_proj.weight_scale": "model-
-    "model.layers.25.self_attn.q_proj.bias": "model-
-    "model.layers.25.self_attn.q_proj.weight": "model-
-    "model.layers.25.self_attn.q_proj.weight_scale": "model-
-    "model.layers.25.self_attn.v_proj.bias": "model-
-    "model.layers.25.self_attn.v_proj.weight": "model-
-    "model.layers.25.self_attn.v_proj.weight_scale": "model-
     "model.layers.26.input_layernorm.weight": "model-00004-of-00008.safetensors",
     "model.layers.26.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
     "model.layers.26.mlp.down_proj.weight_scale": "model-00004-of-00008.safetensors",
@@ -537,14 +537,14 @@
     "model.layers.33.self_attn.v_proj.bias": "model-00004-of-00008.safetensors",
     "model.layers.33.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
     "model.layers.33.self_attn.v_proj.weight_scale": "model-00004-of-00008.safetensors",
-    "model.layers.34.input_layernorm.weight": "model-
-    "model.layers.34.mlp.down_proj.weight": "model-
-    "model.layers.34.mlp.down_proj.weight_scale": "model-
-    "model.layers.34.mlp.gate_proj.weight": "model-
-    "model.layers.34.mlp.gate_proj.weight_scale": "model-
-    "model.layers.34.mlp.up_proj.weight": "model-
-    "model.layers.34.mlp.up_proj.weight_scale": "model-
-    "model.layers.34.post_attention_layernorm.weight": "model-
     "model.layers.34.self_attn.k_proj.bias": "model-00004-of-00008.safetensors",
     "model.layers.34.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
     "model.layers.34.self_attn.k_proj.weight_scale": "model-00004-of-00008.safetensors",
@@ -559,22 +559,22 @@
     "model.layers.35.input_layernorm.weight": "model-00005-of-00008.safetensors",
     "model.layers.35.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
     "model.layers.35.mlp.down_proj.weight_scale": "model-00005-of-00008.safetensors",
-    "model.layers.35.mlp.gate_proj.weight": "model-
-    "model.layers.35.mlp.gate_proj.weight_scale": "model-
     "model.layers.35.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
     "model.layers.35.mlp.up_proj.weight_scale": "model-00005-of-00008.safetensors",
     "model.layers.35.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
-    "model.layers.35.self_attn.k_proj.bias": "model-
-    "model.layers.35.self_attn.k_proj.weight": "model-
-    "model.layers.35.self_attn.k_proj.weight_scale": "model-
-    "model.layers.35.self_attn.o_proj.weight": "model-
-    "model.layers.35.self_attn.o_proj.weight_scale": "model-
-    "model.layers.35.self_attn.q_proj.bias": "model-
-    "model.layers.35.self_attn.q_proj.weight": "model-
-    "model.layers.35.self_attn.q_proj.weight_scale": "model-
-    "model.layers.35.self_attn.v_proj.bias": "model-
-    "model.layers.35.self_attn.v_proj.weight": "model-
-    "model.layers.35.self_attn.v_proj.weight_scale": "model-
     "model.layers.36.input_layernorm.weight": "model-00005-of-00008.safetensors",
     "model.layers.36.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
     "model.layers.36.mlp.down_proj.weight_scale": "model-00005-of-00008.safetensors",
@@ -651,14 +651,14 @@
     "model.layers.39.self_attn.v_proj.bias": "model-00005-of-00008.safetensors",
     "model.layers.39.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
     "model.layers.39.self_attn.v_proj.weight_scale": "model-00005-of-00008.safetensors",
-    "model.layers.4.input_layernorm.weight": "model-
-    "model.layers.4.mlp.down_proj.weight": "model-
-    "model.layers.4.mlp.down_proj.weight_scale": "model-
-    "model.layers.4.mlp.gate_proj.weight": "model-
-    "model.layers.4.mlp.gate_proj.weight_scale": "model-
-    "model.layers.4.mlp.up_proj.weight": "model-
-    "model.layers.4.mlp.up_proj.weight_scale": "model-
-    "model.layers.4.post_attention_layernorm.weight": "model-
     "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00008.safetensors",
     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
     "model.layers.4.self_attn.k_proj.weight_scale": "model-00001-of-00008.safetensors",
@@ -746,14 +746,14 @@
     "model.layers.43.self_attn.v_proj.bias": "model-00005-of-00008.safetensors",
     "model.layers.43.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
     "model.layers.43.self_attn.v_proj.weight_scale": "model-00005-of-00008.safetensors",
-    "model.layers.44.input_layernorm.weight": "model-
-    "model.layers.44.mlp.down_proj.weight": "model-
-    "model.layers.44.mlp.down_proj.weight_scale": "model-
-    "model.layers.44.mlp.gate_proj.weight": "model-
-    "model.layers.44.mlp.gate_proj.weight_scale": "model-
-    "model.layers.44.mlp.up_proj.weight": "model-
-    "model.layers.44.mlp.up_proj.weight_scale": "model-
-    "model.layers.44.post_attention_layernorm.weight": "model-
     "model.layers.44.self_attn.k_proj.bias": "model-00005-of-00008.safetensors",
     "model.layers.44.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
     "model.layers.44.self_attn.k_proj.weight_scale": "model-00005-of-00008.safetensors",
@@ -768,22 +768,22 @@
     "model.layers.45.input_layernorm.weight": "model-00006-of-00008.safetensors",
     "model.layers.45.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
     "model.layers.45.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
-    "model.layers.45.mlp.gate_proj.weight": "model-
-    "model.layers.45.mlp.gate_proj.weight_scale": "model-
     "model.layers.45.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
     "model.layers.45.mlp.up_proj.weight_scale": "model-00006-of-00008.safetensors",
     "model.layers.45.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
-    "model.layers.45.self_attn.k_proj.bias": "model-
-    "model.layers.45.self_attn.k_proj.weight": "model-
-    "model.layers.45.self_attn.k_proj.weight_scale": "model-
-    "model.layers.45.self_attn.o_proj.weight": "model-
-    "model.layers.45.self_attn.o_proj.weight_scale": "model-
-    "model.layers.45.self_attn.q_proj.bias": "model-
-    "model.layers.45.self_attn.q_proj.weight": "model-
-    "model.layers.45.self_attn.q_proj.weight_scale": "model-
-    "model.layers.45.self_attn.v_proj.bias": "model-
-    "model.layers.45.self_attn.v_proj.weight": "model-
-    "model.layers.45.self_attn.v_proj.weight_scale": "model-
     "model.layers.46.input_layernorm.weight": "model-00006-of-00008.safetensors",
     "model.layers.46.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
     "model.layers.46.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
@@ -863,22 +863,22 @@
     "model.layers.5.input_layernorm.weight": "model-00002-of-00008.safetensors",
     "model.layers.5.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
     "model.layers.5.mlp.down_proj.weight_scale": "model-00002-of-00008.safetensors",
-    "model.layers.5.mlp.gate_proj.weight": "model-
-    "model.layers.5.mlp.gate_proj.weight_scale": "model-
     "model.layers.5.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
     "model.layers.5.mlp.up_proj.weight_scale": "model-00002-of-00008.safetensors",
     "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
-    "model.layers.5.self_attn.k_proj.bias": "model-
-    "model.layers.5.self_attn.k_proj.weight": "model-
-    "model.layers.5.self_attn.k_proj.weight_scale": "model-
-    "model.layers.5.self_attn.o_proj.weight": "model-
-    "model.layers.5.self_attn.o_proj.weight_scale": "model-
-    "model.layers.5.self_attn.q_proj.bias": "model-
-    "model.layers.5.self_attn.q_proj.weight": "model-
-    "model.layers.5.self_attn.q_proj.weight_scale": "model-
-    "model.layers.5.self_attn.v_proj.bias": "model-
-    "model.layers.5.self_attn.v_proj.weight": "model-
-    "model.layers.5.self_attn.v_proj.weight_scale": "model-
     "model.layers.50.input_layernorm.weight": "model-00006-of-00008.safetensors",
     "model.layers.50.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
     "model.layers.50.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
@@ -955,14 +955,14 @@
     "model.layers.53.self_attn.v_proj.bias": "model-00006-of-00008.safetensors",
     "model.layers.53.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
     "model.layers.53.self_attn.v_proj.weight_scale": "model-00006-of-00008.safetensors",
-    "model.layers.54.input_layernorm.weight": "model-
-    "model.layers.54.mlp.down_proj.weight": "model-
-    "model.layers.54.mlp.down_proj.weight_scale": "model-
-    "model.layers.54.mlp.gate_proj.weight": "model-
-    "model.layers.54.mlp.gate_proj.weight_scale": "model-
-    "model.layers.54.mlp.up_proj.weight": "model-
-    "model.layers.54.mlp.up_proj.weight_scale": "model-
-    "model.layers.54.post_attention_layernorm.weight": "model-
     "model.layers.54.self_attn.k_proj.bias": "model-00006-of-00008.safetensors",
     "model.layers.54.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
     "model.layers.54.self_attn.k_proj.weight_scale": "model-00006-of-00008.safetensors",
@@ -977,22 +977,22 @@
     "model.layers.55.input_layernorm.weight": "model-00007-of-00008.safetensors",
     "model.layers.55.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
     "model.layers.55.mlp.down_proj.weight_scale": "model-00007-of-00008.safetensors",
-    "model.layers.55.mlp.gate_proj.weight": "model-
-    "model.layers.55.mlp.gate_proj.weight_scale": "model-
     "model.layers.55.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
     "model.layers.55.mlp.up_proj.weight_scale": "model-00007-of-00008.safetensors",
     "model.layers.55.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
-    "model.layers.55.self_attn.k_proj.bias": "model-
-    "model.layers.55.self_attn.k_proj.weight": "model-
-    "model.layers.55.self_attn.k_proj.weight_scale": "model-
-    "model.layers.55.self_attn.o_proj.weight": "model-
-    "model.layers.55.self_attn.o_proj.weight_scale": "model-
-    "model.layers.55.self_attn.q_proj.bias": "model-
-    "model.layers.55.self_attn.q_proj.weight": "model-
-    "model.layers.55.self_attn.q_proj.weight_scale": "model-
-    "model.layers.55.self_attn.v_proj.bias": "model-
-    "model.layers.55.self_attn.v_proj.weight": "model-
-    "model.layers.55.self_attn.v_proj.weight_scale": "model-
     "model.layers.56.input_layernorm.weight": "model-00007-of-00008.safetensors",
     "model.layers.56.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
     "model.layers.56.mlp.down_proj.weight_scale": "model-00007-of-00008.safetensors",
@@ -1224,393 +1224,555 @@
|
|
1224 |
"model.norm.weight": "model-00007-of-00008.safetensors",
|
1225 |
"visual.blocks.0.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1226 |
"visual.blocks.0.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1227 |
"visual.blocks.0.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1228 |
"visual.blocks.0.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1229 |
"visual.blocks.0.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1230 |
"visual.blocks.0.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1231 |
"visual.blocks.0.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1232 |
"visual.blocks.0.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1233 |
"visual.blocks.0.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1234 |
"visual.blocks.0.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1235 |
"visual.blocks.0.norm1.weight": "model-00001-of-00008.safetensors",
|
1236 |
"visual.blocks.0.norm2.weight": "model-00001-of-00008.safetensors",
|
1237 |
"visual.blocks.1.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1238 |
"visual.blocks.1.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1239 |
"visual.blocks.1.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1240 |
"visual.blocks.1.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1241 |
"visual.blocks.1.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1242 |
"visual.blocks.1.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1243 |
"visual.blocks.1.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1244 |
"visual.blocks.1.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1245 |
"visual.blocks.1.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1246 |
"visual.blocks.1.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1247 |
"visual.blocks.1.norm1.weight": "model-00001-of-00008.safetensors",
|
1248 |
"visual.blocks.1.norm2.weight": "model-00001-of-00008.safetensors",
|
1249 |
"visual.blocks.10.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1250 |
"visual.blocks.10.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1251 |
"visual.blocks.10.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1252 |
"visual.blocks.10.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1253 |
"visual.blocks.10.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1254 |
"visual.blocks.10.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1255 |
"visual.blocks.10.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1256 |
"visual.blocks.10.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1257 |
"visual.blocks.10.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1258 |
"visual.blocks.10.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1259 |
"visual.blocks.10.norm1.weight": "model-00001-of-00008.safetensors",
|
1260 |
"visual.blocks.10.norm2.weight": "model-00001-of-00008.safetensors",
|
1261 |
"visual.blocks.11.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1262 |
"visual.blocks.11.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1263 |
"visual.blocks.11.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1264 |
"visual.blocks.11.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1265 |
"visual.blocks.11.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1266 |
"visual.blocks.11.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1267 |
"visual.blocks.11.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1268 |
"visual.blocks.11.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1269 |
"visual.blocks.11.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1270 |
"visual.blocks.11.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1271 |
"visual.blocks.11.norm1.weight": "model-00001-of-00008.safetensors",
|
1272 |
"visual.blocks.11.norm2.weight": "model-00001-of-00008.safetensors",
|
1273 |
"visual.blocks.12.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1274 |
"visual.blocks.12.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1275 |
"visual.blocks.12.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1276 |
"visual.blocks.12.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1277 |
"visual.blocks.12.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1278 |
"visual.blocks.12.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1279 |
"visual.blocks.12.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1280 |
"visual.blocks.12.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1281 |
"visual.blocks.12.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1282 |
"visual.blocks.12.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1283 |
"visual.blocks.12.norm1.weight": "model-00001-of-00008.safetensors",
|
1284 |
"visual.blocks.12.norm2.weight": "model-00001-of-00008.safetensors",
|
1285 |
"visual.blocks.13.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1286 |
"visual.blocks.13.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1287 |
"visual.blocks.13.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1288 |
"visual.blocks.13.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1289 |
"visual.blocks.13.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1290 |
"visual.blocks.13.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1291 |
"visual.blocks.13.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1292 |
"visual.blocks.13.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1293 |
"visual.blocks.13.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1294 |
"visual.blocks.13.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1295 |
"visual.blocks.13.norm1.weight": "model-00001-of-00008.safetensors",
|
1296 |
"visual.blocks.13.norm2.weight": "model-00001-of-00008.safetensors",
|
1297 |
"visual.blocks.14.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1298 |
"visual.blocks.14.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1299 |
"visual.blocks.14.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1300 |
"visual.blocks.14.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1301 |
"visual.blocks.14.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1302 |
"visual.blocks.14.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1303 |
"visual.blocks.14.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1304 |
"visual.blocks.14.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1305 |
"visual.blocks.14.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1306 |
"visual.blocks.14.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1307 |
"visual.blocks.14.norm1.weight": "model-00001-of-00008.safetensors",
|
1308 |
"visual.blocks.14.norm2.weight": "model-00001-of-00008.safetensors",
|
1309 |
"visual.blocks.15.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1310 |
"visual.blocks.15.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1311 |
"visual.blocks.15.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1312 |
"visual.blocks.15.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1313 |
"visual.blocks.15.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1314 |
"visual.blocks.15.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1315 |
"visual.blocks.15.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1316 |
"visual.blocks.15.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1317 |
"visual.blocks.15.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1318 |
"visual.blocks.15.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1319 |
"visual.blocks.15.norm1.weight": "model-00001-of-00008.safetensors",
|
1320 |
"visual.blocks.15.norm2.weight": "model-00001-of-00008.safetensors",
|
1321 |
"visual.blocks.16.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1322 |
"visual.blocks.16.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1323 |
"visual.blocks.16.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1324 |
"visual.blocks.16.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1325 |
"visual.blocks.16.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1326 |
"visual.blocks.16.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1327 |
"visual.blocks.16.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1328 |
"visual.blocks.16.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1329 |
"visual.blocks.16.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1330 |
"visual.blocks.16.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1331 |
"visual.blocks.16.norm1.weight": "model-00001-of-00008.safetensors",
|
1332 |
"visual.blocks.16.norm2.weight": "model-00001-of-00008.safetensors",
|
1333 |
"visual.blocks.17.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1334 |
"visual.blocks.17.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1335 |
"visual.blocks.17.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1336 |
"visual.blocks.17.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1337 |
"visual.blocks.17.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1338 |
"visual.blocks.17.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1339 |
"visual.blocks.17.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1340 |
"visual.blocks.17.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1341 |
"visual.blocks.17.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1342 |
"visual.blocks.17.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1343 |
"visual.blocks.17.norm1.weight": "model-00001-of-00008.safetensors",
|
1344 |
"visual.blocks.17.norm2.weight": "model-00001-of-00008.safetensors",
|
1345 |
"visual.blocks.18.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1346 |
"visual.blocks.18.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1347 |
"visual.blocks.18.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1348 |
"visual.blocks.18.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1349 |
"visual.blocks.18.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1350 |
"visual.blocks.18.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1351 |
"visual.blocks.18.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1352 |
"visual.blocks.18.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1353 |
"visual.blocks.18.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1354 |
"visual.blocks.18.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1355 |
"visual.blocks.18.norm1.weight": "model-00001-of-00008.safetensors",
|
1356 |
"visual.blocks.18.norm2.weight": "model-00001-of-00008.safetensors",
|
1357 |
"visual.blocks.19.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1358 |
"visual.blocks.19.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1359 |
"visual.blocks.19.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1360 |
"visual.blocks.19.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1361 |
"visual.blocks.19.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1362 |
"visual.blocks.19.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1363 |
"visual.blocks.19.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1364 |
"visual.blocks.19.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1365 |
"visual.blocks.19.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1366 |
"visual.blocks.19.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1367 |
"visual.blocks.19.norm1.weight": "model-00001-of-00008.safetensors",
|
1368 |
"visual.blocks.19.norm2.weight": "model-00001-of-00008.safetensors",
|
1369 |
"visual.blocks.2.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1370 |
"visual.blocks.2.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1371 |
"visual.blocks.2.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1372 |
"visual.blocks.2.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1373 |
"visual.blocks.2.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1374 |
"visual.blocks.2.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1375 |
"visual.blocks.2.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1376 |
"visual.blocks.2.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1377 |
"visual.blocks.2.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1378 |
"visual.blocks.2.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1379 |
"visual.blocks.2.norm1.weight": "model-00001-of-00008.safetensors",
|
1380 |
"visual.blocks.2.norm2.weight": "model-00001-of-00008.safetensors",
|
1381 |
"visual.blocks.20.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1382 |
"visual.blocks.20.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1383 |
"visual.blocks.20.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1384 |
"visual.blocks.20.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1385 |
"visual.blocks.20.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1386 |
"visual.blocks.20.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1387 |
"visual.blocks.20.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1388 |
"visual.blocks.20.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1389 |
"visual.blocks.20.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1390 |
"visual.blocks.20.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1391 |
"visual.blocks.20.norm1.weight": "model-00001-of-00008.safetensors",
|
1392 |
"visual.blocks.20.norm2.weight": "model-00001-of-00008.safetensors",
|
1393 |
"visual.blocks.21.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1394 |
"visual.blocks.21.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1395 |
"visual.blocks.21.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1396 |
"visual.blocks.21.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1397 |
"visual.blocks.21.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1398 |
"visual.blocks.21.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1399 |
"visual.blocks.21.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1400 |
"visual.blocks.21.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1401 |
"visual.blocks.21.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1402 |
"visual.blocks.21.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1403 |
"visual.blocks.21.norm1.weight": "model-00001-of-00008.safetensors",
|
1404 |
"visual.blocks.21.norm2.weight": "model-00001-of-00008.safetensors",
|
1405 |
"visual.blocks.22.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1406 |
"visual.blocks.22.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1407 |
"visual.blocks.22.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1408 |
"visual.blocks.22.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1409 |
"visual.blocks.22.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1410 |
"visual.blocks.22.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1411 |
"visual.blocks.22.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1412 |
"visual.blocks.22.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1413 |
"visual.blocks.22.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1414 |
"visual.blocks.22.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1415 |
"visual.blocks.22.norm1.weight": "model-00001-of-00008.safetensors",
|
1416 |
"visual.blocks.22.norm2.weight": "model-00001-of-00008.safetensors",
|
1417 |
"visual.blocks.23.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1418 |
"visual.blocks.23.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1419 |
"visual.blocks.23.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1420 |
"visual.blocks.23.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1421 |
"visual.blocks.23.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1422 |
"visual.blocks.23.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1423 |
"visual.blocks.23.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1424 |
"visual.blocks.23.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1425 |
"visual.blocks.23.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1426 |
"visual.blocks.23.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1427 |
"visual.blocks.23.norm1.weight": "model-00001-of-00008.safetensors",
|
1428 |
"visual.blocks.23.norm2.weight": "model-00001-of-00008.safetensors",
|
1429 |
"visual.blocks.24.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1430 |
"visual.blocks.24.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1431 |
"visual.blocks.24.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1432 |
"visual.blocks.24.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1433 |
"visual.blocks.24.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1434 |
"visual.blocks.24.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1435 |
"visual.blocks.24.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1436 |
"visual.blocks.24.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1437 |
"visual.blocks.24.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1438 |
"visual.blocks.24.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1439 |
"visual.blocks.24.norm1.weight": "model-00001-of-00008.safetensors",
|
1440 |
"visual.blocks.24.norm2.weight": "model-00001-of-00008.safetensors",
|
1441 |
"visual.blocks.25.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1442 |
"visual.blocks.25.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1443 |
"visual.blocks.25.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1444 |
"visual.blocks.25.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1445 |
"visual.blocks.25.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1446 |
"visual.blocks.25.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1447 |
"visual.blocks.25.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1448 |
"visual.blocks.25.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1449 |
"visual.blocks.25.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1450 |
"visual.blocks.25.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1451 |
"visual.blocks.25.norm1.weight": "model-00001-of-00008.safetensors",
|
1452 |
"visual.blocks.25.norm2.weight": "model-00001-of-00008.safetensors",
|
1453 |
"visual.blocks.26.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1454 |
"visual.blocks.26.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1455 |
"visual.blocks.26.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1456 |
"visual.blocks.26.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1457 |
"visual.blocks.26.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1458 |
"visual.blocks.26.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1459 |
"visual.blocks.26.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1460 |
"visual.blocks.26.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1461 |
"visual.blocks.26.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1462 |
"visual.blocks.26.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1463 |
"visual.blocks.26.norm1.weight": "model-00001-of-00008.safetensors",
|
1464 |
"visual.blocks.26.norm2.weight": "model-00001-of-00008.safetensors",
|
1465 |
"visual.blocks.27.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1466 |
"visual.blocks.27.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1467 |
"visual.blocks.27.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1468 |
"visual.blocks.27.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1469 |
"visual.blocks.27.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1470 |
"visual.blocks.27.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1471 |
"visual.blocks.27.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1472 |
"visual.blocks.27.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1473 |
"visual.blocks.27.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1474 |
"visual.blocks.27.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1475 |
"visual.blocks.27.norm1.weight": "model-00001-of-00008.safetensors",
|
1476 |
"visual.blocks.27.norm2.weight": "model-00001-of-00008.safetensors",
|
1477 |
"visual.blocks.28.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1478 |
"visual.blocks.28.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1479 |
"visual.blocks.28.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1480 |
"visual.blocks.28.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1481 |
"visual.blocks.28.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1482 |
"visual.blocks.28.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1483 |
"visual.blocks.28.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1484 |
"visual.blocks.28.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1485 |
"visual.blocks.28.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1486 |
"visual.blocks.28.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1487 |
"visual.blocks.28.norm1.weight": "model-00001-of-00008.safetensors",
|
1488 |
"visual.blocks.28.norm2.weight": "model-00001-of-00008.safetensors",
|
1489 |
"visual.blocks.29.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1490 |
"visual.blocks.29.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1491 |
"visual.blocks.29.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1492 |
"visual.blocks.29.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1493 |
"visual.blocks.29.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1494 |
"visual.blocks.29.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1495 |
"visual.blocks.29.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1496 |
"visual.blocks.29.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1497 |
"visual.blocks.29.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1498 |
"visual.blocks.29.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1499 |
"visual.blocks.29.norm1.weight": "model-00001-of-00008.safetensors",
|
1500 |
"visual.blocks.29.norm2.weight": "model-00001-of-00008.safetensors",
|
1501 |
"visual.blocks.3.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1502 |
"visual.blocks.3.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1503 |
"visual.blocks.3.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1504 |
"visual.blocks.3.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1505 |
"visual.blocks.3.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1506 |
"visual.blocks.3.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1507 |
"visual.blocks.3.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1508 |
"visual.blocks.3.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1509 |
"visual.blocks.3.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1510 |
"visual.blocks.3.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1511 |
"visual.blocks.3.norm1.weight": "model-00001-of-00008.safetensors",
|
1512 |
"visual.blocks.3.norm2.weight": "model-00001-of-00008.safetensors",
|
1513 |
"visual.blocks.30.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1514 |
"visual.blocks.30.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1515 |
"visual.blocks.30.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1516 |
"visual.blocks.30.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1517 |
"visual.blocks.30.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1518 |
"visual.blocks.30.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1519 |
"visual.blocks.30.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1520 |
"visual.blocks.30.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1521 |
"visual.blocks.30.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1522 |
"visual.blocks.30.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1523 |
"visual.blocks.30.norm1.weight": "model-00001-of-00008.safetensors",
|
1524 |
"visual.blocks.30.norm2.weight": "model-00001-of-00008.safetensors",
|
1525 |
"visual.blocks.31.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1526 |
"visual.blocks.31.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1527 |
"visual.blocks.31.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1528 |
"visual.blocks.31.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1529 |
"visual.blocks.31.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1530 |
"visual.blocks.31.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1531 |
"visual.blocks.31.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1532 |
"visual.blocks.31.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1533 |
"visual.blocks.31.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1534 |
"visual.blocks.31.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1535 |
"visual.blocks.31.norm1.weight": "model-00001-of-00008.safetensors",
|
1536 |
"visual.blocks.31.norm2.weight": "model-00001-of-00008.safetensors",
|
1537 |
"visual.blocks.4.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1538 |
"visual.blocks.4.attn.proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1539 |
"visual.blocks.4.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1540 |
"visual.blocks.4.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
|
|
1541 |
"visual.blocks.4.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1542 |
"visual.blocks.4.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1543 |
"visual.blocks.4.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1544 |
"visual.blocks.4.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
|
|
1545 |
"visual.blocks.4.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1546 |
"visual.blocks.4.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1547 |   "visual.blocks.4.norm1.weight": "model-00001-of-00008.safetensors",
1548 |   "visual.blocks.4.norm2.weight": "model-00001-of-00008.safetensors",
1549 |   "visual.blocks.5.attn.proj.bias": "model-00001-of-00008.safetensors",
1550 |   "visual.blocks.5.attn.proj.weight": "model-00001-of-00008.safetensors",
1551 |   "visual.blocks.5.attn.qkv.bias": "model-00001-of-00008.safetensors",
1552 |   "visual.blocks.5.attn.qkv.weight": "model-00001-of-00008.safetensors",
1553 |   "visual.blocks.5.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1554 |   "visual.blocks.5.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1555 |   "visual.blocks.5.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1556 |   "visual.blocks.5.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1557 |   "visual.blocks.5.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1558 |   "visual.blocks.5.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1559 |   "visual.blocks.5.norm1.weight": "model-00001-of-00008.safetensors",
1560 |   "visual.blocks.5.norm2.weight": "model-00001-of-00008.safetensors",
1561 |   "visual.blocks.6.attn.proj.bias": "model-00001-of-00008.safetensors",
1562 |   "visual.blocks.6.attn.proj.weight": "model-00001-of-00008.safetensors",
1563 |   "visual.blocks.6.attn.qkv.bias": "model-00001-of-00008.safetensors",
1564 |   "visual.blocks.6.attn.qkv.weight": "model-00001-of-00008.safetensors",
1565 |   "visual.blocks.6.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1566 |   "visual.blocks.6.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1567 |   "visual.blocks.6.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1568 |   "visual.blocks.6.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1569 |   "visual.blocks.6.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1570 |   "visual.blocks.6.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1571 |   "visual.blocks.6.norm1.weight": "model-00001-of-00008.safetensors",
1572 |   "visual.blocks.6.norm2.weight": "model-00001-of-00008.safetensors",
1573 |   "visual.blocks.7.attn.proj.bias": "model-00001-of-00008.safetensors",
1574 |   "visual.blocks.7.attn.proj.weight": "model-00001-of-00008.safetensors",
1575 |   "visual.blocks.7.attn.qkv.bias": "model-00001-of-00008.safetensors",
1576 |   "visual.blocks.7.attn.qkv.weight": "model-00001-of-00008.safetensors",
1577 |   "visual.blocks.7.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1578 |   "visual.blocks.7.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1579 |   "visual.blocks.7.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1580 |   "visual.blocks.7.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1581 |   "visual.blocks.7.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1582 |   "visual.blocks.7.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1583 |   "visual.blocks.7.norm1.weight": "model-00001-of-00008.safetensors",
1584 |   "visual.blocks.7.norm2.weight": "model-00001-of-00008.safetensors",
1585 |   "visual.blocks.8.attn.proj.bias": "model-00001-of-00008.safetensors",
1586 |   "visual.blocks.8.attn.proj.weight": "model-00001-of-00008.safetensors",
1587 |   "visual.blocks.8.attn.qkv.bias": "model-00001-of-00008.safetensors",
1588 |   "visual.blocks.8.attn.qkv.weight": "model-00001-of-00008.safetensors",
1589 |   "visual.blocks.8.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1590 |   "visual.blocks.8.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1591 |   "visual.blocks.8.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1592 |   "visual.blocks.8.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1593 |   "visual.blocks.8.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1594 |   "visual.blocks.8.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1595 |   "visual.blocks.8.norm1.weight": "model-00001-of-00008.safetensors",
1596 |   "visual.blocks.8.norm2.weight": "model-00001-of-00008.safetensors",
1597 |   "visual.blocks.9.attn.proj.bias": "model-00001-of-00008.safetensors",
1598 |   "visual.blocks.9.attn.proj.weight": "model-00001-of-00008.safetensors",
1599 |   "visual.blocks.9.attn.qkv.bias": "model-00001-of-00008.safetensors",
1600 |   "visual.blocks.9.attn.qkv.weight": "model-00001-of-00008.safetensors",
1601 |   "visual.blocks.9.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1602 |   "visual.blocks.9.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1603 |   "visual.blocks.9.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1604 |   "visual.blocks.9.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1605 |   "visual.blocks.9.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1606 |   "visual.blocks.9.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1607 |   "visual.blocks.9.norm1.weight": "model-00001-of-00008.safetensors",
1608 |   "visual.blocks.9.norm2.weight": "model-00001-of-00008.safetensors",
1609 |   "visual.merger.ln_q.weight": "model-00001-of-00008.safetensors",
1610 |   "visual.merger.mlp.0.bias": "model-00001-of-00008.safetensors",
1611 |   "visual.merger.mlp.0.weight": "model-00001-of-00008.safetensors",
1612 |   "visual.merger.mlp.2.bias": "model-00001-of-00008.safetensors",
1613 |   "visual.merger.mlp.2.weight": "model-00001-of-00008.safetensors",
1614 |   "visual.patch_embed.proj.weight": "model-00001-of-00008.safetensors"
1615 |   }
1616 |   }
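The index below maps every parameter name to the shard file that stores it. A minimal sketch of how such a `weight_map` is typically resolved (assuming the standard `json` module and the `safetensors` package; the index path is a placeholder, and the example key is one of the entries added in this diff):

```python
import json
from safetensors import safe_open

# Load the sharded-checkpoint index (placeholder path).
with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.layers.14.mlp.down_proj.weight_scale"
shard_file = index["weight_map"][name]  # e.g. "model-00002-of-00008.safetensors"

# Read only that tensor from the shard that holds it.
with safe_open(shard_file, framework="pt") as shard:
    tensor = shard.get_tensor(name)

print(name, tuple(tensor.shape), "->", shard_file)
```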
1 |   {
2 |   "metadata": {
3 | + "total_size": 35023176192
4 |   },
5 |   "weight_map": {
6 |   "lm_head.weight": "model-00008-of-00008.safetensors",
119 |   "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00008.safetensors",
120 |   "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
121 |   "model.layers.13.self_attn.v_proj.weight_scale": "model-00002-of-00008.safetensors",
122 | + "model.layers.14.input_layernorm.weight": "model-00002-of-00008.safetensors",
123 | + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
124 | + "model.layers.14.mlp.down_proj.weight_scale": "model-00002-of-00008.safetensors",
125 | + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
126 | + "model.layers.14.mlp.gate_proj.weight_scale": "model-00002-of-00008.safetensors",
127 | + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
128 | + "model.layers.14.mlp.up_proj.weight_scale": "model-00002-of-00008.safetensors",
129 | + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
130 |   "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00008.safetensors",
131 |   "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
132 |   "model.layers.14.self_attn.k_proj.weight_scale": "model-00002-of-00008.safetensors",
141 |   "model.layers.15.input_layernorm.weight": "model-00003-of-00008.safetensors",
142 |   "model.layers.15.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
143 |   "model.layers.15.mlp.down_proj.weight_scale": "model-00003-of-00008.safetensors",
144 | + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
145 | + "model.layers.15.mlp.gate_proj.weight_scale": "model-00002-of-00008.safetensors",
146 |   "model.layers.15.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
147 |   "model.layers.15.mlp.up_proj.weight_scale": "model-00003-of-00008.safetensors",
148 |   "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
149 | + "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00008.safetensors",
150 | + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
151 | + "model.layers.15.self_attn.k_proj.weight_scale": "model-00002-of-00008.safetensors",
152 | + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
153 | + "model.layers.15.self_attn.o_proj.weight_scale": "model-00002-of-00008.safetensors",
154 | + "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00008.safetensors",
155 | + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
156 | + "model.layers.15.self_attn.q_proj.weight_scale": "model-00002-of-00008.safetensors",
157 | + "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00008.safetensors",
158 | + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
159 | + "model.layers.15.self_attn.v_proj.weight_scale": "model-00002-of-00008.safetensors",
160 |   "model.layers.16.input_layernorm.weight": "model-00003-of-00008.safetensors",
161 |   "model.layers.16.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
162 |   "model.layers.16.mlp.down_proj.weight_scale": "model-00003-of-00008.safetensors",
328 |   "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00008.safetensors",
329 |   "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
330 |   "model.layers.23.self_attn.v_proj.weight_scale": "model-00003-of-00008.safetensors",
331 | + "model.layers.24.input_layernorm.weight": "model-00003-of-00008.safetensors",
332 | + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
333 | + "model.layers.24.mlp.down_proj.weight_scale": "model-00003-of-00008.safetensors",
334 | + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
335 | + "model.layers.24.mlp.gate_proj.weight_scale": "model-00003-of-00008.safetensors",
336 | + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
337 | + "model.layers.24.mlp.up_proj.weight_scale": "model-00003-of-00008.safetensors",
338 | + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
339 |   "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00008.safetensors",
340 |   "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
341 |   "model.layers.24.self_attn.k_proj.weight_scale": "model-00003-of-00008.safetensors",
350 |   "model.layers.25.input_layernorm.weight": "model-00004-of-00008.safetensors",
351 |   "model.layers.25.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
352 |   "model.layers.25.mlp.down_proj.weight_scale": "model-00004-of-00008.safetensors",
353 | + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
354 | + "model.layers.25.mlp.gate_proj.weight_scale": "model-00003-of-00008.safetensors",
355 |   "model.layers.25.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
356 |   "model.layers.25.mlp.up_proj.weight_scale": "model-00004-of-00008.safetensors",
357 |   "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
358 | + "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00008.safetensors",
359 | + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
360 | + "model.layers.25.self_attn.k_proj.weight_scale": "model-00003-of-00008.safetensors",
361 | + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
362 | + "model.layers.25.self_attn.o_proj.weight_scale": "model-00003-of-00008.safetensors",
363 | + "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00008.safetensors",
364 | + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
365 | + "model.layers.25.self_attn.q_proj.weight_scale": "model-00003-of-00008.safetensors",
366 | + "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00008.safetensors",
367 | + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
368 | + "model.layers.25.self_attn.v_proj.weight_scale": "model-00003-of-00008.safetensors",
369 |   "model.layers.26.input_layernorm.weight": "model-00004-of-00008.safetensors",
370 |   "model.layers.26.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
371 |   "model.layers.26.mlp.down_proj.weight_scale": "model-00004-of-00008.safetensors",
537 |   "model.layers.33.self_attn.v_proj.bias": "model-00004-of-00008.safetensors",
538 |   "model.layers.33.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
539 |   "model.layers.33.self_attn.v_proj.weight_scale": "model-00004-of-00008.safetensors",
540 | + "model.layers.34.input_layernorm.weight": "model-00004-of-00008.safetensors",
541 | + "model.layers.34.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
542 | + "model.layers.34.mlp.down_proj.weight_scale": "model-00004-of-00008.safetensors",
543 | + "model.layers.34.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
544 | + "model.layers.34.mlp.gate_proj.weight_scale": "model-00004-of-00008.safetensors",
545 | + "model.layers.34.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
546 | + "model.layers.34.mlp.up_proj.weight_scale": "model-00004-of-00008.safetensors",
547 | + "model.layers.34.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
548 |   "model.layers.34.self_attn.k_proj.bias": "model-00004-of-00008.safetensors",
549 |   "model.layers.34.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
550 |   "model.layers.34.self_attn.k_proj.weight_scale": "model-00004-of-00008.safetensors",
559 |   "model.layers.35.input_layernorm.weight": "model-00005-of-00008.safetensors",
560 |   "model.layers.35.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
561 |   "model.layers.35.mlp.down_proj.weight_scale": "model-00005-of-00008.safetensors",
562 | + "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
563 | + "model.layers.35.mlp.gate_proj.weight_scale": "model-00004-of-00008.safetensors",
564 |   "model.layers.35.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
565 |   "model.layers.35.mlp.up_proj.weight_scale": "model-00005-of-00008.safetensors",
566 |   "model.layers.35.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
567 | + "model.layers.35.self_attn.k_proj.bias": "model-00004-of-00008.safetensors",
568 | + "model.layers.35.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
569 | + "model.layers.35.self_attn.k_proj.weight_scale": "model-00004-of-00008.safetensors",
570 | + "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
571 | + "model.layers.35.self_attn.o_proj.weight_scale": "model-00004-of-00008.safetensors",
572 | + "model.layers.35.self_attn.q_proj.bias": "model-00004-of-00008.safetensors",
573 | + "model.layers.35.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
574 | + "model.layers.35.self_attn.q_proj.weight_scale": "model-00004-of-00008.safetensors",
575 | + "model.layers.35.self_attn.v_proj.bias": "model-00004-of-00008.safetensors",
576 | + "model.layers.35.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
577 | + "model.layers.35.self_attn.v_proj.weight_scale": "model-00004-of-00008.safetensors",
578 |   "model.layers.36.input_layernorm.weight": "model-00005-of-00008.safetensors",
579 |   "model.layers.36.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
580 |   "model.layers.36.mlp.down_proj.weight_scale": "model-00005-of-00008.safetensors",
651 |   "model.layers.39.self_attn.v_proj.bias": "model-00005-of-00008.safetensors",
652 |   "model.layers.39.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
653 |   "model.layers.39.self_attn.v_proj.weight_scale": "model-00005-of-00008.safetensors",
654 | + "model.layers.4.input_layernorm.weight": "model-00001-of-00008.safetensors",
655 | + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
656 | + "model.layers.4.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
657 | + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
658 | + "model.layers.4.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
659 | + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
660 | + "model.layers.4.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
661 | + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
662 |   "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00008.safetensors",
663 |   "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
664 |   "model.layers.4.self_attn.k_proj.weight_scale": "model-00001-of-00008.safetensors",
746 |   "model.layers.43.self_attn.v_proj.bias": "model-00005-of-00008.safetensors",
747 |   "model.layers.43.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
748 |   "model.layers.43.self_attn.v_proj.weight_scale": "model-00005-of-00008.safetensors",
749 | + "model.layers.44.input_layernorm.weight": "model-00005-of-00008.safetensors",
750 | + "model.layers.44.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
751 | + "model.layers.44.mlp.down_proj.weight_scale": "model-00005-of-00008.safetensors",
752 | + "model.layers.44.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
753 | + "model.layers.44.mlp.gate_proj.weight_scale": "model-00005-of-00008.safetensors",
754 | + "model.layers.44.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
755 | + "model.layers.44.mlp.up_proj.weight_scale": "model-00005-of-00008.safetensors",
756 | + "model.layers.44.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
757 |   "model.layers.44.self_attn.k_proj.bias": "model-00005-of-00008.safetensors",
758 |   "model.layers.44.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
759 |   "model.layers.44.self_attn.k_proj.weight_scale": "model-00005-of-00008.safetensors",
768 |   "model.layers.45.input_layernorm.weight": "model-00006-of-00008.safetensors",
769 |   "model.layers.45.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
770 |   "model.layers.45.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
771 | + "model.layers.45.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
772 | + "model.layers.45.mlp.gate_proj.weight_scale": "model-00005-of-00008.safetensors",
773 |   "model.layers.45.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
774 |   "model.layers.45.mlp.up_proj.weight_scale": "model-00006-of-00008.safetensors",
775 |   "model.layers.45.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
776 | + "model.layers.45.self_attn.k_proj.bias": "model-00005-of-00008.safetensors",
777 | + "model.layers.45.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
778 | + "model.layers.45.self_attn.k_proj.weight_scale": "model-00005-of-00008.safetensors",
779 | + "model.layers.45.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
780 | + "model.layers.45.self_attn.o_proj.weight_scale": "model-00005-of-00008.safetensors",
781 | + "model.layers.45.self_attn.q_proj.bias": "model-00005-of-00008.safetensors",
782 | + "model.layers.45.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
783 | + "model.layers.45.self_attn.q_proj.weight_scale": "model-00005-of-00008.safetensors",
784 | + "model.layers.45.self_attn.v_proj.bias": "model-00005-of-00008.safetensors",
785 | + "model.layers.45.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
786 | + "model.layers.45.self_attn.v_proj.weight_scale": "model-00005-of-00008.safetensors",
787 |   "model.layers.46.input_layernorm.weight": "model-00006-of-00008.safetensors",
788 |   "model.layers.46.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
789 |   "model.layers.46.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
863 |   "model.layers.5.input_layernorm.weight": "model-00002-of-00008.safetensors",
864 |   "model.layers.5.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
865 |   "model.layers.5.mlp.down_proj.weight_scale": "model-00002-of-00008.safetensors",
866 | + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
867 | + "model.layers.5.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
868 |   "model.layers.5.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
869 |   "model.layers.5.mlp.up_proj.weight_scale": "model-00002-of-00008.safetensors",
870 |   "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
871 | + "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00008.safetensors",
872 | + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
873 | + "model.layers.5.self_attn.k_proj.weight_scale": "model-00001-of-00008.safetensors",
874 | + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
875 | + "model.layers.5.self_attn.o_proj.weight_scale": "model-00001-of-00008.safetensors",
876 | + "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00008.safetensors",
877 | + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
878 | + "model.layers.5.self_attn.q_proj.weight_scale": "model-00001-of-00008.safetensors",
879 | + "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00008.safetensors",
880 | + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
881 | + "model.layers.5.self_attn.v_proj.weight_scale": "model-00001-of-00008.safetensors",
882 |   "model.layers.50.input_layernorm.weight": "model-00006-of-00008.safetensors",
883 |   "model.layers.50.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
884 |   "model.layers.50.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
955 |   "model.layers.53.self_attn.v_proj.bias": "model-00006-of-00008.safetensors",
956 |   "model.layers.53.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
957 |   "model.layers.53.self_attn.v_proj.weight_scale": "model-00006-of-00008.safetensors",
958 | + "model.layers.54.input_layernorm.weight": "model-00006-of-00008.safetensors",
959 | + "model.layers.54.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
960 | + "model.layers.54.mlp.down_proj.weight_scale": "model-00006-of-00008.safetensors",
961 | + "model.layers.54.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
962 | + "model.layers.54.mlp.gate_proj.weight_scale": "model-00006-of-00008.safetensors",
963 | + "model.layers.54.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
964 | + "model.layers.54.mlp.up_proj.weight_scale": "model-00006-of-00008.safetensors",
965 | + "model.layers.54.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
966 |   "model.layers.54.self_attn.k_proj.bias": "model-00006-of-00008.safetensors",
967 |   "model.layers.54.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
968 |   "model.layers.54.self_attn.k_proj.weight_scale": "model-00006-of-00008.safetensors",
977 |   "model.layers.55.input_layernorm.weight": "model-00007-of-00008.safetensors",
978 |   "model.layers.55.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
979 |   "model.layers.55.mlp.down_proj.weight_scale": "model-00007-of-00008.safetensors",
980 | + "model.layers.55.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
981 | + "model.layers.55.mlp.gate_proj.weight_scale": "model-00006-of-00008.safetensors",
982 |   "model.layers.55.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
983 |   "model.layers.55.mlp.up_proj.weight_scale": "model-00007-of-00008.safetensors",
984 |   "model.layers.55.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
985 | + "model.layers.55.self_attn.k_proj.bias": "model-00006-of-00008.safetensors",
986 | + "model.layers.55.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
987 | + "model.layers.55.self_attn.k_proj.weight_scale": "model-00006-of-00008.safetensors",
988 | + "model.layers.55.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
989 | + "model.layers.55.self_attn.o_proj.weight_scale": "model-00006-of-00008.safetensors",
990 | + "model.layers.55.self_attn.q_proj.bias": "model-00006-of-00008.safetensors",
991 | + "model.layers.55.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
992 | + "model.layers.55.self_attn.q_proj.weight_scale": "model-00006-of-00008.safetensors",
993 | + "model.layers.55.self_attn.v_proj.bias": "model-00006-of-00008.safetensors",
994 | + "model.layers.55.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
995 | + "model.layers.55.self_attn.v_proj.weight_scale": "model-00006-of-00008.safetensors",
996 |   "model.layers.56.input_layernorm.weight": "model-00007-of-00008.safetensors",
997 |   "model.layers.56.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
998 |   "model.layers.56.mlp.down_proj.weight_scale": "model-00007-of-00008.safetensors",
1224 |   "model.norm.weight": "model-00007-of-00008.safetensors",
1225 |   "visual.blocks.0.attn.proj.bias": "model-00001-of-00008.safetensors",
1226 |   "visual.blocks.0.attn.proj.weight": "model-00001-of-00008.safetensors",
1227 | + "visual.blocks.0.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1228 |   "visual.blocks.0.attn.qkv.bias": "model-00001-of-00008.safetensors",
1229 |   "visual.blocks.0.attn.qkv.weight": "model-00001-of-00008.safetensors",
1230 | + "visual.blocks.0.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1231 |   "visual.blocks.0.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1232 |   "visual.blocks.0.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1233 | + "visual.blocks.0.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1234 |   "visual.blocks.0.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1235 |   "visual.blocks.0.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1236 | + "visual.blocks.0.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1237 |   "visual.blocks.0.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1238 |   "visual.blocks.0.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1239 | + "visual.blocks.0.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1240 |   "visual.blocks.0.norm1.weight": "model-00001-of-00008.safetensors",
1241 |   "visual.blocks.0.norm2.weight": "model-00001-of-00008.safetensors",
1242 |   "visual.blocks.1.attn.proj.bias": "model-00001-of-00008.safetensors",
1243 |   "visual.blocks.1.attn.proj.weight": "model-00001-of-00008.safetensors",
1244 | + "visual.blocks.1.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1245 |   "visual.blocks.1.attn.qkv.bias": "model-00001-of-00008.safetensors",
1246 |   "visual.blocks.1.attn.qkv.weight": "model-00001-of-00008.safetensors",
1247 | + "visual.blocks.1.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1248 |   "visual.blocks.1.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1249 |   "visual.blocks.1.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1250 | + "visual.blocks.1.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1251 |   "visual.blocks.1.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1252 |   "visual.blocks.1.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1253 | + "visual.blocks.1.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1254 |   "visual.blocks.1.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1255 |   "visual.blocks.1.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1256 | + "visual.blocks.1.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1257 |   "visual.blocks.1.norm1.weight": "model-00001-of-00008.safetensors",
1258 |   "visual.blocks.1.norm2.weight": "model-00001-of-00008.safetensors",
1259 |
"visual.blocks.10.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1260 |
"visual.blocks.10.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1261 |
+
"visual.blocks.10.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1262 |
"visual.blocks.10.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1263 |
"visual.blocks.10.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1264 |
+
"visual.blocks.10.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1265 |
"visual.blocks.10.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1266 |
"visual.blocks.10.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1267 |
+
"visual.blocks.10.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1268 |
"visual.blocks.10.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1269 |
"visual.blocks.10.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1270 |
+
"visual.blocks.10.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1271 |
"visual.blocks.10.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1272 |
"visual.blocks.10.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1273 |
+
"visual.blocks.10.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1274 |
"visual.blocks.10.norm1.weight": "model-00001-of-00008.safetensors",
|
1275 |
"visual.blocks.10.norm2.weight": "model-00001-of-00008.safetensors",
|
1276 |
"visual.blocks.11.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1277 |
"visual.blocks.11.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1278 |
+
"visual.blocks.11.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1279 |
"visual.blocks.11.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1280 |
"visual.blocks.11.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1281 |
+
"visual.blocks.11.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1282 |
"visual.blocks.11.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1283 |
"visual.blocks.11.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1284 |
+
"visual.blocks.11.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1285 |
"visual.blocks.11.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1286 |
"visual.blocks.11.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1287 |
+
"visual.blocks.11.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1288 |
"visual.blocks.11.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1289 |
"visual.blocks.11.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1290 |
+
"visual.blocks.11.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1291 |
"visual.blocks.11.norm1.weight": "model-00001-of-00008.safetensors",
|
1292 |
"visual.blocks.11.norm2.weight": "model-00001-of-00008.safetensors",
|
1293 |
"visual.blocks.12.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1294 |
"visual.blocks.12.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1295 |
+
"visual.blocks.12.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1296 |
"visual.blocks.12.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1297 |
"visual.blocks.12.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1298 |
+
"visual.blocks.12.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1299 |
"visual.blocks.12.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1300 |
"visual.blocks.12.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1301 |
+
"visual.blocks.12.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1302 |
"visual.blocks.12.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1303 |
"visual.blocks.12.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1304 |
+
"visual.blocks.12.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1305 |
"visual.blocks.12.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1306 |
"visual.blocks.12.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1307 |
+
"visual.blocks.12.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1308 |
"visual.blocks.12.norm1.weight": "model-00001-of-00008.safetensors",
|
1309 |
"visual.blocks.12.norm2.weight": "model-00001-of-00008.safetensors",
|
1310 |
"visual.blocks.13.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1311 |
"visual.blocks.13.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1312 |
+
"visual.blocks.13.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1313 |
"visual.blocks.13.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1314 |
"visual.blocks.13.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1315 |
+
"visual.blocks.13.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1316 |
"visual.blocks.13.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1317 |
"visual.blocks.13.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1318 |
+
"visual.blocks.13.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1319 |
"visual.blocks.13.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1320 |
"visual.blocks.13.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1321 |
+
"visual.blocks.13.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1322 |
"visual.blocks.13.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1323 |
"visual.blocks.13.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1324 |
+
"visual.blocks.13.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1325 |
"visual.blocks.13.norm1.weight": "model-00001-of-00008.safetensors",
|
1326 |
"visual.blocks.13.norm2.weight": "model-00001-of-00008.safetensors",
|
1327 |
"visual.blocks.14.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1328 |
"visual.blocks.14.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1329 |
+
"visual.blocks.14.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1330 |
"visual.blocks.14.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1331 |
"visual.blocks.14.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1332 |
+
"visual.blocks.14.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1333 |
"visual.blocks.14.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1334 |
"visual.blocks.14.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1335 |
+
"visual.blocks.14.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1336 |
"visual.blocks.14.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1337 |
"visual.blocks.14.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1338 |
+
"visual.blocks.14.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1339 |
"visual.blocks.14.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1340 |
"visual.blocks.14.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1341 |
+
"visual.blocks.14.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1342 |
"visual.blocks.14.norm1.weight": "model-00001-of-00008.safetensors",
|
1343 |
"visual.blocks.14.norm2.weight": "model-00001-of-00008.safetensors",
|
1344 |
"visual.blocks.15.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1345 |
"visual.blocks.15.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1346 |
+
"visual.blocks.15.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1347 |
"visual.blocks.15.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1348 |
"visual.blocks.15.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1349 |
+
"visual.blocks.15.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1350 |
"visual.blocks.15.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1351 |
"visual.blocks.15.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1352 |
+
"visual.blocks.15.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1353 |
"visual.blocks.15.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1354 |
"visual.blocks.15.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1355 |
+
"visual.blocks.15.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1356 |
"visual.blocks.15.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1357 |
"visual.blocks.15.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1358 |
+
"visual.blocks.15.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1359 |
"visual.blocks.15.norm1.weight": "model-00001-of-00008.safetensors",
|
1360 |
"visual.blocks.15.norm2.weight": "model-00001-of-00008.safetensors",
|
1361 |
"visual.blocks.16.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1362 |
"visual.blocks.16.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1363 |
+
"visual.blocks.16.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1364 |
"visual.blocks.16.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1365 |
"visual.blocks.16.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1366 |
+
"visual.blocks.16.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1367 |
"visual.blocks.16.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1368 |
"visual.blocks.16.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1369 |
+
"visual.blocks.16.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1370 |
"visual.blocks.16.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1371 |
"visual.blocks.16.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1372 |
+
"visual.blocks.16.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1373 |
"visual.blocks.16.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1374 |
"visual.blocks.16.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1375 |
+
"visual.blocks.16.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1376 |
"visual.blocks.16.norm1.weight": "model-00001-of-00008.safetensors",
|
1377 |
"visual.blocks.16.norm2.weight": "model-00001-of-00008.safetensors",
|
1378 |
"visual.blocks.17.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1379 |
"visual.blocks.17.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1380 |
+
"visual.blocks.17.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1381 |
"visual.blocks.17.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1382 |
"visual.blocks.17.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1383 |
+
"visual.blocks.17.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1384 |
"visual.blocks.17.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1385 |
"visual.blocks.17.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1386 |
+
"visual.blocks.17.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1387 |
"visual.blocks.17.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1388 |
"visual.blocks.17.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1389 |
+
"visual.blocks.17.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1390 |
"visual.blocks.17.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1391 |
"visual.blocks.17.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1392 |
+
"visual.blocks.17.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1393 |
"visual.blocks.17.norm1.weight": "model-00001-of-00008.safetensors",
|
1394 |
"visual.blocks.17.norm2.weight": "model-00001-of-00008.safetensors",
|
1395 |
"visual.blocks.18.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1396 |
"visual.blocks.18.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1397 |
+
"visual.blocks.18.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1398 |
"visual.blocks.18.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1399 |
"visual.blocks.18.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1400 |
+
"visual.blocks.18.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1401 |
"visual.blocks.18.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1402 |
"visual.blocks.18.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1403 |
+
"visual.blocks.18.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1404 |
"visual.blocks.18.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1405 |
"visual.blocks.18.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1406 |
+
"visual.blocks.18.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1407 |
"visual.blocks.18.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1408 |
"visual.blocks.18.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1409 |
+
"visual.blocks.18.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1410 |
"visual.blocks.18.norm1.weight": "model-00001-of-00008.safetensors",
|
1411 |
"visual.blocks.18.norm2.weight": "model-00001-of-00008.safetensors",
|
1412 |
"visual.blocks.19.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1413 |
"visual.blocks.19.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1414 |
+
"visual.blocks.19.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1415 |
"visual.blocks.19.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1416 |
"visual.blocks.19.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1417 |
+
"visual.blocks.19.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1418 |
"visual.blocks.19.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1419 |
"visual.blocks.19.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1420 |
+
"visual.blocks.19.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1421 |
"visual.blocks.19.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1422 |
"visual.blocks.19.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1423 |
+
"visual.blocks.19.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1424 |
"visual.blocks.19.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1425 |
"visual.blocks.19.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1426 |
+
"visual.blocks.19.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1427 |
"visual.blocks.19.norm1.weight": "model-00001-of-00008.safetensors",
|
1428 |
"visual.blocks.19.norm2.weight": "model-00001-of-00008.safetensors",
|
1429 |
"visual.blocks.2.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1430 |
"visual.blocks.2.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1431 |
+
"visual.blocks.2.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1432 |
"visual.blocks.2.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1433 |
"visual.blocks.2.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1434 |
+
"visual.blocks.2.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1435 |
"visual.blocks.2.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1436 |
"visual.blocks.2.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1437 |
+
"visual.blocks.2.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1438 |
"visual.blocks.2.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1439 |
"visual.blocks.2.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1440 |
+
"visual.blocks.2.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1441 |
"visual.blocks.2.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1442 |
"visual.blocks.2.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1443 |
+
"visual.blocks.2.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1444 |
"visual.blocks.2.norm1.weight": "model-00001-of-00008.safetensors",
|
1445 |
"visual.blocks.2.norm2.weight": "model-00001-of-00008.safetensors",
|
1446 |
"visual.blocks.20.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1447 |
"visual.blocks.20.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1448 |
+
"visual.blocks.20.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1449 |
"visual.blocks.20.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1450 |
"visual.blocks.20.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1451 |
+
"visual.blocks.20.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1452 |
"visual.blocks.20.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1453 |
"visual.blocks.20.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1454 |
+
"visual.blocks.20.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1455 |
"visual.blocks.20.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1456 |
"visual.blocks.20.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1457 |
+
"visual.blocks.20.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1458 |
"visual.blocks.20.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1459 |
"visual.blocks.20.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1460 |
+
"visual.blocks.20.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1461 |
"visual.blocks.20.norm1.weight": "model-00001-of-00008.safetensors",
|
1462 |
"visual.blocks.20.norm2.weight": "model-00001-of-00008.safetensors",
|
1463 |
"visual.blocks.21.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1464 |
"visual.blocks.21.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1465 |
+
"visual.blocks.21.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1466 |
"visual.blocks.21.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1467 |
"visual.blocks.21.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1468 |
+
"visual.blocks.21.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1469 |
"visual.blocks.21.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1470 |
"visual.blocks.21.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1471 |
+
"visual.blocks.21.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1472 |
"visual.blocks.21.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1473 |
"visual.blocks.21.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1474 |
+
"visual.blocks.21.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1475 |
"visual.blocks.21.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1476 |
"visual.blocks.21.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1477 |
+
"visual.blocks.21.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1478 |
"visual.blocks.21.norm1.weight": "model-00001-of-00008.safetensors",
|
1479 |
"visual.blocks.21.norm2.weight": "model-00001-of-00008.safetensors",
|
1480 |
"visual.blocks.22.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1481 |
"visual.blocks.22.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1482 |
+
"visual.blocks.22.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1483 |
"visual.blocks.22.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1484 |
"visual.blocks.22.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1485 |
+
"visual.blocks.22.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1486 |
"visual.blocks.22.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1487 |
"visual.blocks.22.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1488 |
+
"visual.blocks.22.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1489 |
"visual.blocks.22.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1490 |
"visual.blocks.22.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1491 |
+
"visual.blocks.22.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1492 |
"visual.blocks.22.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1493 |
"visual.blocks.22.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1494 |
+
"visual.blocks.22.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1495 |
"visual.blocks.22.norm1.weight": "model-00001-of-00008.safetensors",
|
1496 |
"visual.blocks.22.norm2.weight": "model-00001-of-00008.safetensors",
|
1497 |
"visual.blocks.23.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1498 |
"visual.blocks.23.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1499 |
+
"visual.blocks.23.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1500 |
"visual.blocks.23.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1501 |
"visual.blocks.23.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1502 |
+
"visual.blocks.23.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1503 |
"visual.blocks.23.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1504 |
"visual.blocks.23.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1505 |
+
"visual.blocks.23.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1506 |
"visual.blocks.23.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1507 |
"visual.blocks.23.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1508 |
+
"visual.blocks.23.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1509 |
"visual.blocks.23.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1510 |
"visual.blocks.23.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1511 |
+
"visual.blocks.23.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1512 |
"visual.blocks.23.norm1.weight": "model-00001-of-00008.safetensors",
|
1513 |
"visual.blocks.23.norm2.weight": "model-00001-of-00008.safetensors",
|
1514 |
"visual.blocks.24.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1515 |
"visual.blocks.24.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1516 |
+
"visual.blocks.24.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1517 |
"visual.blocks.24.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1518 |
"visual.blocks.24.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1519 |
+
"visual.blocks.24.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1520 |
"visual.blocks.24.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1521 |
"visual.blocks.24.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1522 |
+
"visual.blocks.24.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1523 |
"visual.blocks.24.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1524 |
"visual.blocks.24.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1525 |
+
"visual.blocks.24.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1526 |
"visual.blocks.24.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1527 |
"visual.blocks.24.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1528 |
+
"visual.blocks.24.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1529 |
"visual.blocks.24.norm1.weight": "model-00001-of-00008.safetensors",
|
1530 |
"visual.blocks.24.norm2.weight": "model-00001-of-00008.safetensors",
|
1531 |
"visual.blocks.25.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1532 |
"visual.blocks.25.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1533 |
+
"visual.blocks.25.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1534 |
"visual.blocks.25.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1535 |
"visual.blocks.25.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1536 |
+
"visual.blocks.25.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1537 |
"visual.blocks.25.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1538 |
"visual.blocks.25.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1539 |
+
"visual.blocks.25.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1540 |
"visual.blocks.25.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1541 |
"visual.blocks.25.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1542 |
+
"visual.blocks.25.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1543 |
"visual.blocks.25.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1544 |
"visual.blocks.25.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1545 |
+
"visual.blocks.25.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1546 |
"visual.blocks.25.norm1.weight": "model-00001-of-00008.safetensors",
|
1547 |
"visual.blocks.25.norm2.weight": "model-00001-of-00008.safetensors",
|
1548 |
"visual.blocks.26.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1549 |
"visual.blocks.26.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1550 |
+
"visual.blocks.26.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1551 |
"visual.blocks.26.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1552 |
"visual.blocks.26.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1553 |
+
"visual.blocks.26.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1554 |
"visual.blocks.26.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1555 |
"visual.blocks.26.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1556 |
+
"visual.blocks.26.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1557 |
"visual.blocks.26.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1558 |
"visual.blocks.26.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1559 |
+
"visual.blocks.26.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1560 |
"visual.blocks.26.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1561 |
"visual.blocks.26.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1562 |
+
"visual.blocks.26.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1563 |
"visual.blocks.26.norm1.weight": "model-00001-of-00008.safetensors",
|
1564 |
"visual.blocks.26.norm2.weight": "model-00001-of-00008.safetensors",
|
1565 |
"visual.blocks.27.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1566 |
"visual.blocks.27.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1567 |
+
"visual.blocks.27.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1568 |
"visual.blocks.27.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1569 |
"visual.blocks.27.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1570 |
+
"visual.blocks.27.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1571 |
"visual.blocks.27.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1572 |
"visual.blocks.27.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1573 |
+
"visual.blocks.27.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1574 |
"visual.blocks.27.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1575 |
"visual.blocks.27.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1576 |
+
"visual.blocks.27.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1577 |
"visual.blocks.27.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1578 |
"visual.blocks.27.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1579 |
+
"visual.blocks.27.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1580 |
"visual.blocks.27.norm1.weight": "model-00001-of-00008.safetensors",
|
1581 |
"visual.blocks.27.norm2.weight": "model-00001-of-00008.safetensors",
|
1582 |
"visual.blocks.28.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1583 |
"visual.blocks.28.attn.proj.weight": "model-00001-of-00008.safetensors",
|
1584 |
+
"visual.blocks.28.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
|
1585 |
"visual.blocks.28.attn.qkv.bias": "model-00001-of-00008.safetensors",
|
1586 |
"visual.blocks.28.attn.qkv.weight": "model-00001-of-00008.safetensors",
|
1587 |
+
"visual.blocks.28.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
|
1588 |
"visual.blocks.28.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
|
1589 |
"visual.blocks.28.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
1590 |
+
"visual.blocks.28.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1591 |
"visual.blocks.28.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
|
1592 |
"visual.blocks.28.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
1593 |
+
"visual.blocks.28.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1594 |
"visual.blocks.28.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
|
1595 |
"visual.blocks.28.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
1596 |
+
"visual.blocks.28.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
|
1597 |
"visual.blocks.28.norm1.weight": "model-00001-of-00008.safetensors",
|
1598 |
"visual.blocks.28.norm2.weight": "model-00001-of-00008.safetensors",
|
1599 |
"visual.blocks.29.attn.proj.bias": "model-00001-of-00008.safetensors",
|
1600 |
"visual.blocks.29.attn.proj.weight": "model-00001-of-00008.safetensors",
1601 | + "visual.blocks.29.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1602 |   "visual.blocks.29.attn.qkv.bias": "model-00001-of-00008.safetensors",
1603 |   "visual.blocks.29.attn.qkv.weight": "model-00001-of-00008.safetensors",
1604 | + "visual.blocks.29.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1605 |   "visual.blocks.29.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1606 |   "visual.blocks.29.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1607 | + "visual.blocks.29.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1608 |   "visual.blocks.29.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1609 |   "visual.blocks.29.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1610 | + "visual.blocks.29.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1611 |   "visual.blocks.29.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1612 |   "visual.blocks.29.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1613 | + "visual.blocks.29.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1614 |   "visual.blocks.29.norm1.weight": "model-00001-of-00008.safetensors",
1615 |   "visual.blocks.29.norm2.weight": "model-00001-of-00008.safetensors",
1616 |   "visual.blocks.3.attn.proj.bias": "model-00001-of-00008.safetensors",
1617 |   "visual.blocks.3.attn.proj.weight": "model-00001-of-00008.safetensors",
1618 | + "visual.blocks.3.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1619 |   "visual.blocks.3.attn.qkv.bias": "model-00001-of-00008.safetensors",
1620 |   "visual.blocks.3.attn.qkv.weight": "model-00001-of-00008.safetensors",
1621 | + "visual.blocks.3.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1622 |   "visual.blocks.3.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1623 |   "visual.blocks.3.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1624 | + "visual.blocks.3.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1625 |   "visual.blocks.3.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1626 |   "visual.blocks.3.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1627 | + "visual.blocks.3.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1628 |   "visual.blocks.3.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1629 |   "visual.blocks.3.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1630 | + "visual.blocks.3.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1631 |   "visual.blocks.3.norm1.weight": "model-00001-of-00008.safetensors",
1632 |   "visual.blocks.3.norm2.weight": "model-00001-of-00008.safetensors",
1633 |   "visual.blocks.30.attn.proj.bias": "model-00001-of-00008.safetensors",
1634 |   "visual.blocks.30.attn.proj.weight": "model-00001-of-00008.safetensors",
1635 | + "visual.blocks.30.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1636 |   "visual.blocks.30.attn.qkv.bias": "model-00001-of-00008.safetensors",
1637 |   "visual.blocks.30.attn.qkv.weight": "model-00001-of-00008.safetensors",
1638 | + "visual.blocks.30.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1639 |   "visual.blocks.30.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1640 |   "visual.blocks.30.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1641 | + "visual.blocks.30.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1642 |   "visual.blocks.30.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1643 |   "visual.blocks.30.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1644 | + "visual.blocks.30.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1645 |   "visual.blocks.30.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1646 |   "visual.blocks.30.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1647 | + "visual.blocks.30.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1648 |   "visual.blocks.30.norm1.weight": "model-00001-of-00008.safetensors",
1649 |   "visual.blocks.30.norm2.weight": "model-00001-of-00008.safetensors",
1650 |   "visual.blocks.31.attn.proj.bias": "model-00001-of-00008.safetensors",
1651 |   "visual.blocks.31.attn.proj.weight": "model-00001-of-00008.safetensors",
1652 | + "visual.blocks.31.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1653 |   "visual.blocks.31.attn.qkv.bias": "model-00001-of-00008.safetensors",
1654 |   "visual.blocks.31.attn.qkv.weight": "model-00001-of-00008.safetensors",
1655 | + "visual.blocks.31.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1656 |   "visual.blocks.31.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1657 |   "visual.blocks.31.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1658 | + "visual.blocks.31.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1659 |   "visual.blocks.31.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1660 |   "visual.blocks.31.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1661 | + "visual.blocks.31.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1662 |   "visual.blocks.31.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1663 |   "visual.blocks.31.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1664 | + "visual.blocks.31.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1665 |   "visual.blocks.31.norm1.weight": "model-00001-of-00008.safetensors",
1666 |   "visual.blocks.31.norm2.weight": "model-00001-of-00008.safetensors",
1667 |   "visual.blocks.4.attn.proj.bias": "model-00001-of-00008.safetensors",
1668 |   "visual.blocks.4.attn.proj.weight": "model-00001-of-00008.safetensors",
1669 | + "visual.blocks.4.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1670 |   "visual.blocks.4.attn.qkv.bias": "model-00001-of-00008.safetensors",
1671 |   "visual.blocks.4.attn.qkv.weight": "model-00001-of-00008.safetensors",
1672 | + "visual.blocks.4.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1673 |   "visual.blocks.4.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1674 |   "visual.blocks.4.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1675 | + "visual.blocks.4.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1676 |   "visual.blocks.4.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1677 |   "visual.blocks.4.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1678 | + "visual.blocks.4.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1679 |   "visual.blocks.4.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1680 |   "visual.blocks.4.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1681 | + "visual.blocks.4.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1682 |   "visual.blocks.4.norm1.weight": "model-00001-of-00008.safetensors",
1683 |   "visual.blocks.4.norm2.weight": "model-00001-of-00008.safetensors",
1684 |   "visual.blocks.5.attn.proj.bias": "model-00001-of-00008.safetensors",
1685 |   "visual.blocks.5.attn.proj.weight": "model-00001-of-00008.safetensors",
1686 | + "visual.blocks.5.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1687 |   "visual.blocks.5.attn.qkv.bias": "model-00001-of-00008.safetensors",
1688 |   "visual.blocks.5.attn.qkv.weight": "model-00001-of-00008.safetensors",
1689 | + "visual.blocks.5.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1690 |   "visual.blocks.5.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1691 |   "visual.blocks.5.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1692 | + "visual.blocks.5.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1693 |   "visual.blocks.5.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1694 |   "visual.blocks.5.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1695 | + "visual.blocks.5.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1696 |   "visual.blocks.5.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1697 |   "visual.blocks.5.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1698 | + "visual.blocks.5.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1699 |   "visual.blocks.5.norm1.weight": "model-00001-of-00008.safetensors",
1700 |   "visual.blocks.5.norm2.weight": "model-00001-of-00008.safetensors",
1701 |   "visual.blocks.6.attn.proj.bias": "model-00001-of-00008.safetensors",
1702 |   "visual.blocks.6.attn.proj.weight": "model-00001-of-00008.safetensors",
1703 | + "visual.blocks.6.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1704 |   "visual.blocks.6.attn.qkv.bias": "model-00001-of-00008.safetensors",
1705 |   "visual.blocks.6.attn.qkv.weight": "model-00001-of-00008.safetensors",
1706 | + "visual.blocks.6.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1707 |   "visual.blocks.6.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1708 |   "visual.blocks.6.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1709 | + "visual.blocks.6.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1710 |   "visual.blocks.6.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1711 |   "visual.blocks.6.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1712 | + "visual.blocks.6.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1713 |   "visual.blocks.6.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1714 |   "visual.blocks.6.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1715 | + "visual.blocks.6.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1716 |   "visual.blocks.6.norm1.weight": "model-00001-of-00008.safetensors",
1717 |   "visual.blocks.6.norm2.weight": "model-00001-of-00008.safetensors",
1718 |   "visual.blocks.7.attn.proj.bias": "model-00001-of-00008.safetensors",
1719 |   "visual.blocks.7.attn.proj.weight": "model-00001-of-00008.safetensors",
1720 | + "visual.blocks.7.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1721 |   "visual.blocks.7.attn.qkv.bias": "model-00001-of-00008.safetensors",
1722 |   "visual.blocks.7.attn.qkv.weight": "model-00001-of-00008.safetensors",
1723 | + "visual.blocks.7.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1724 |   "visual.blocks.7.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1725 |   "visual.blocks.7.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1726 | + "visual.blocks.7.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1727 |   "visual.blocks.7.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1728 |   "visual.blocks.7.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1729 | + "visual.blocks.7.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1730 |   "visual.blocks.7.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1731 |   "visual.blocks.7.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1732 | + "visual.blocks.7.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1733 |   "visual.blocks.7.norm1.weight": "model-00001-of-00008.safetensors",
1734 |   "visual.blocks.7.norm2.weight": "model-00001-of-00008.safetensors",
1735 |   "visual.blocks.8.attn.proj.bias": "model-00001-of-00008.safetensors",
1736 |   "visual.blocks.8.attn.proj.weight": "model-00001-of-00008.safetensors",
1737 | + "visual.blocks.8.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1738 |   "visual.blocks.8.attn.qkv.bias": "model-00001-of-00008.safetensors",
1739 |   "visual.blocks.8.attn.qkv.weight": "model-00001-of-00008.safetensors",
1740 | + "visual.blocks.8.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1741 |   "visual.blocks.8.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1742 |   "visual.blocks.8.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1743 | + "visual.blocks.8.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1744 |   "visual.blocks.8.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1745 |   "visual.blocks.8.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1746 | + "visual.blocks.8.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1747 |   "visual.blocks.8.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1748 |   "visual.blocks.8.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1749 | + "visual.blocks.8.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1750 |   "visual.blocks.8.norm1.weight": "model-00001-of-00008.safetensors",
1751 |   "visual.blocks.8.norm2.weight": "model-00001-of-00008.safetensors",
1752 |   "visual.blocks.9.attn.proj.bias": "model-00001-of-00008.safetensors",
1753 |   "visual.blocks.9.attn.proj.weight": "model-00001-of-00008.safetensors",
1754 | + "visual.blocks.9.attn.proj.weight_scale": "model-00001-of-00008.safetensors",
1755 |   "visual.blocks.9.attn.qkv.bias": "model-00001-of-00008.safetensors",
1756 |   "visual.blocks.9.attn.qkv.weight": "model-00001-of-00008.safetensors",
1757 | + "visual.blocks.9.attn.qkv.weight_scale": "model-00001-of-00008.safetensors",
1758 |   "visual.blocks.9.mlp.down_proj.bias": "model-00001-of-00008.safetensors",
1759 |   "visual.blocks.9.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
1760 | + "visual.blocks.9.mlp.down_proj.weight_scale": "model-00001-of-00008.safetensors",
1761 |   "visual.blocks.9.mlp.gate_proj.bias": "model-00001-of-00008.safetensors",
1762 |   "visual.blocks.9.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
1763 | + "visual.blocks.9.mlp.gate_proj.weight_scale": "model-00001-of-00008.safetensors",
1764 |   "visual.blocks.9.mlp.up_proj.bias": "model-00001-of-00008.safetensors",
1765 |   "visual.blocks.9.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
1766 | + "visual.blocks.9.mlp.up_proj.weight_scale": "model-00001-of-00008.safetensors",
1767 |   "visual.blocks.9.norm1.weight": "model-00001-of-00008.safetensors",
1768 |   "visual.blocks.9.norm2.weight": "model-00001-of-00008.safetensors",
1769 |   "visual.merger.ln_q.weight": "model-00001-of-00008.safetensors",
1770 |   "visual.merger.mlp.0.bias": "model-00001-of-00008.safetensors",
1771 |   "visual.merger.mlp.0.weight": "model-00001-of-00008.safetensors",
1772 | + "visual.merger.mlp.0.weight_scale": "model-00001-of-00008.safetensors",
1773 |   "visual.merger.mlp.2.bias": "model-00001-of-00008.safetensors",
1774 |   "visual.merger.mlp.2.weight": "model-00001-of-00008.safetensors",
1775 | + "visual.merger.mlp.2.weight_scale": "model-00001-of-00008.safetensors",
1776 |   "visual.patch_embed.proj.weight": "model-00001-of-00008.safetensors"
1777 |   }
1778 | }
recipe.yaml
CHANGED
@@ -1,6 +1,6 @@
 default_stage:
   default_modifiers:
     QuantizationModifier:
-      ignore: [lm_head, 're:
+      ignore: ['re:.*lm_head', 're:vision_tower.*', 're:multi_modal_projector.*']
       targets: [Linear]
       scheme: FP8_DYNAMIC
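The recipe change replaces the previous ignore list with explicit regex patterns: the LM head, the vision tower, and the multimodal projector stay in their original precision, while every other Linear layer is quantized with the dynamic FP8 scheme. Below is a hedged sketch of how a recipe of this form is typically applied with the llm-compressor library, which generates recipe.yaml files like this one; the model ID and auto class are placeholders (not taken from this commit), and import paths may differ slightly between library versions.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier

MODEL_ID = "org/base-model"  # hypothetical placeholder for the unquantized checkpoint

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Mirror the updated recipe.yaml: dynamic FP8 on Linear layers, skipping the
# LM head, vision tower, and multimodal projector.
recipe = QuantizationModifier(
    targets="Linear",
    scheme="FP8_DYNAMIC",
    ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
)

# FP8_DYNAMIC is data-free, so no calibration dataset is needed here.
oneshot(model=model, recipe=recipe)

# Save the compressed shards, the updated index, and the generated recipe.yaml.
model.save_pretrained("model-FP8-dynamic", save_compressed=True)
tokenizer.save_pretrained("model-FP8-dynamic")
```

Because the scheme computes activation scales at runtime, only per-tensor weight scales are stored, which is why the index diff above adds a single `weight_scale` entry next to each quantized weight.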