helenai committed (verified)
Commit 238d5a6 · 1 Parent(s): aa8b8f2

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "</think>": 151668,
+ "</tool_call>": 151658,
+ "</tool_response>": 151666,
+ "<think>": 151667,
+ "<tool_call>": 151657,
+ "<tool_response>": 151665,
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|file_sep|>": 151664,
+ "<|fim_middle|>": 151660,
+ "<|fim_pad|>": 151662,
+ "<|fim_prefix|>": 151659,
+ "<|fim_suffix|>": 151661,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|repo_name|>": 151663,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+ }
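For reference, a minimal sketch (assuming a local clone of this repository) that cross-checks these entries: the 26 added tokens occupy the contiguous ID range 151643–151668, just below the padded vocab_size of 151936 declared in config.json.

```python
import json

# Path assumes the repository has been cloned locally.
with open("added_tokens.json") as f:
    added = json.load(f)

ids = sorted(added.values())
assert len(added) == 26
assert ids == list(range(151643, 151669))  # contiguous special-token block, no gaps
print(f"{len(added)} added tokens, IDs {ids[0]}..{ids[-1]}")
```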
chat_template.jinja ADDED
@@ -0,0 +1,120 @@
+ {%- if tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- if messages[0].content is string %}
+ {{- messages[0].content }}
+ {%- else %}
+ {%- for content in messages[0].content %}
+ {%- if 'text' in content %}
+ {{- content.text }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+ {%- else %}
+ {%- if messages[0].role == 'system' %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0].content is string %}
+ {{- messages[0].content }}
+ {%- else %}
+ {%- for content in messages[0].content %}
+ {%- if 'text' in content %}
+ {{- content.text }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- set image_count = namespace(value=0) %}
+ {%- set video_count = namespace(value=0) %}
+ {%- for message in messages %}
+ {%- if message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' }}
+ {%- if message.content is string %}
+ {{- message.content }}
+ {%- else %}
+ {%- for content in message.content %}
+ {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
+ <|vision_start|><|image_pad|><|vision_end|>
+ {%- elif content.type == 'video' or 'video' in content %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
+ <|vision_start|><|video_pad|><|vision_end|>
+ {%- elif 'text' in content %}
+ {{- content.text }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "assistant" %}
+ {{- '<|im_start|>' + message.role + '\n' }}
+ {%- if message.content is string %}
+ {{- message.content }}
+ {%- else %}
+ {%- for content_item in message.content %}
+ {%- if 'text' in content_item %}
+ {{- content_item.text }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if (loop.first and message.content) or (not loop.first) %}
+ {{- '\n' }}
+ {%- endif %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '<tool_call>\n{"name": "' }}
+ {{- tool_call.name }}
+ {{- '", "arguments": ' }}
+ {%- if tool_call.arguments is string %}
+ {{- tool_call.arguments }}
+ {%- else %}
+ {{- tool_call.arguments | tojson }}
+ {%- endif %}
+ {{- '}\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n<tool_response>\n' }}
+ {%- if message.content is string %}
+ {{- message.content }}
+ {%- else %}
+ {%- for content in message.content %}
+ {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
+ <|vision_start|><|image_pad|><|vision_end|>
+ {%- elif content.type == 'video' or 'video' in content %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
+ <|vision_start|><|video_pad|><|vision_end|>
+ {%- elif 'text' in content %}
+ {{- content.text }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n</tool_response>' }}
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- endfor %}
+ {%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- endif %}
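To see what this template renders, a short sketch (model ID, tool schema, and message contents are placeholders; it assumes a transformers version recent enough to read the standalone chat_template.jinja). The output should contain the `<|im_start|>system` block with the `<tools>` listing, the user turn with `<|vision_start|><|image_pad|><|vision_end|>` in place of the image, and a trailing `<|im_start|>assistant` generation prompt; the same call can equally go through the processor.

```python
from transformers import AutoTokenizer

# Placeholder repo id; substitute this repository's actual id or a local path.
tokenizer = AutoTokenizer.from_pretrained("helenai/qwen3-vl-ov-int4")

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": [
        {"type": "image", "image": "demo.jpg"},
        {"type": "text", "text": "What city is shown here, and what is the weather there?"},
    ]},
]

prompt = tokenizer.apply_chat_template(
    messages, tools=tools, add_generation_prompt=True, tokenize=False
)
print(prompt)
```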
config.json ADDED
@@ -0,0 +1,65 @@
+ {
+ "architectures": [
+ "Qwen3VLForConditionalGeneration"
+ ],
+ "dtype": "float32",
+ "image_token_id": 151655,
+ "model_type": "qwen3_vl",
+ "text_config": {
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "dtype": "float32",
+ "eos_token_id": 151645,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 6144,
+ "max_position_embeddings": 262144,
+ "model_type": "qwen3_vl_text",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": {
+ "mrope_interleaved": true,
+ "mrope_section": [
+ 24,
+ 20,
+ 20
+ ],
+ "rope_type": "default"
+ },
+ "rope_theta": 5000000,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 151936
+ },
+ "tie_word_embeddings": true,
+ "transformers_version": "4.57.0.dev0",
+ "video_token_id": 151656,
+ "vision_config": {
+ "deepstack_visual_indexes": [
+ 5,
+ 11,
+ 17
+ ],
+ "depth": 24,
+ "dtype": "float32",
+ "hidden_act": "gelu_pytorch_tanh",
+ "hidden_size": 1024,
+ "in_channels": 3,
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "model_type": "qwen3_vl",
+ "num_heads": 16,
+ "num_position_embeddings": 2304,
+ "out_hidden_size": 2048,
+ "patch_size": 16,
+ "spatial_merge_size": 2,
+ "temporal_patch_size": 2
+ },
+ "vision_end_token_id": 151653,
+ "vision_start_token_id": 151652
+ }
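A few quantities implied by `text_config`, worked out in a small sketch (plain arithmetic on the values above): the query and key/value projection widths, the grouped-query-attention ratio, and the fact that the three `mrope_section` chunks sum to `head_dim / 2`, i.e. they split the rotary half-dimension across the multimodal position components, as in earlier Qwen-VL models.

```python
# Derived directly from the text_config values in config.json above.
hidden_size = 2048
num_attention_heads = 16
num_key_value_heads = 8
head_dim = 128
mrope_section = [24, 20, 20]

q_dim = num_attention_heads * head_dim                   # 2048: query projection width
kv_dim = num_key_value_heads * head_dim                  # 1024: key/value projection width
gqa_group = num_attention_heads // num_key_value_heads   # 2 query heads share each KV head

assert sum(mrope_section) == head_dim // 2               # 24 + 20 + 20 == 64
print(q_dim, kv_dim, gqa_group)
```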
generation_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "bos_token_id": 151643,
+ "do_sample": true,
+ "eos_token_id": [
+ 151645,
+ 151643
+ ],
+ "pad_token_id": 151643,
+ "temperature": 0.7,
+ "top_k": 20,
+ "top_p": 0.8,
+ "transformers_version": "4.57.0.dev0"
+ }
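These defaults (sampling with temperature 0.7, top-p 0.8, top-k 20, and two EOS IDs) are picked up automatically by `generate()`. The sketch below simply materializes them as a `transformers.GenerationConfig`, which is also the object you would pass to override them per call; the file path assumes a local clone.

```python
import json
from transformers import GenerationConfig

with open("generation_config.json") as f:
    gen_cfg = GenerationConfig(**json.load(f))

assert gen_cfg.do_sample and gen_cfg.temperature == 0.7
assert gen_cfg.eos_token_id == [151645, 151643]   # <|im_end|> and <|endoftext|>
print(gen_cfg)

# Illustrative per-call override (model/inputs not defined here):
# outputs = model.generate(**inputs, generation_config=gen_cfg, max_new_tokens=256)
```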
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
openvino_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+ "dtype": "int4",
+ "input_info": null,
+ "optimum_version": "1.27.0",
+ "output_attentions": false,
+ "quantization_config": {
+ "all_layers": null,
+ "backup_precision": null,
+ "bits": 4,
+ "dataset": null,
+ "dtype": "int4",
+ "gptq": null,
+ "group_size": 128,
+ "ignored_scope": null,
+ "lora_correction": null,
+ "num_samples": null,
+ "processor": null,
+ "quant_method": "default",
+ "ratio": 1.0,
+ "scale_estimation": null,
+ "sensitivity_metric": null,
+ "statistics_path": null,
+ "sym": false,
+ "tokenizer": null,
+ "trust_remote_code": false
+ },
+ "save_onnx_model": false,
+ "transformers_version": "4.57.0.dev0"
+ }
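This file records how the export was quantized: data-free 4-bit weight-only compression with group size 128, asymmetric (`sym: false`), applied to all eligible layers (`ratio: 1.0`). A hedged sketch of how such an export is typically produced with optimum-intel follows; the class and argument names match current optimum-intel releases as I understand them, but check your installed version, and the source checkpoint and output directory are placeholders.

```python
from optimum.intel import OVModelForVisualCausalLM, OVWeightQuantizationConfig

# Mirrors the quantization_config above: int4, group_size=128, ratio=1.0, asymmetric.
q_config = OVWeightQuantizationConfig(bits=4, group_size=128, ratio=1.0, sym=False)

model = OVModelForVisualCausalLM.from_pretrained(
    "Qwen/Qwen3-VL-2B-Instruct",   # assumed source checkpoint; adjust as needed
    export=True,                    # convert the PyTorch model to OpenVINO IR
    quantization_config=q_config,
)
model.save_pretrained("qwen3-vl-ov-int4")  # placeholder output directory
```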
openvino_language_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2757a54d727a5ef5e8d29f55dfdd39e98eaf10bd11d3a2f649fbebee0ddd96cb
+ size 1044285185
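This and the other `.bin` entries are Git LFS pointer files rather than the weights themselves: three text lines giving the spec version, the SHA-256 of the real blob, and its size in bytes (here roughly 1.04 GB for the int4-compressed decoder). A small sketch of parsing such a pointer; the path assumes a local clone.

```python
def parse_lfs_pointer(path):
    """Parse a Git LFS pointer file into a dict with 'version', 'oid', and integer 'size'."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    fields["size"] = int(fields["size"])
    return fields

ptr = parse_lfs_pointer("openvino_language_model.bin")
print(ptr["oid"], f'{ptr["size"] / 1e9:.2f} GB')
```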
openvino_language_model.xml ADDED
The diff for this file is too large to render. See raw diff
 
openvino_text_embeddings_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a045cc9803e22ac5670d676cf8370990833b6fb8e836f9549c8d619ed0be0341
+ size 311468804
openvino_text_embeddings_model.xml ADDED
@@ -0,0 +1,176 @@
1
+ <?xml version="1.0"?>
2
+ <net name="Model9" version="11">
3
+ <layers>
4
+ <layer id="0" name="input" type="Parameter" version="opset1">
5
+ <data shape="?,?" element_type="i64" />
6
+ <output>
7
+ <port id="0" precision="I64" names="input">
8
+ <dim>-1</dim>
9
+ <dim>-1</dim>
10
+ </port>
11
+ </output>
12
+ </layer>
13
+ <layer id="1" name="self.weight" type="Const" version="opset1">
14
+ <data element_type="i8" shape="151936, 2048" offset="0" size="311164928" />
15
+ <output>
16
+ <port id="0" precision="I8">
17
+ <dim>151936</dim>
18
+ <dim>2048</dim>
19
+ </port>
20
+ </output>
21
+ </layer>
22
+ <layer id="2" name="Convert_987666" type="Convert" version="opset1">
23
+ <data destination_type="f16" />
24
+ <input>
25
+ <port id="0" precision="I8">
26
+ <dim>151936</dim>
27
+ <dim>2048</dim>
28
+ </port>
29
+ </input>
30
+ <output>
31
+ <port id="1" precision="FP16">
32
+ <dim>151936</dim>
33
+ <dim>2048</dim>
34
+ </port>
35
+ </output>
36
+ </layer>
37
+ <layer id="3" name="self.weight/scale" type="Const" version="opset1">
38
+ <data element_type="f16" shape="151936, 1" offset="311164928" size="303872" />
39
+ <output>
40
+ <port id="0" precision="FP16">
41
+ <dim>151936</dim>
42
+ <dim>1</dim>
43
+ </port>
44
+ </output>
45
+ </layer>
46
+ <layer id="4" name="self.weight/fq_weights_0" type="Multiply" version="opset1">
47
+ <data auto_broadcast="numpy" />
48
+ <input>
49
+ <port id="0" precision="FP16">
50
+ <dim>151936</dim>
51
+ <dim>2048</dim>
52
+ </port>
53
+ <port id="1" precision="FP16">
54
+ <dim>151936</dim>
55
+ <dim>1</dim>
56
+ </port>
57
+ </input>
58
+ <output>
59
+ <port id="2" precision="FP16">
60
+ <dim>151936</dim>
61
+ <dim>2048</dim>
62
+ </port>
63
+ </output>
64
+ </layer>
65
+ <layer id="5" name="self.weight/fq_weights_0/convert" type="Convert" version="opset1">
66
+ <data destination_type="f32" />
67
+ <input>
68
+ <port id="0" precision="FP16">
69
+ <dim>151936</dim>
70
+ <dim>2048</dim>
71
+ </port>
72
+ </input>
73
+ <output>
74
+ <port id="1" precision="FP32">
75
+ <dim>151936</dim>
76
+ <dim>2048</dim>
77
+ </port>
78
+ </output>
79
+ </layer>
80
+ <layer id="6" name="aten::embedding/Convert" type="Convert" version="opset1">
81
+ <data destination_type="i32" />
82
+ <input>
83
+ <port id="0" precision="I64">
84
+ <dim>-1</dim>
85
+ <dim>-1</dim>
86
+ </port>
87
+ </input>
88
+ <output>
89
+ <port id="1" precision="I32">
90
+ <dim>-1</dim>
91
+ <dim>-1</dim>
92
+ </port>
93
+ </output>
94
+ </layer>
95
+ <layer id="7" name="aten::embedding/Constant" type="Const" version="opset1">
96
+ <data element_type="i32" shape="" offset="311468800" size="4" />
97
+ <output>
98
+ <port id="0" precision="I32" />
99
+ </output>
100
+ </layer>
101
+ <layer id="8" name="aten::embedding/Gather" type="Gather" version="opset8">
102
+ <data batch_dims="0" />
103
+ <input>
104
+ <port id="0" precision="FP32">
105
+ <dim>151936</dim>
106
+ <dim>2048</dim>
107
+ </port>
108
+ <port id="1" precision="I32">
109
+ <dim>-1</dim>
110
+ <dim>-1</dim>
111
+ </port>
112
+ <port id="2" precision="I32" />
113
+ </input>
114
+ <output>
115
+ <port id="3" precision="FP32" names="inputs_embeds">
116
+ <dim>-1</dim>
117
+ <dim>-1</dim>
118
+ <dim>2048</dim>
119
+ </port>
120
+ </output>
121
+ </layer>
122
+ <layer id="9" name="Result_165697" type="Result" version="opset1" output_names="inputs_embeds">
123
+ <input>
124
+ <port id="0" precision="FP32">
125
+ <dim>-1</dim>
126
+ <dim>-1</dim>
127
+ <dim>2048</dim>
128
+ </port>
129
+ </input>
130
+ </layer>
131
+ </layers>
132
+ <edges>
133
+ <edge from-layer="0" from-port="0" to-layer="6" to-port="0" />
134
+ <edge from-layer="1" from-port="0" to-layer="2" to-port="0" />
135
+ <edge from-layer="2" from-port="1" to-layer="4" to-port="0" />
136
+ <edge from-layer="3" from-port="0" to-layer="4" to-port="1" />
137
+ <edge from-layer="4" from-port="2" to-layer="5" to-port="0" />
138
+ <edge from-layer="5" from-port="1" to-layer="8" to-port="0" />
139
+ <edge from-layer="6" from-port="1" to-layer="8" to-port="1" />
140
+ <edge from-layer="7" from-port="0" to-layer="8" to-port="2" />
141
+ <edge from-layer="8" from-port="3" to-layer="9" to-port="0" />
142
+ </edges>
143
+ <rt_info>
144
+ <Runtime_version value="2025.3.0-19807-44526285f24-releases/2025/3" />
145
+ <conversion_parameters>
146
+ <framework value="pytorch" />
147
+ <is_python_object value="True" />
148
+ </conversion_parameters>
149
+ <nncf>
150
+ <friendly_names_were_updated value="True" />
151
+ <version value="2.18.0" />
152
+ <weight_compression>
153
+ <advanced_parameters value="{'statistics_path': None, 'lora_adapter_rank': 256, 'group_size_fallback_mode': 'ignore', 'min_adjusted_group_size': 16, 'awq_params': {'subset_size': 32, 'percent_to_apply': 0.002, 'alpha_min': 0.0, 'alpha_max': 1.0, 'steps': 100, 'prefer_data_aware_scaling': True}, 'scale_estimation_params': {'subset_size': 64, 'initial_steps': 5, 'scale_steps': 5, 'weight_penalty': -1.0}, 'gptq_params': {'damp_percent': 0.1, 'block_size': 128, 'subset_size': 128}, 'lora_correction_params': {'adapter_rank': 8, 'num_iterations': 3, 'apply_regularization': True, 'subset_size': 128, 'use_int8_adapters': True}, 'backend_params': {}, 'codebook': None}" />
154
+ <all_layers value="False" />
155
+ <awq value="False" />
156
+ <backup_mode value="int8_asym" />
157
+ <compression_format value="dequantize" />
158
+ <gptq value="False" />
159
+ <group_size value="-1" />
160
+ <ignored_scope value="[]" />
161
+ <lora_correction value="False" />
162
+ <mode value="int8_sym" />
163
+ <ratio value="1.0" />
164
+ <scale_estimation value="False" />
165
+ <sensitivity_metric value="weight_quantization_error" />
166
+ </weight_compression>
167
+ </nncf>
168
+ <optimum>
169
+ <nncf_version value="2.18.0" />
170
+ <optimum_intel_version value="1.26.0.dev0+741501e" />
171
+ <optimum_version value="1.27.0" />
172
+ <pytorch_version value="2.8.0+cpu" />
173
+ <transformers_version value="4.57.0.dev0" />
174
+ </optimum>
175
+ </rt_info>
176
+ </net>
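The graph above is a weight-compressed embedding lookup: an int8 constant of shape (151936, 2048) is converted to f16, multiplied by a per-row f16 scale of shape (151936, 1), converted to f32, and then gathered along axis 0 by the int32-cast token IDs. A NumPy sketch of the same computation; random data stands in for the stored weights, and the shapes are reduced from (151936, 2048) to keep the demo light.

```python
import numpy as np

rng = np.random.default_rng(0)
vocab, hidden = 1024, 64   # real IR shapes are (151936, 2048)

# Stand-ins for the int8 weight and per-row f16 scale stored in the .bin file.
w_i8 = rng.integers(-128, 128, size=(vocab, hidden), dtype=np.int8)
scale = (rng.random((vocab, 1), dtype=np.float32) * 0.01).astype(np.float16)

# Convert(int8 -> f16) -> Multiply(by scale) -> Convert(f16 -> f32), as in the IR.
w_f32 = (w_i8.astype(np.float16) * scale).astype(np.float32)

# aten::embedding/Gather: index rows with the int32-cast token IDs (axis 0).
input_ids = np.array([[3, 17, 512]], dtype=np.int64)
inputs_embeds = np.take(w_f32, input_ids.astype(np.int32), axis=0)
print(inputs_embeds.shape)  # (1, 3, 64) -- (batch, sequence, hidden)
```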
openvino_vision_embeddings_merger_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9dad3c2974237e2e16db1691bc4f3c4870221864b77d2f509fe682518ba5c65a
+ size 404627620
openvino_vision_embeddings_merger_model.xml ADDED
The diff for this file is too large to render. See raw diff
 
openvino_vision_embeddings_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be86e59e7c76bd9ddd9d2e66d42ffe4718fc880e69d6e082e4977acbad203bc5
+ size 1579064
openvino_vision_embeddings_model.xml ADDED
@@ -0,0 +1,290 @@
1
+ <?xml version="1.0"?>
2
+ <net name="Model3" version="11">
3
+ <layers>
4
+ <layer id="0" name="hidden_states" type="Parameter" version="opset1">
5
+ <data shape="?,?" element_type="f32" />
6
+ <output>
7
+ <port id="0" precision="FP32" names="hidden_states">
8
+ <dim>-1</dim>
9
+ <dim>-1</dim>
10
+ </port>
11
+ </output>
12
+ </layer>
13
+ <layer id="1" name="Constant_100925" type="Const" version="opset1">
14
+ <data element_type="i64" shape="5" offset="0" size="40" />
15
+ <output>
16
+ <port id="0" precision="I64" names="8">
17
+ <dim>5</dim>
18
+ </port>
19
+ </output>
20
+ </layer>
21
+ <layer id="2" name="aten::view/Reshape" type="Reshape" version="opset1">
22
+ <data special_zero="false" />
23
+ <input>
24
+ <port id="0" precision="FP32">
25
+ <dim>-1</dim>
26
+ <dim>-1</dim>
27
+ </port>
28
+ <port id="1" precision="I64">
29
+ <dim>5</dim>
30
+ </port>
31
+ </input>
32
+ <output>
33
+ <port id="2" precision="FP32" names="14,9,hidden_states_1,input">
34
+ <dim>-1</dim>
35
+ <dim>3</dim>
36
+ <dim>2</dim>
37
+ <dim>16</dim>
38
+ <dim>16</dim>
39
+ </port>
40
+ </output>
41
+ </layer>
42
+ <layer id="3" name="self.proj.weight" type="Const" version="opset1">
43
+ <data element_type="i8" shape="1024, 3, 2, 16, 16" offset="40" size="1572864" />
44
+ <output>
45
+ <port id="0" precision="I8">
46
+ <dim>1024</dim>
47
+ <dim>3</dim>
48
+ <dim>2</dim>
49
+ <dim>16</dim>
50
+ <dim>16</dim>
51
+ </port>
52
+ </output>
53
+ </layer>
54
+ <layer id="4" name="Convert_991615" type="Convert" version="opset1">
55
+ <data destination_type="f16" />
56
+ <input>
57
+ <port id="0" precision="I8">
58
+ <dim>1024</dim>
59
+ <dim>3</dim>
60
+ <dim>2</dim>
61
+ <dim>16</dim>
62
+ <dim>16</dim>
63
+ </port>
64
+ </input>
65
+ <output>
66
+ <port id="1" precision="FP16">
67
+ <dim>1024</dim>
68
+ <dim>3</dim>
69
+ <dim>2</dim>
70
+ <dim>16</dim>
71
+ <dim>16</dim>
72
+ </port>
73
+ </output>
74
+ </layer>
75
+ <layer id="5" name="self.proj.weight/scale" type="Const" version="opset1">
76
+ <data element_type="f16" shape="1024, 1, 1, 1, 1" offset="1572904" size="2048" />
77
+ <output>
78
+ <port id="0" precision="FP16">
79
+ <dim>1024</dim>
80
+ <dim>1</dim>
81
+ <dim>1</dim>
82
+ <dim>1</dim>
83
+ <dim>1</dim>
84
+ </port>
85
+ </output>
86
+ </layer>
87
+ <layer id="6" name="self.proj.weight/fq_weights_1" type="Multiply" version="opset1">
88
+ <data auto_broadcast="numpy" />
89
+ <input>
90
+ <port id="0" precision="FP16">
91
+ <dim>1024</dim>
92
+ <dim>3</dim>
93
+ <dim>2</dim>
94
+ <dim>16</dim>
95
+ <dim>16</dim>
96
+ </port>
97
+ <port id="1" precision="FP16">
98
+ <dim>1024</dim>
99
+ <dim>1</dim>
100
+ <dim>1</dim>
101
+ <dim>1</dim>
102
+ <dim>1</dim>
103
+ </port>
104
+ </input>
105
+ <output>
106
+ <port id="2" precision="FP16">
107
+ <dim>1024</dim>
108
+ <dim>3</dim>
109
+ <dim>2</dim>
110
+ <dim>16</dim>
111
+ <dim>16</dim>
112
+ </port>
113
+ </output>
114
+ </layer>
115
+ <layer id="7" name="self.proj.weight/fq_weights_1/convert" type="Convert" version="opset1">
116
+ <data destination_type="f32" />
117
+ <input>
118
+ <port id="0" precision="FP16">
119
+ <dim>1024</dim>
120
+ <dim>3</dim>
121
+ <dim>2</dim>
122
+ <dim>16</dim>
123
+ <dim>16</dim>
124
+ </port>
125
+ </input>
126
+ <output>
127
+ <port id="1" precision="FP32">
128
+ <dim>1024</dim>
129
+ <dim>3</dim>
130
+ <dim>2</dim>
131
+ <dim>16</dim>
132
+ <dim>16</dim>
133
+ </port>
134
+ </output>
135
+ </layer>
136
+ <layer id="8" name="__module.proj/aten::_convolution/Convolution" type="Convolution" version="opset1">
137
+ <data strides="2, 16, 16" dilations="1, 1, 1" pads_begin="0, 0, 0" pads_end="0, 0, 0" auto_pad="explicit" />
138
+ <input>
139
+ <port id="0" precision="FP32">
140
+ <dim>-1</dim>
141
+ <dim>3</dim>
142
+ <dim>2</dim>
143
+ <dim>16</dim>
144
+ <dim>16</dim>
145
+ </port>
146
+ <port id="1" precision="FP32">
147
+ <dim>1024</dim>
148
+ <dim>3</dim>
149
+ <dim>2</dim>
150
+ <dim>16</dim>
151
+ <dim>16</dim>
152
+ </port>
153
+ </input>
154
+ <output>
155
+ <port id="2" precision="FP32">
156
+ <dim>-1</dim>
157
+ <dim>1024</dim>
158
+ <dim>1</dim>
159
+ <dim>1</dim>
160
+ <dim>1</dim>
161
+ </port>
162
+ </output>
163
+ </layer>
164
+ <layer id="9" name="__module.proj/aten::_convolution/Reshape" type="Const" version="opset1">
165
+ <data element_type="f32" shape="1, 1024, 1, 1, 1" offset="1574952" size="4096" />
166
+ <output>
167
+ <port id="0" precision="FP32">
168
+ <dim>1</dim>
169
+ <dim>1024</dim>
170
+ <dim>1</dim>
171
+ <dim>1</dim>
172
+ <dim>1</dim>
173
+ </port>
174
+ </output>
175
+ </layer>
176
+ <layer id="10" name="__module.proj/aten::_convolution/Add" type="Add" version="opset1">
177
+ <data auto_broadcast="numpy" />
178
+ <input>
179
+ <port id="0" precision="FP32">
180
+ <dim>-1</dim>
181
+ <dim>1024</dim>
182
+ <dim>1</dim>
183
+ <dim>1</dim>
184
+ <dim>1</dim>
185
+ </port>
186
+ <port id="1" precision="FP32">
187
+ <dim>1</dim>
188
+ <dim>1024</dim>
189
+ <dim>1</dim>
190
+ <dim>1</dim>
191
+ <dim>1</dim>
192
+ </port>
193
+ </input>
194
+ <output>
195
+ <port id="2" precision="FP32" names="33">
196
+ <dim>-1</dim>
197
+ <dim>1024</dim>
198
+ <dim>1</dim>
199
+ <dim>1</dim>
200
+ <dim>1</dim>
201
+ </port>
202
+ </output>
203
+ </layer>
204
+ <layer id="11" name="Constant_100991" type="Const" version="opset1">
205
+ <data element_type="i64" shape="2" offset="1579048" size="16" />
206
+ <output>
207
+ <port id="0" precision="I64" names="18">
208
+ <dim>2</dim>
209
+ </port>
210
+ </output>
211
+ </layer>
212
+ <layer id="12" name="aten::view/Reshape_1" type="Reshape" version="opset1">
213
+ <data special_zero="false" />
214
+ <input>
215
+ <port id="0" precision="FP32">
216
+ <dim>-1</dim>
217
+ <dim>1024</dim>
218
+ <dim>1</dim>
219
+ <dim>1</dim>
220
+ <dim>1</dim>
221
+ </port>
222
+ <port id="1" precision="I64">
223
+ <dim>2</dim>
224
+ </port>
225
+ </input>
226
+ <output>
227
+ <port id="2" precision="FP32" names="last_hidden_state">
228
+ <dim>-1</dim>
229
+ <dim>1024</dim>
230
+ </port>
231
+ </output>
232
+ </layer>
233
+ <layer id="13" name="Result_101018" type="Result" version="opset1" output_names="last_hidden_state">
234
+ <input>
235
+ <port id="0" precision="FP32">
236
+ <dim>-1</dim>
237
+ <dim>1024</dim>
238
+ </port>
239
+ </input>
240
+ </layer>
241
+ </layers>
242
+ <edges>
243
+ <edge from-layer="0" from-port="0" to-layer="2" to-port="0" />
244
+ <edge from-layer="1" from-port="0" to-layer="2" to-port="1" />
245
+ <edge from-layer="2" from-port="2" to-layer="8" to-port="0" />
246
+ <edge from-layer="3" from-port="0" to-layer="4" to-port="0" />
247
+ <edge from-layer="4" from-port="1" to-layer="6" to-port="0" />
248
+ <edge from-layer="5" from-port="0" to-layer="6" to-port="1" />
249
+ <edge from-layer="6" from-port="2" to-layer="7" to-port="0" />
250
+ <edge from-layer="7" from-port="1" to-layer="8" to-port="1" />
251
+ <edge from-layer="8" from-port="2" to-layer="10" to-port="0" />
252
+ <edge from-layer="9" from-port="0" to-layer="10" to-port="1" />
253
+ <edge from-layer="10" from-port="2" to-layer="12" to-port="0" />
254
+ <edge from-layer="11" from-port="0" to-layer="12" to-port="1" />
255
+ <edge from-layer="12" from-port="2" to-layer="13" to-port="0" />
256
+ </edges>
257
+ <rt_info>
258
+ <Runtime_version value="2025.3.0-19807-44526285f24-releases/2025/3" />
259
+ <conversion_parameters>
260
+ <framework value="pytorch" />
261
+ <is_python_object value="True" />
262
+ </conversion_parameters>
263
+ <nncf>
264
+ <friendly_names_were_updated value="True" />
265
+ <version value="2.18.0" />
266
+ <weight_compression>
267
+ <advanced_parameters value="{'statistics_path': None, 'lora_adapter_rank': 256, 'group_size_fallback_mode': 'ignore', 'min_adjusted_group_size': 16, 'awq_params': {'subset_size': 32, 'percent_to_apply': 0.002, 'alpha_min': 0.0, 'alpha_max': 1.0, 'steps': 100, 'prefer_data_aware_scaling': True}, 'scale_estimation_params': {'subset_size': 64, 'initial_steps': 5, 'scale_steps': 5, 'weight_penalty': -1.0}, 'gptq_params': {'damp_percent': 0.1, 'block_size': 128, 'subset_size': 128}, 'lora_correction_params': {'adapter_rank': 8, 'num_iterations': 3, 'apply_regularization': True, 'subset_size': 128, 'use_int8_adapters': True}, 'backend_params': {}, 'codebook': None}" />
268
+ <all_layers value="False" />
269
+ <awq value="False" />
270
+ <backup_mode value="int8_asym" />
271
+ <compression_format value="dequantize" />
272
+ <gptq value="False" />
273
+ <group_size value="-1" />
274
+ <ignored_scope value="[]" />
275
+ <lora_correction value="False" />
276
+ <mode value="int8_sym" />
277
+ <ratio value="1.0" />
278
+ <scale_estimation value="False" />
279
+ <sensitivity_metric value="weight_quantization_error" />
280
+ </weight_compression>
281
+ </nncf>
282
+ <optimum>
283
+ <nncf_version value="2.18.0" />
284
+ <optimum_intel_version value="1.26.0.dev0+741501e" />
285
+ <optimum_version value="1.27.0" />
286
+ <pytorch_version value="2.8.0+cpu" />
287
+ <transformers_version value="4.57.0.dev0" />
288
+ </optimum>
289
+ </rt_info>
290
+ </net>
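This graph is the vision patch-embedding stem: the flat pixel input is reshaped to (N, 3, 2, 16, 16) patches (channels × temporal_patch_size × patch_size × patch_size), pushed through a convolution whose kernel and stride both equal the patch extent, the bias is added, and the result is flattened to (N, 1024). Because the stride equals the kernel, the convolution reduces each patch to a single output position, so it is equivalent to a linear projection of the flattened patch, as the NumPy sketch below shows (random stand-ins for the stored int8 weights, scales, and bias).

```python
import numpy as np

rng = np.random.default_rng(0)
n_patches, out_ch = 8, 1024
patch_elems = 3 * 2 * 16 * 16   # 1536 values per (2, 16, 16) RGB patch

# Stand-ins for the compressed projection weight, its per-output-channel scale, and bias.
w_i8 = rng.integers(-128, 128, size=(out_ch, 3, 2, 16, 16), dtype=np.int8)
w_scale = (rng.random((out_ch, 1, 1, 1, 1), dtype=np.float32) * 0.01).astype(np.float16)
bias = rng.standard_normal(out_ch).astype(np.float32)

# Dequantize as in the IR: Convert -> Multiply -> Convert.
w = (w_i8.astype(np.float16) * w_scale).astype(np.float32)

# "hidden_states" input: flat patches, reshaped to (N, 3, 2, 16, 16) by the first Reshape.
x = rng.standard_normal((n_patches, patch_elems)).astype(np.float32)
x5d = x.reshape(n_patches, 3, 2, 16, 16)

# Kernel == stride == (2, 16, 16): the convolution collapses each patch to one position,
# which is the same as a matrix multiply over the flattened patch, plus the bias.
out = x5d.reshape(n_patches, -1) @ w.reshape(out_ch, -1).T + bias   # (N, 1024)
print(out.shape)
```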
openvino_vision_embeddings_pos_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b02afaa8807e3672aefb76d3fdbab09a1a6d38908b5b26d0faa1750c7cacab0
+ size 2363908
openvino_vision_embeddings_pos_model.xml ADDED
@@ -0,0 +1,176 @@
1
+ <?xml version="1.0"?>
2
+ <net name="Model12" version="11">
3
+ <layers>
4
+ <layer id="0" name="input" type="Parameter" version="opset1">
5
+ <data shape="4,?" element_type="i64" />
6
+ <output>
7
+ <port id="0" precision="I64" names="input">
8
+ <dim>4</dim>
9
+ <dim>-1</dim>
10
+ </port>
11
+ </output>
12
+ </layer>
13
+ <layer id="1" name="self.weight" type="Const" version="opset1">
14
+ <data element_type="i8" shape="2304, 1024" offset="0" size="2359296" />
15
+ <output>
16
+ <port id="0" precision="I8">
17
+ <dim>2304</dim>
18
+ <dim>1024</dim>
19
+ </port>
20
+ </output>
21
+ </layer>
22
+ <layer id="2" name="Convert_1406260" type="Convert" version="opset1">
23
+ <data destination_type="f16" />
24
+ <input>
25
+ <port id="0" precision="I8">
26
+ <dim>2304</dim>
27
+ <dim>1024</dim>
28
+ </port>
29
+ </input>
30
+ <output>
31
+ <port id="1" precision="FP16">
32
+ <dim>2304</dim>
33
+ <dim>1024</dim>
34
+ </port>
35
+ </output>
36
+ </layer>
37
+ <layer id="3" name="self.weight/scale" type="Const" version="opset1">
38
+ <data element_type="f16" shape="2304, 1" offset="2359296" size="4608" />
39
+ <output>
40
+ <port id="0" precision="FP16">
41
+ <dim>2304</dim>
42
+ <dim>1</dim>
43
+ </port>
44
+ </output>
45
+ </layer>
46
+ <layer id="4" name="self.weight/fq_weights_0" type="Multiply" version="opset1">
47
+ <data auto_broadcast="numpy" />
48
+ <input>
49
+ <port id="0" precision="FP16">
50
+ <dim>2304</dim>
51
+ <dim>1024</dim>
52
+ </port>
53
+ <port id="1" precision="FP16">
54
+ <dim>2304</dim>
55
+ <dim>1</dim>
56
+ </port>
57
+ </input>
58
+ <output>
59
+ <port id="2" precision="FP16">
60
+ <dim>2304</dim>
61
+ <dim>1024</dim>
62
+ </port>
63
+ </output>
64
+ </layer>
65
+ <layer id="5" name="self.weight/fq_weights_0/convert" type="Convert" version="opset1">
66
+ <data destination_type="f32" />
67
+ <input>
68
+ <port id="0" precision="FP16">
69
+ <dim>2304</dim>
70
+ <dim>1024</dim>
71
+ </port>
72
+ </input>
73
+ <output>
74
+ <port id="1" precision="FP32">
75
+ <dim>2304</dim>
76
+ <dim>1024</dim>
77
+ </port>
78
+ </output>
79
+ </layer>
80
+ <layer id="6" name="aten::embedding/Convert" type="Convert" version="opset1">
81
+ <data destination_type="i32" />
82
+ <input>
83
+ <port id="0" precision="I64">
84
+ <dim>4</dim>
85
+ <dim>-1</dim>
86
+ </port>
87
+ </input>
88
+ <output>
89
+ <port id="1" precision="I32">
90
+ <dim>4</dim>
91
+ <dim>-1</dim>
92
+ </port>
93
+ </output>
94
+ </layer>
95
+ <layer id="7" name="aten::embedding/Constant" type="Const" version="opset1">
96
+ <data element_type="i32" shape="" offset="2363904" size="4" />
97
+ <output>
98
+ <port id="0" precision="I32" />
99
+ </output>
100
+ </layer>
101
+ <layer id="8" name="aten::embedding/Gather" type="Gather" version="opset8">
102
+ <data batch_dims="0" />
103
+ <input>
104
+ <port id="0" precision="FP32">
105
+ <dim>2304</dim>
106
+ <dim>1024</dim>
107
+ </port>
108
+ <port id="1" precision="I32">
109
+ <dim>4</dim>
110
+ <dim>-1</dim>
111
+ </port>
112
+ <port id="2" precision="I32" />
113
+ </input>
114
+ <output>
115
+ <port id="3" precision="FP32" names="last_hidden_state">
116
+ <dim>4</dim>
117
+ <dim>-1</dim>
118
+ <dim>1024</dim>
119
+ </port>
120
+ </output>
121
+ </layer>
122
+ <layer id="9" name="Result_167765" type="Result" version="opset1" output_names="last_hidden_state">
123
+ <input>
124
+ <port id="0" precision="FP32">
125
+ <dim>4</dim>
126
+ <dim>-1</dim>
127
+ <dim>1024</dim>
128
+ </port>
129
+ </input>
130
+ </layer>
131
+ </layers>
132
+ <edges>
133
+ <edge from-layer="0" from-port="0" to-layer="6" to-port="0" />
134
+ <edge from-layer="1" from-port="0" to-layer="2" to-port="0" />
135
+ <edge from-layer="2" from-port="1" to-layer="4" to-port="0" />
136
+ <edge from-layer="3" from-port="0" to-layer="4" to-port="1" />
137
+ <edge from-layer="4" from-port="2" to-layer="5" to-port="0" />
138
+ <edge from-layer="5" from-port="1" to-layer="8" to-port="0" />
139
+ <edge from-layer="6" from-port="1" to-layer="8" to-port="1" />
140
+ <edge from-layer="7" from-port="0" to-layer="8" to-port="2" />
141
+ <edge from-layer="8" from-port="3" to-layer="9" to-port="0" />
142
+ </edges>
143
+ <rt_info>
144
+ <Runtime_version value="2025.3.0-19807-44526285f24-releases/2025/3" />
145
+ <conversion_parameters>
146
+ <framework value="pytorch" />
147
+ <is_python_object value="True" />
148
+ </conversion_parameters>
149
+ <nncf>
150
+ <friendly_names_were_updated value="True" />
151
+ <version value="2.18.0" />
152
+ <weight_compression>
153
+ <advanced_parameters value="{'statistics_path': None, 'lora_adapter_rank': 256, 'group_size_fallback_mode': 'ignore', 'min_adjusted_group_size': 16, 'awq_params': {'subset_size': 32, 'percent_to_apply': 0.002, 'alpha_min': 0.0, 'alpha_max': 1.0, 'steps': 100, 'prefer_data_aware_scaling': True}, 'scale_estimation_params': {'subset_size': 64, 'initial_steps': 5, 'scale_steps': 5, 'weight_penalty': -1.0}, 'gptq_params': {'damp_percent': 0.1, 'block_size': 128, 'subset_size': 128}, 'lora_correction_params': {'adapter_rank': 8, 'num_iterations': 3, 'apply_regularization': True, 'subset_size': 128, 'use_int8_adapters': True}, 'backend_params': {}, 'codebook': None}" />
154
+ <all_layers value="False" />
155
+ <awq value="False" />
156
+ <backup_mode value="int8_asym" />
157
+ <compression_format value="dequantize" />
158
+ <gptq value="False" />
159
+ <group_size value="-1" />
160
+ <ignored_scope value="[]" />
161
+ <lora_correction value="False" />
162
+ <mode value="int8_sym" />
163
+ <ratio value="1.0" />
164
+ <scale_estimation value="False" />
165
+ <sensitivity_metric value="weight_quantization_error" />
166
+ </weight_compression>
167
+ </nncf>
168
+ <optimum>
169
+ <nncf_version value="2.18.0" />
170
+ <optimum_intel_version value="1.26.0.dev0+741501e" />
171
+ <optimum_version value="1.27.0" />
172
+ <pytorch_version value="2.8.0+cpu" />
173
+ <transformers_version value="4.57.0.dev0" />
174
+ </optimum>
175
+ </rt_info>
176
+ </net>
preprocessor_config.json ADDED
@@ -0,0 +1,37 @@
+ {
+ "crop_size": null,
+ "data_format": "channels_first",
+ "default_to_square": true,
+ "device": null,
+ "disable_grouping": null,
+ "do_center_crop": null,
+ "do_convert_rgb": true,
+ "do_normalize": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "image_mean": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "image_processor_type": "Qwen2VLImageProcessorFast",
+ "image_std": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "input_data_format": null,
+ "max_pixels": null,
+ "merge_size": 2,
+ "min_pixels": null,
+ "patch_size": 16,
+ "processor_class": "Qwen3VLProcessor",
+ "resample": 3,
+ "rescale_factor": 0.00392156862745098,
+ "return_tensors": null,
+ "size": {
+ "longest_edge": 16777216,
+ "shortest_edge": 65536
+ },
+ "temporal_patch_size": 2
+ }
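The `size` field here is an area budget rather than an edge length: with `patch_size: 16` and `merge_size: 2`, images are resized so that height and width are multiples of 32 and the pixel count stays between `shortest_edge` (65,536 = 256×256) and `longest_edge` (16,777,216 = 4096×4096). A hedged sketch of that resize rule and the resulting visual-token count, assuming the same smart-resize behaviour as earlier Qwen-VL image processors; the helper name and rounding details are illustrative, not taken from this repository.

```python
import math

PATCH, MERGE = 16, 2
FACTOR = PATCH * MERGE                      # 32: output dims must be divisible by this
MIN_PIXELS, MAX_PIXELS = 65536, 16777216    # "shortest_edge" / "longest_edge" above


def smart_resize(height, width):
    """Illustrative Qwen-VL-style resize: snap to multiples of 32 within the area budget."""
    h = round(height / FACTOR) * FACTOR
    w = round(width / FACTOR) * FACTOR
    if h * w > MAX_PIXELS:
        s = math.sqrt(height * width / MAX_PIXELS)
        h = math.floor(height / s / FACTOR) * FACTOR
        w = math.floor(width / s / FACTOR) * FACTOR
    elif h * w < MIN_PIXELS:
        s = math.sqrt(MIN_PIXELS / (height * width))
        h = math.ceil(height * s / FACTOR) * FACTOR
        w = math.ceil(width * s / FACTOR) * FACTOR
    return h, w


h, w = smart_resize(1080, 1920)
tokens = (h // FACTOR) * (w // FACTOR)      # one visual token per merged 32x32 block
print(h, w, tokens)                          # 1088 1920 2040
```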
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "eos_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+ size 11422654
tokenizer_config.json ADDED
@@ -0,0 +1,240 @@
+ {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151646": {
+ "content": "<|object_ref_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151647": {
+ "content": "<|object_ref_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151648": {
+ "content": "<|box_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151649": {
+ "content": "<|box_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151650": {
+ "content": "<|quad_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151651": {
+ "content": "<|quad_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151652": {
+ "content": "<|vision_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151653": {
+ "content": "<|vision_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151654": {
+ "content": "<|vision_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151655": {
+ "content": "<|image_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151656": {
+ "content": "<|video_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151657": {
+ "content": "<tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151658": {
+ "content": "</tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151659": {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151660": {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151661": {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151662": {
+ "content": "<|fim_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151663": {
+ "content": "<|repo_name|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151664": {
+ "content": "<|file_sep|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151665": {
+ "content": "<tool_response>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151666": {
+ "content": "</tool_response>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151667": {
+ "content": "<think>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151668": {
+ "content": "</think>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "extra_special_tokens": {},
+ "model_max_length": 262144,
+ "pad_token": "<|endoftext|>",
+ "processor_class": "Qwen3VLProcessor",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+ }
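A quick consistency check one can run against this configuration: the tokenizer class is the BPE-based `Qwen2Tokenizer`, `<|im_end|>` serves as EOS (matching `eos_token_id: 151645` in config.json), `<|endoftext|>` is the pad token, and the `<think>`/`<tool_call>` markers are registered added tokens but not flagged `special`, so they should survive `skip_special_tokens=True` during decoding. The sketch assumes a local clone at the current directory.

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")   # placeholder: path to a local clone

assert tok.eos_token == "<|im_end|>" and tok.convert_tokens_to_ids("<|im_end|>") == 151645
assert tok.pad_token == "<|endoftext|>"

# "<think>" is an added token but not special, so it is kept when special tokens are skipped.
ids = tok.encode("<think>hello</think><|im_end|>", add_special_tokens=False)
print(tok.decode(ids, skip_special_tokens=True))   # expected: "<think>hello</think>"
```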
video_preprocessor_config.json ADDED
@@ -0,0 +1,42 @@
+ {
+ "crop_size": null,
+ "data_format": "channels_first",
+ "default_to_square": true,
+ "device": null,
+ "do_center_crop": null,
+ "do_convert_rgb": true,
+ "do_normalize": true,
+ "do_pad": null,
+ "do_rescale": true,
+ "do_resize": true,
+ "do_sample_frames": true,
+ "fps": 2,
+ "image_mean": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "image_std": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "input_data_format": null,
+ "max_frames": 768,
+ "merge_size": 2,
+ "min_frames": 4,
+ "num_frames": null,
+ "patch_size": 16,
+ "processor_class": "Qwen3VLProcessor",
+ "resample": 3,
+ "rescale_factor": 0.00392156862745098,
+ "return_metadata": false,
+ "size": {
+ "longest_edge": 25165824,
+ "shortest_edge": 4096
+ },
+ "size_divisor": null,
+ "temporal_patch_size": 2,
+ "video_metadata": null,
+ "video_processor_type": "Qwen3VLVideoProcessor"
+ }
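Video frames are sampled at 2 fps, clamped between 4 and 768 frames, and grouped in temporal pairs (`temporal_patch_size: 2`), while the per-frame area budget is smaller than for still images (4,096 to 25,165,824 pixels). A rough sketch of the frame-count arithmetic implied by these values; the exact rounding used by `Qwen3VLVideoProcessor` may differ, this is only the back-of-the-envelope version.

```python
def sampled_frames(duration_s, fps=2.0, min_frames=4, max_frames=768, temporal_patch=2):
    """Approximate number of frames kept for a clip of the given duration."""
    n = round(duration_s * fps)
    n = max(min_frames, min(max_frames, n))
    # Keep a multiple of temporal_patch_size so frames pair up into temporal patches.
    n -= n % temporal_patch
    return max(n, temporal_patch)


for d in (1.0, 30.0, 600.0):
    print(d, "s ->", sampled_frames(d), "frames")   # 4, 60, 768
```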
vocab.json ADDED
The diff for this file is too large to render. See raw diff