Image-Text-to-Text
Transformers
Safetensors
English
qwen3_vl
image-to-text
programming
code generation
images
image to text
qwen3_vl_text
Qwen3VLForConditionalGeneration
thinking
reasoning
video
code
coding
coder
chat
brainstorm
qwen
qwen3
qwencoder
brainstorm 20x
all uses cases
finetune
conversational
| { | |
| "size": { | |
| "longest_edge": 25165824, | |
| "shortest_edge": 4096 | |
| }, | |
| "patch_size": 16, | |
| "temporal_patch_size": 2, | |
| "merge_size": 2, | |
| "image_mean": [ | |
| 0.5, | |
| 0.5, | |
| 0.5 | |
| ], | |
| "image_std": [ | |
| 0.5, | |
| 0.5, | |
| 0.5 | |
| ], | |
| "processor_class": "Qwen3VLProcessor", | |
| "video_processor_type": "Qwen3VLVideoProcessor" | |
| } |