Spaces:
Running
on
Zero
Update Gradio app with multiple files
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ model_id = "Qwen/Qwen3-VL-4B-Instruct"
|
|
| 14 |
# Load model with optimizations for inference
|
| 15 |
model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 16 |
model_id,
|
| 17 |
-
|
| 18 |
device_map="auto"
|
| 19 |
)
|
| 20 |
processor = AutoProcessor.from_pretrained(model_id)
|
|
@@ -80,6 +80,9 @@ def process_chat_message(
|
|
| 80 |
return_tensors="pt"
|
| 81 |
)
|
| 82 |
|
|
|
|
|
|
|
|
|
|
| 83 |
# Generate response
|
| 84 |
with torch.no_grad():
|
| 85 |
generated_ids = model.generate(
|
|
@@ -93,7 +96,7 @@ def process_chat_message(
|
|
| 93 |
# Decode the generated response
|
| 94 |
generated_ids_trimmed = [
|
| 95 |
out_ids[len(in_ids):]
|
| 96 |
-
for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
|
| 97 |
]
|
| 98 |
|
| 99 |
response = processor.batch_decode(
|
|
|
|
| 14 |
# Load model with optimizations for inference
|
| 15 |
model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 16 |
model_id,
|
| 17 |
+
torch_dtype=torch.bfloat16,
|
| 18 |
device_map="auto"
|
| 19 |
)
|
| 20 |
processor = AutoProcessor.from_pretrained(model_id)
|
|
|
|
| 80 |
return_tensors="pt"
|
| 81 |
)
|
| 82 |
|
| 83 |
+
# Move inputs to the same device as the model
|
| 84 |
+
inputs = {k: v.to(model.device) if torch.is_tensor(v) else v for k, v in inputs.items()}
|
| 85 |
+
|
| 86 |
# Generate response
|
| 87 |
with torch.no_grad():
|
| 88 |
generated_ids = model.generate(
|
|
|
|
| 96 |
# Decode the generated response
|
| 97 |
generated_ids_trimmed = [
|
| 98 |
out_ids[len(in_ids):]
|
| 99 |
+
for in_ids, out_ids in zip(inputs['input_ids'], generated_ids)
|
| 100 |
]
|
| 101 |
|
| 102 |
response = processor.batch_decode(
|