import torch
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration

# Load model and processor (the "mix" checkpoint is tuned for open-ended tasks such as VQA)
model_id = "google/paligemma-3b-mix-224"
processor = AutoProcessor.from_pretrained(model_id)
model = PaliGemmaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)

def ecommerce_assistant(image, question):
    # PaliGemma expects a task prefix; "answer en" triggers English question answering
    prompt = f"answer en {question}"
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(
        model.device, torch.float16
    )
    input_len = inputs["input_ids"].shape[-1]
    outputs = model.generate(**inputs, max_new_tokens=50)
    # Decode only the newly generated tokens, skipping the echoed prompt
    return processor.decode(outputs[0][input_len:], skip_special_tokens=True).strip()

demo = gr.Interface(
    fn=ecommerce_assistant,
    inputs=[gr.Image(type="pil"), "text"],
    outputs="text",
    title="🛍️ E-commerce Visual Assistant",
    description="Upload a product photo and ask questions like 'What brand is this?' or 'Can I buy it online?'",
)

demo.launch()