import gradio as gr
import torch
from transformers import AutoProcessor, LlavaForConditionalGeneration

MODEL_ID = "llava-hf/llava-1.5-7b-hf"

# LLaVA checkpoints load through LlavaForConditionalGeneration,
# not AutoModelForCausalLM; fp16 keeps the 7B model within ~14 GB of VRAM.
model = LlavaForConditionalGeneration.from_pretrained(
    MODEL_ID, torch_dtype=torch.float16
).to("cuda")
processor = AutoProcessor.from_pretrained(MODEL_ID)

def chat(image, prompt):
    # LLaVA-1.5 expects its chat template, including the <image> placeholder,
    # so the image features have a position to attach to.
    text = f"USER: <image>\n{prompt} ASSISTANT:"
    # Move tensors to the GPU and cast the pixel values to fp16 to match the model.
    inputs = processor(text=text, images=image, return_tensors="pt").to("cuda", torch.float16)
    output = model.generate(**inputs, max_new_tokens=50)
    # Decode only the newly generated tokens, not the echoed prompt.
    reply_ids = output[0][inputs["input_ids"].shape[1]:]
    return processor.tokenizer.decode(reply_ids, skip_special_tokens=True)

# type="pil" makes Gradio pass a PIL image straight to the processor.
gr.Interface(fn=chat, inputs=[gr.Image(type="pil"), "text"], outputs="text").launch()
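
# Optional 4-bit loading (a sketch, not part of the original script): if the
# GPU cannot hold the model in fp16, the same checkpoint can be loaded in
# 4-bit with bitsandbytes. This assumes the `bitsandbytes` and `accelerate`
# packages are installed; swap it in for the fp16 load above.
#
# from transformers import BitsAndBytesConfig
#
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.float16,  # compute dtype for the 4-bit layers
# )
# model = LlavaForConditionalGeneration.from_pretrained(
#     MODEL_ID,
#     quantization_config=bnb_config,
#     device_map="auto",  # let accelerate place layers on the available GPU
# )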