z-coder's picture
Create app.py
980de81 verified
raw
history blame
607 Bytes
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor, LlavaForConditionalGeneration
# Hub checkpoint shared by the model and its processor so the two cannot drift apart.
MODEL_ID = "llava-hf/llava-1.5-7b-hf"

# LLaVA is an image+text architecture, so it is loaded through its dedicated
# conditional-generation class rather than the text-only causal-LM auto class.
# Weights are loaded in half precision and then moved to the GPU.
model = LlavaForConditionalGeneration.from_pretrained(
    MODEL_ID, torch_dtype=torch.float16
).to("cuda")

# The processor bundles the tokenizer and the image preprocessor for the checkpoint.
processor = AutoProcessor.from_pretrained(MODEL_ID)
def chat(image, prompt):
    """Answer a text prompt about an image using the module-level model.

    Args:
        image: The image supplied by the Gradio "image" input, or ``None``
            when the user submitted without uploading one.
        prompt: The user's question or instruction. If it does not already
            contain an ``<image>`` placeholder it is wrapped in the
            ``USER: <image>\\n... ASSISTANT:`` template.

    Returns:
        The newly generated text only (the prompt is not echoed back),
        limited to 50 new tokens.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image.")

    text = prompt or ""
    if "<image>" not in text:
        # Without the placeholder the processor has nowhere to splice the
        # image features into the token sequence.
        text = f"USER: <image>\n{text.strip()} ASSISTANT:"

    # Keyword arguments keep the call independent of the processor's
    # positional parameter order.
    inputs = processor(text=text, images=image, return_tensors="pt")
    # Move to the model's device; the dtype argument casts only floating-point
    # tensors (the pixel values), so they match the half-precision weights.
    inputs = inputs.to(model.device, model.dtype)

    output = model.generate(**inputs, max_new_tokens=50)

    # generate() returns prompt tokens followed by new tokens; drop the prompt
    # part so only the answer is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    answer_tokens = output[0][prompt_length:]
    return processor.tokenizer.decode(answer_tokens, skip_special_tokens=True).strip()
# Build the web UI: an image input and a text input feed `chat`, whose
# string result is displayed as text. Then start the Gradio server.
demo = gr.Interface(
    fn=chat,
    inputs=["image", "text"],
    outputs="text",
)
demo.launch()