# File size: 1,140 Bytes
# Revision: 97aa4d9
import torch
import gradio as gr
from transformers import pipeline
# Build the ViT+GPT2 image-captioning pipeline once, at module import, so
# every request reuses the same loaded model. bfloat16 is requested as the
# torch dtype for the model.
captioner = pipeline(
    task="image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
    torch_dtype=torch.bfloat16,
)
def generate_caption(image):
    """
    Generate a caption for an uploaded image.

    Args:
        image: The PIL image supplied by the Gradio image input, or ``None``
            when the button is clicked before an image has been uploaded.

    Returns:
        The ``generated_text`` of the first pipeline result, or an empty
        string when there is no image or the pipeline returned no results.
    """
    # An empty image input arrives as None; passing that to the pipeline
    # would raise and surface as a generic error in the UI.
    if image is None:
        return ""

    outputs = captioner(image)

    # Guard the index so an empty result list cannot raise IndexError.
    if not outputs:
        return ""
    return outputs[0]["generated_text"]
# Build the Gradio interface: a heading, an image input next to a caption
# textbox, a button that runs the captioner, and a footer.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    # Markdown text is written indented for readability; Gradio's Markdown
    # component strips the common leading indentation before rendering.
    gr.Markdown(
        """
        # 🖼️ Image Caption Generator
        Upload an image to generate a descriptive caption using ViT+GPT2.
        """
    )

    # Input image and generated caption sit side by side.
    with gr.Row():
        input_image = gr.Image(type="pil", label="Upload Image")
        caption_output = gr.Textbox(label="Generated Caption", lines=2)

    generate_btn = gr.Button("Generate Caption")
    # Clicking the button sends the uploaded image to generate_caption and
    # writes the returned text into the caption textbox.
    generate_btn.click(
        fn=generate_caption,
        inputs=input_image,
        outputs=caption_output,
    )

    gr.Markdown(
        """
        ---
        Built with 🤗 Transformers (`nlpconnect/vit-gpt2-image-captioning`) and 🚀 Gradio
        """
    )

# Start the web server for the app.
demo.launch()