# balaji4991512's picture
# Create app.py
# 97aa4d9 verified
import torch
import gradio as gr
from transformers import pipeline
# Image-captioning pipeline (ViT encoder + GPT-2 decoder), built once when the
# module is loaded; bfloat16 precision is requested through torch_dtype.
captioner = pipeline(
    task="image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
    torch_dtype=torch.bfloat16,
)
def generate_caption(image) -> str:
    """
    Generate a caption for an uploaded image.

    Args:
        image: PIL image supplied by the Gradio image input, or ``None``
            when the button is clicked before anything has been uploaded.

    Returns:
        The generated caption with surrounding whitespace removed, a short
        hint asking for an upload when ``image`` is ``None``, or an empty
        string when the pipeline returns no candidates.
    """
    # An empty image input arrives as None; handing that to the pipeline
    # fails, so answer with a hint instead of surfacing an error.
    if image is None:
        return "Please upload an image first."

    outputs = captioner(image)
    if not outputs:
        return ""
    # One dict per generated sequence; the first one is the caption shown.
    return outputs[0]["generated_text"].strip()
# Build the Gradio interface: a header, an image input next to the caption
# output, a button that triggers captioning, and a footer with credits.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown(
        """
        # 🖼️ Image Caption Generator
        Upload an image to generate a descriptive caption using ViT+GPT2.
        """
    )

    # Input and output sit side by side in a single row.
    with gr.Row():
        input_image = gr.Image(type="pil", label="Upload Image")
        caption_output = gr.Textbox(label="Generated Caption", lines=2)

    generate_btn = gr.Button("Generate Caption")
    # Clicking the button sends the uploaded image to generate_caption and
    # writes the returned text into the caption textbox.
    generate_btn.click(
        fn=generate_caption,
        inputs=input_image,
        outputs=caption_output,
    )

    gr.Markdown(
        """
        ---
        Built with 🤗 Transformers (`nlpconnect/vit-gpt2-image-captioning`) and 🚀 Gradio
        """
    )
# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. to reuse `demo`) does not launch the app.
if __name__ == "__main__":
    demo.launch()