File size: 1,140 Bytes
97aa4d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import torch
import gradio as gr
from transformers import pipeline

# Shared image-to-text pipeline (ViT encoder + GPT-2 decoder checkpoint),
# created once at import time and requested in bfloat16.
captioner = pipeline(
    task="image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
    torch_dtype=torch.bfloat16,
)

def generate_caption(image):
    """
    Takes a PIL image and returns a generated caption.

    Args:
        image: The image to caption; it is handed unchanged to the
            module-level ``captioner``. ``None`` is treated as "no image
            supplied" (e.g. the handler fired before anything was uploaded).

    Returns:
        The ``"generated_text"`` field of the first captioner result with
        surrounding whitespace removed, or an empty string when no image was
        given or the captioner produced no results.
    """
    # Nothing to caption: answer with an empty caption rather than handing
    # None to the captioner.
    if image is None:
        return ""

    outputs = captioner(image)

    # Guard the [0] lookup so an empty result list cannot raise IndexError.
    if not outputs:
        return ""

    # Trim stray leading/trailing whitespace so the textbox shows a clean caption.
    return outputs[0]["generated_text"].strip()

# Build the Gradio interface: a header, an image input next to a caption
# textbox, a button that runs generate_caption, and a footer.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    # Page header (emoji restored from mis-decoded UTF-8 bytes).
    gr.Markdown(
        """
        # 🖼️ Image Caption Generator
        Upload an image to generate a descriptive caption using ViT+GPT2.
        """
    )

    # Input image and resulting caption sit side by side in one row.
    with gr.Row():
        input_image = gr.Image(type="pil", label="Upload Image")
        caption_output = gr.Textbox(label="Generated Caption", lines=2)

    # Clicking the button sends the uploaded image through generate_caption
    # and writes the returned text into the caption textbox.
    generate_btn = gr.Button("Generate Caption")
    generate_btn.click(fn=generate_caption, inputs=input_image, outputs=caption_output)

    # Footer crediting the libraries and the model checkpoint.
    gr.Markdown(
        """
        ---
        Built with 🤗 Transformers (`nlpconnect/vit-gpt2-image-captioning`) and 🚀 Gradio
        """
    )

# Start the web server only when executed as a script, so importing this
# module (for tests or reuse of `demo`) does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()