import time

import gradio as gr
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

Image.MAX_IMAGE_PIXELS = None  # disable Pillow's decompression-bomb limit so very large uploads still open

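# Load the processor and model once at startup; the first run downloads the
# weights from the Hugging Face Hub, so expect a slow cold start.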
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

def caption(img, min_new, max_new):
    raw_image = Image.open(img).convert('RGB')
    # Downscale large uploads in place; the processor rescales the image
    # again anyway, so this only caps decoding and preprocessing cost.
    raw_image.thumbnail((1024, 1024))
    inputs = processor(raw_image, return_tensors="pt")
    with torch.no_grad():  # inference only; skip gradient tracking
        out = model.generate(
            **inputs,
            min_new_tokens=int(min_new),  # sliders may deliver floats
            max_new_tokens=int(max_new),
        )
    return processor.decode(out[0], skip_special_tokens=True)

def predict(img, min_new, max_new):
    if img is None:
        return "❌ Please upload an image."
    start = time.time()
    try:
        result = caption(img, min_new, max_new)
    except Exception as e:
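        # Surface the error in the output textbox instead of crashing the worker.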
        return f"⚠️ Error: {e}"
    elapsed = time.time() - start
    return f"{result}\n⏱ Took {elapsed:.2f} seconds"

iface = gr.Interface(
    fn=predict,
    title='BLIP Image Captioning (large)',
    description="Uses Salesforce/blip-image-captioning-large on CPU.",
    inputs=[
        gr.Image(type='filepath', label='Image'),
        gr.Slider(label='Min New Tokens', minimum=1, maximum=50, value=5, step=1),
        gr.Slider(label='Max New Tokens', minimum=1, maximum=100, value=20, step=1),
    ],
    outputs=gr.Textbox(label='Caption'),
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
)
iface.launch()
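
# A minimal sketch of querying the running app from another process, assuming
# the default local port and the gradio_client package; "cat.jpg" is a
# hypothetical test image:
#
#   from gradio_client import Client, handle_file
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict(handle_file("cat.jpg"), 5, 20, api_name="/predict")
#   print(result)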