import gradio as gr
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import time

# Allow very large uploads by disabling PIL's decompression-bomb limit.
Image.MAX_IMAGE_PIXELS = None

# Load the BLIP captioning model and its processor once at startup (CPU by default).
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")


def caption(img, min_new, max_new):
    # Open the upload, normalise to RGB, and downscale in place so CPU inference stays quick.
    raw_image = Image.open(img).convert('RGB')
    raw_image.thumbnail((1024, 1024))
    inputs = processor(raw_image, return_tensors="pt")
    out = model.generate(
        **inputs,
        min_new_tokens=min_new,
        max_new_tokens=max_new,
    )
    return processor.decode(out[0], skip_special_tokens=True)


def greet(img, min_new, max_new):
    if img is None:
        return "❌ Please upload an image."
    start = time.time()
    try:
        result = caption(img, min_new, max_new)
    except Exception as e:
        # Surface any failure (unreadable file, generation error) in the UI instead of crashing.
        return f"⚠️ Error: {e}"
    elapsed = time.time() - start
    return f"{result}\n⏱ Took {elapsed:.2f} seconds"


iface = gr.Interface(
    fn=greet,
    title='BLIP Image Captioning (large)',
    description="Uses Salesforce/blip-image-captioning-large on CPU.",
    inputs=[
        gr.Image(type='filepath', label='Image'),
        # step=1 keeps the slider values integral, since generate() expects whole token counts.
        gr.Slider(label='Min New Tokens', minimum=1, maximum=50, value=5, step=1),
        gr.Slider(label='Max New Tokens', minimum=1, maximum=100, value=20, step=1),
    ],
    outputs=gr.Textbox(label='Caption'),
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
)

iface.launch()
|