import time

import gradio as gr
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

Image.MAX_IMAGE_PIXELS = None  # disable Pillow's decompression-bomb limit so very large uploads still open

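# Load the processor and model once at startup; the first run downloads the
# weights from the Hugging Face Hub, so expect a slow cold start.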
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

def caption(img, min_new, max_new):
    raw_image = Image.open(img).convert('RGB')
    # Downscale large uploads in place; the processor rescales the image
    # again anyway, so this only caps decoding and preprocessing cost.
    raw_image.thumbnail((1024, 1024))
    inputs = processor(raw_image, return_tensors="pt")
    with torch.no_grad():  # inference only; skip gradient tracking
        out = model.generate(
            **inputs,
            min_new_tokens=int(min_new),  # sliders may deliver floats
            max_new_tokens=int(max_new),
        )
    return processor.decode(out[0], skip_special_tokens=True)

def predict(img, min_new, max_new):
    if img is None:
        return "❌ Please upload an image."
    start = time.time()
    try:
        result = caption(img, min_new, max_new)
    except Exception as e:
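        # Surface the error in the output textbox instead of crashing the worker.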
        return f"⚠️ Error: {e}"
    elapsed = time.time() - start
    return f"{result}\n⏱ Took {elapsed:.2f} seconds"

iface = gr.Interface(
    fn=predict,
    title='BLIP Image Captioning (large)',
    description="Uses Salesforce/blip-image-captioning-large on CPU.",
    inputs=[
        gr.Image(type='filepath', label='Image'),
        gr.Slider(label='Min New Tokens', minimum=1, maximum=50, value=5, step=1),
        gr.Slider(label='Max New Tokens', minimum=1, maximum=100, value=20, step=1),
    ],
    outputs=gr.Textbox(label='Caption'),
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
)
iface.launch()
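
# A minimal sketch of querying the running app from another process, assuming
# the default local port and the gradio_client package; "cat.jpg" is a
# hypothetical test image:
#
#   from gradio_client import Client, handle_file
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict(handle_file("cat.jpg"), 5, 20, api_name="/predict")
#   print(result)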