"""Gradio chat app: upload an image, get a BLIP-generated caption.

Each analyzed image is shown as an inline HTML thumbnail in the chat
history alongside its generated caption.
"""

import base64
from io import BytesIO

import gradio as gr
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Prepare the captioning model once at startup (GPU if available).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
).to(device)


def image_to_base64_html(img):
    """Return an HTML <img> thumbnail of *img*, embedded as a base64 data URI.

    Args:
        img: a PIL.Image instance.

    Returns:
        An HTML string suitable for display inside a Gradio Chatbot message.
    """
    buffered = BytesIO()
    img.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    # BUG FIX: the original assigned an empty f-string here, discarding
    # img_str and returning '' — the <img> tag is reconstructed below.
    html = f'<img src="data:image/png;base64,{img_str}" style="max-height:200px;">'
    return html


def describe_image_with_thumbnail(image, history):
    """Generate a caption for *image* and append (thumbnail, caption) to the chat.

    Args:
        image: a PIL.Image from the upload widget, or None if nothing was uploaded.
        history: the Chatbot state — a list of (user, bot) message pairs,
            or None on the very first call.

    Returns:
        The updated history list.
    """
    # Gradio may pass None for an empty Chatbot on the first interaction.
    if history is None:
        history = []
    if image is None:
        return history

    # Generate the caption; inference only, so skip gradient tracking.
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        output = model.generate(**inputs)
    caption = processor.decode(output[0], skip_special_tokens=True)

    # Render the image as an inline HTML thumbnail.
    image_html = image_to_base64_html(image)

    # Append the (image, caption) exchange to the chat history.
    history.append((image_html, caption))
    return history


# Build the UI.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Bildbeschreibung-Chatbot mit Thumbnail-Vorschau")
    chatbot = gr.Chatbot(label="Bilder-Chat")
    with gr.Row():
        image_input = gr.Image(type="pil", label="Bild hier hochladen")
        send_btn = gr.Button("Bild analysieren")
    send_btn.click(
        fn=describe_image_with_thumbnail,
        inputs=[image_input, chatbot],
        outputs=chatbot,
    )

if __name__ == "__main__":
    demo.launch()