# Streamlit app: have a chosen character (rapper, Shrek, ...) caption an uploaded image.
import os

import streamlit as st
from huggingface_hub import InferenceClient
from PIL import Image
from transformers import pipeline
def initialize():
    """Set up per-session state exactly once.

    Stores the Hugging Face token (read from the HUGGINGFACE_TOKEN
    environment variable) and an InferenceClient in st.session_state so
    later reruns of the script reuse the same client.
    """
    if 'initialized' in st.session_state:
        # Streamlit reruns the whole script on every interaction;
        # bail out if this session was already set up.
        return
    print("Initializing...")
    st.session_state['initialized'] = True
    st.session_state['api_key'] = os.getenv("HUGGINGFACE_TOKEN")
    st.session_state['client'] = InferenceClient(api_key=st.session_state['api_key'])
def main():
    """Render the page: caption an uploaded image in a chosen character's voice.

    Flow: pick a character, upload an image, caption it with a BLIP
    image-to-text pipeline, then ask a Llama chat model (via the session's
    InferenceClient) to restyle the caption, streaming the reply.
    """
    initialize()
    st.header("Character Captions")
    st.write("Have a character caption any image you upload!")
    character = st.selectbox("Choose a character", ["rapper", "shrek", "unintelligible", "cookie monster"])
    uploaded_img = st.file_uploader("Upload an image")
    if uploaded_img is not None:
        # Open and display the uploaded image.
        image = Image.open(uploaded_img)
        st.image(image)
        # Get a caption for the image. Cache the pipeline in session state:
        # Streamlit reruns this function on every interaction, and rebuilding
        # the pipeline reloads the model weights from scratch each time.
        if 'image_captioner' not in st.session_state:
            st.session_state['image_captioner'] = pipeline(
                "image-to-text", model="Salesforce/blip-image-captioning-large"
            )
        response = st.session_state['image_captioner'](image)
        caption = response[0]['generated_text']
        # Wrap the caption in a character-specific prompt.
        character_prompts = {
            "rapper": f"Describe this caption like you're a rapper: {caption}.",
            "shrek": f"Describe this caption like you're Shrek: {caption}.",
            "unintelligible": f"Describe this caption in a way that makes no sense: {caption}.",
            "cookie monster": f"Describe this caption like you're cookie monster: {caption}."
        }
        prompt = character_prompts[character]
        messages = [
            {"role": "user", "content": prompt}
        ]
        # Pass to Llama for character output regarding the image caption.
        stream = st.session_state['client'].chat.completions.create(
            model="meta-llama/Llama-3.2-3B-Instruct",
            messages=messages,
            max_tokens=500,
            stream=True
        )
        response = ''
        for chunk in stream:
            # Streamed chunks (e.g. the final one) can carry
            # delta.content == None; concatenating None to a str
            # would raise TypeError, so skip empty deltas.
            content = chunk.choices[0].delta.content if chunk.choices else None
            if content:
                response += content
        st.write(response)
# Script entry point: run the Streamlit page when executed directly.
if __name__ == '__main__':
    main()