# Streamlit app: have a chosen character (rapper, Shrek, ...) caption an uploaded image.
import os

import streamlit as st
from huggingface_hub import InferenceClient
from PIL import Image
from transformers import pipeline
def initialize():
    """Set up per-session state exactly once.

    Stores the Hugging Face token (read from the HUGGINGFACE_TOKEN
    environment variable) and an InferenceClient in st.session_state so
    later reruns of the script reuse the same client.
    """
    if 'initialized' in st.session_state:
        # Streamlit reruns the whole script on every interaction;
        # bail out if this session was already set up.
        return
    print("Initializing...")
    st.session_state['initialized'] = True
    st.session_state['api_key'] = os.getenv("HUGGINGFACE_TOKEN")
    st.session_state['client'] = InferenceClient(api_key=st.session_state['api_key'])
def main():
    """Render the page: caption an uploaded image in a chosen character's voice.

    Flow: pick a character, upload an image, caption it with a BLIP
    image-to-text pipeline, then ask a Llama chat model (via the session's
    InferenceClient) to restyle the caption, streaming the reply.
    """
    initialize()
    st.header("Character Captions")
    st.write("Have a character caption any image you upload!")
    character = st.selectbox("Choose a character", ["rapper", "shrek", "unintelligible", "cookie monster"])
    uploaded_img = st.file_uploader("Upload an image")
    if uploaded_img is not None:
        # Open and display the uploaded image.
        image = Image.open(uploaded_img)
        st.image(image)
        # Get a caption for the image. Cache the pipeline in session state:
        # Streamlit reruns this function on every interaction, and rebuilding
        # the pipeline reloads the model weights from scratch each time.
        if 'image_captioner' not in st.session_state:
            st.session_state['image_captioner'] = pipeline(
                "image-to-text", model="Salesforce/blip-image-captioning-large"
            )
        response = st.session_state['image_captioner'](image)
        caption = response[0]['generated_text']
        # Wrap the caption in a character-specific prompt.
        character_prompts = {
            "rapper": f"Describe this caption like you're a rapper: {caption}.",
            "shrek": f"Describe this caption like you're Shrek: {caption}.",
            "unintelligible": f"Describe this caption in a way that makes no sense: {caption}.",
            "cookie monster": f"Describe this caption like you're cookie monster: {caption}."
        }
        prompt = character_prompts[character]
        messages = [
            {"role": "user", "content": prompt}
        ]
        # Pass to Llama for character output regarding the image caption.
        stream = st.session_state['client'].chat.completions.create(
            model="meta-llama/Llama-3.2-3B-Instruct",
            messages=messages,
            max_tokens=500,
            stream=True
        )
        response = ''
        for chunk in stream:
            # Streamed chunks (e.g. the final one) can carry
            # delta.content == None; concatenating None to a str
            # would raise TypeError, so skip empty deltas.
            content = chunk.choices[0].delta.content if chunk.choices else None
            if content:
                response += content
        st.write(response)
# Script entry point: run the Streamlit page when executed directly.
if __name__ == '__main__':
    main()