# Spaces: Running
# image-text-to-text
import gradio as gr | |
import base64 | |
from huggingface_hub import InferenceClient | |
# Shared inference client bound to the nanonets/Nanonets-OCR-s model;
# created once at import time and reused by every request.
client = InferenceClient("nanonets/Nanonets-OCR-s")
def imageDescription(image, prompt):
    """Ask the model a question about an uploaded image.

    The image is PNG-encoded in memory and sent inline as a base64 ``data:``
    URL, together with the text prompt, in a single user chat message.

    Args:
        image: The uploaded picture. It must expose ``save(fp, format=...)``
            (the UI is configured to pass a PIL image). ``None`` is accepted
            and means that nothing was uploaded.
        prompt: The question or instruction sent alongside the image.

    Returns:
        The text content of the first completion choice, or a short hint
        asking for an upload when ``image`` is ``None``.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import io

    # Clicking the button without an upload yields no image; answer with a
    # readable hint instead of failing on ``None.save``.
    if image is None:
        return "Please upload an image first."

    # Encode in memory rather than through a fixed "image.png" on disk:
    # concurrent requests would otherwise overwrite each other's file, and
    # the working directory stays clean.
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    base64_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
    image_url = f"data:image/png;base64,{base64_image}"

    # One user turn carrying both the image and the text prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": image_url},
                },
                {
                    "type": "text",
                    "text": prompt,
                },
            ],
        },
    ]
    output = client.chat.completions.create(messages=messages)
    return output.choices[0].message.content
# UI definition: an image upload on one side; a prompt box, a trigger button
# and the answer box on the other.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened — confirm it matches the intended layout.
with gr.Blocks(theme=gr.themes.Citrus()) as demo:
    with gr.Row():
        with gr.Column():
            # Image input, handed to the callback as a PIL image.
            image = gr.Image(type="pil", label="Upload an image")
        with gr.Column():
            prompt = gr.Textbox(
                label="What would you like to know about this picture?",
                scale=1,
            )
            describe_btn = gr.Button("Describe the image", scale=1)
            output = gr.Textbox(label="Description", scale=1)
    # Send both inputs (image and prompt) to imageDescription and show the
    # returned text in the output box.
    describe_btn.click(fn=imageDescription, inputs=[image, prompt], outputs=output)

# Start the server only when executed as a script, so importing this module
# (e.g. from tooling or tests) does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch(debug=True)