import gradio as gr
from transformers import pipeline

# Build the zero-shot object detection pipeline once, at import time, so the
# same loaded model is shared by every call made through this module.
pipe = pipeline(
    task="zero-shot-object-detection",
    model="google/owlvit-base-patch16",
)

# Callback wired into the Gradio interface: image + label text -> detections
def detect_objects(image, labels):
    """Run zero-shot object detection on an image for the given labels.

    Args:
        image: Image to analyse; forwarded to the pipeline unchanged.
            ``None`` (no image supplied) yields an empty result.
        labels: Comma-separated candidate labels, e.g. ``"cat, dog"``.
            Whitespace around each label is stripped and blank entries
            (an empty textbox, a trailing or doubled comma) are ignored.
            ``None`` is treated like an empty string.

    Returns:
        The pipeline's result for the image and the cleaned label list, or
        an empty list when there is no image or no usable label.
    """
    # Drop blank entries so empty strings are never sent as text queries.
    stripped = (part.strip() for part in (labels or "").split(","))
    candidate_labels = [label for label in stripped if label]

    # Nothing to detect without an image or at least one label, so skip the
    # model call entirely and report "no detections".
    if image is None or not candidate_labels:
        return []

    return pipe(image, candidate_labels=candidate_labels)

# Input components: the uploaded image is handed to the callback as a file
# path, and the candidate labels arrive as one comma-separated string.
image_input = gr.Image(type="filepath", label="Upload Image")
labels_input = gr.Textbox(lines=2, label="Candidate Labels (comma separated)")

# Output component: the detection results are rendered as JSON.
results_output = gr.JSON()

# Assemble the web UI around detect_objects.
iface = gr.Interface(
    title="Zero-Shot Object Detection",
    description="Upload an image and provide a list of labels (comma separated) for object detection.",
    fn=detect_objects,
    inputs=[image_input, labels_input],
    outputs=results_output,
)

# Start serving the interface.
iface.launch()