import gradio as gr
from optimum.intel import OVDiffusionPipeline
from transformers import AutoTokenizer
from threading import Lock
import warnings

# Suppress DeprecationWarning messages for the rest of the process (optional;
# only affects what is printed to the console/logs).
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Load the pipeline once at import time so that every call to generate_image
# reuses the same instance. Inference is pinned to the CPU device.
model_id = "OpenVINO/FLUX.1-schnell-int4-ov"
pipeline = OVDiffusionPipeline.from_pretrained(model_id, device="CPU")

# Explicitly load a fast T5 tokenizer and attach it as an attribute of the
# pipeline's second text encoder.
# NOTE(review): the FLUX pipelines are believed to read their T5 tokenizer from
# `pipeline.tokenizer_2` (and to bundle it with the model repo) -- confirm that
# this attribute on `text_encoder_2` is actually consulted during generation,
# and that `google/t5-v1_1-xl` is the intended checkpoint for this model.
tokenizer = AutoTokenizer.from_pretrained("google/t5-v1_1-xl", use_fast=True, add_prefix_space=True)
pipeline.text_encoder_2.tokenizer = tokenizer  # Assign to the T5 encoder

# Process-wide lock; generate_image holds it around each pipeline call so that
# only one generation runs on the shared pipeline at a time.
lock = Lock()

# Define the image generation function
def generate_image(prompt: str):
    """Generate a single image for *prompt* with the shared global pipeline.

    Args:
        prompt: Text description of the desired image, as entered in the
            Gradio textbox.

    Returns:
        The first entry of the pipeline result's ``images`` list.

    Raises:
        gr.Error: If ``prompt`` is ``None``, empty, or whitespace-only. The
            check runs before the lock is taken, so a blank submission never
            occupies the pipeline or delays other queued requests.
    """
    # Reject blank input early: a generation on CPU is slow and holds the
    # global lock, so there is no point running one without a prompt.
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt before generating an image.")

    # The lock serializes access to the shared pipeline: one generation at a time.
    with lock:
        # Reduce num_inference_steps for faster inference to avoid timeouts.
        # NOTE(review): the FLUX.1-schnell model card uses guidance_scale=0.0;
        # confirm whether the 3.5 passed here has any effect for this model.
        image = pipeline(prompt, num_inference_steps=2, guidance_scale=3.5).images[0]
    return image

# Build the Gradio UI: a single prompt textbox in, a single image out.
_prompt_input = gr.Textbox(
    label="Enter your prompt",
    placeholder="e.g., A futuristic cityscape at sunset",
)
_image_output = gr.Image(label="Generated Image")

interface = gr.Interface(
    fn=generate_image,
    inputs=_prompt_input,
    outputs=_image_output,
    title="FLUX.1-Schnell (OpenVINO INT4) Image Generator",
    description="Generate images from text prompts using FLUX.1-schnell optimized for CPU with OpenVINO.",
    # Two sample prompts are offered in the UI; caching of examples is disabled.
    examples=[
        ["A serene mountain landscape"],
        ["A cyberpunk city at night"],
    ],
    cache_examples=False,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    # Host and port are explicitly set for Spaces.
    _launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    interface.launch(**_launch_options)