Spaces:

bielas194
/

image_generate_search

Runtime error

App Files Files Community

bielas194 committed on Jun 12

Commit

63621b6

verified ·

1 Parent(s): 90a9f10

Create app.py

Browse files

Files changed (1) hide show

app.py +165 -0

app.py ADDED Viewed

	@@ -0,0 +1,165 @@

+import os
+import torch
+import gradio as gr
+from PIL import Image
+import tempfile
+import shutil
+from functools import partial # To create a callable for our custom tool
+from diffusers import StableDiffusionPipeline
+from huggingface_hub import InferenceClient
+# LangChain imports
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.tools import tool
+from langchain_community.tools import DuckDuckGoSearchRun
+from langchain_community.llms import HuggingFaceHub
+from langchain.agents import AgentExecutor, create_react_agent
+from langchain.schema import HumanMessage, AIMessage
+# --- 1. Load Stable Diffusion Pipeline (happens once at startup) ---
+HF_TOKEN = os.environ.get("HF_TOKEN") # Using HF_TOKEN for consistency with HuggingFaceHub LLM
+# Define the model ID for image generation
+IMAGE_GEN_MODEL_ID = "segmind/tiny-sd" # Using the smaller model as it loaded successfully
+# IMAGE_GEN_MODEL_ID = "runwayml/stable-diffusion-v1-5" # You can try this again after proving basic functionality
+print(f"Loading Stable Diffusion Pipeline directly on GPU: {IMAGE_GEN_MODEL_ID}...")
+try:
+    pipe = StableDiffusionPipeline.from_pretrained(
+        IMAGE_GEN_MODEL_ID,
+        torch_dtype=torch.float16,
+        use_safetensors=False, # Set to False for models that don't have safetensors (like tiny-sd)
+        token=HF_TOKEN # Pass token for potential faster model download
+    )
+    pipe.to("cuda") # Move the model to the GPU
+    print(f"Stable Diffusion Pipeline ({IMAGE_GEN_MODEL_ID}) loaded successfully on GPU.")
+except Exception as e:
+    print("❌ Error loading Stable Diffusion Pipeline:")
+    import traceback
+    traceback.print_exc()
+    pipe = None # Indicate failure to load
+# --- 2. Define Custom Image Generation Tool for LangChain ---
+# Use @tool decorator to make a function a LangChain tool
+@tool
+def image_generator(prompt: str) -> str:
+    """
+    Generates an image from a detailed text prompt using a Stable Diffusion pipeline.
+    The input MUST be a detailed text description for the image to generate.
+    """
+    if pipe is None:
+        return "Error: Image generation pipeline failed to load. Please check Space logs during startup."
+    print(f"\n--- Agent is calling image_generator with prompt: '{prompt}' ---")
+    try:
+        with torch.no_grad():
+            pil_image = pipe(prompt, guidance_scale=7.5, height=512, width=512).images[0]
+        # Save the PIL image to a temporary file, Gradio will handle displaying this path
+        # NOTE: LangChain tools typically return strings. For image display, we'll return
+        # the path, and handle its display in the Gradio UI directly based on content.
+        temp_dir = tempfile.mkdtemp()
+        image_path = os.path.join(temp_dir, "generated_image.png")
+        pil_image.save(image_path)
+        print(f"Image saved to temporary path: {image_path}")
+        # Return a special string prefix so Gradio knows it's an image path
+        return f"__IMAGE_PATH__:{image_path}"
+    except Exception as e:
+        print("Error in image_generator tool execution:")
+        traceback.print_exc()
+        return f"Error generating image: {str(e)}"
+# --- 3. Define other Tools for LangChain ---
+search = DuckDuckGoSearchRun()
+# --- 4. Define the LangChain Agent ---
+# Ensure models are loaded successfully before proceeding
+if pipe is None:
+    raise RuntimeError("Cannot start agent as image generation pipeline failed to load. Check logs.")
+# Instantiate the LLM for the agent
+# Using HuggingFaceHub to connect to Zephyr-7b-beta model on HF Inference API
+# Ensure HF_TOKEN is set as a Space Secret
+llm = HuggingFaceHub(
+    repo_id="HuggingFaceH4/zephyr-7b-beta",
+    huggingfacehub_api_token=HF_TOKEN, # Use HF_TOKEN directly as required by HuggingFaceHub LLM
+    model_kwargs={"temperature": 0.5, "max_new_tokens": 512}
+)
+# Create the tools list
+tools = [image_generator, search]
+# Define the agent prompt
+# This prompt guides the LLM on how to use the tools
+prompt_template = ChatPromptTemplate.from_messages(
+    [
+        ("system", """You are a powerful AI assistant that can generate images and search the web.
+You have access to the following tools: {tools}
+When you need to generate an image, use the `image_generator` tool. Its input must be a very detailed, descriptive text string.
+When you need factual information or context, use the `search` tool.
+Always follow these steps:
+1. Think step-by-step: Analyze the user's request and determine if you need to search or generate an image.
+2. If you need to search, use the `search` tool.
+3. If you need to generate an image, ensure you have enough detail. If not, ask for more or use search.
+4. When you have enough information, use the `image_generator` tool.
+5. Provide your final answer. If you generated an image, include the image in your final answer.
+"""),
+        MessagesPlaceholder(variable_name="chat_history"),
+        ("human", "{input}"),
+        MessagesPlaceholder(variable_name="agent_scratchpad"),
+    ]
+)
+# Create the agent
+agent = create_react_agent(llm, tools, prompt_template)
+# Create the agent executor
+agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)
+# --- 5. Gradio UI Integration ---
+# Function to run the agent and display output
+def run_agent_in_gradio(message, history):
+    # Convert Gradio history to LangChain chat_history format
+    chat_history = []
+    for human_msg, ai_msg in history:
+        chat_history.append(HumanMessage(content=human_msg))
+        chat_history.append(AIMessage(content=ai_msg))
+    try:
+        # Stream output from the agent
+        # LangChain AgentExecutor doesn't directly stream token by token in a simple loop
+        # For streaming, you'd typically use .stream() or a custom callback handler.
+        # For simplicity in Gradio ChatInterface, we'll run it once.
+        response = agent_executor.invoke({"input": message, "chat_history": chat_history})
+        agent_output = response["output"]
+        # Check if the output is an image path from our custom tool
+        if agent_output.startswith("__IMAGE_PATH__:") :
+            image_path = agent_output.replace("__IMAGE_PATH__:", "")
+            # Return the Gradio Image component directly
+            return gr.Image(value=image_path, label="Generated Image")
+        else:
+            # Return regular text
+            return agent_output
+    except Exception as e:
+        print(f"Error running agent: {e}")
+        traceback.print_exc()
+        return f"❌ Agent encountered an error: {str(e)}"
+# Gradio ChatInterface setup
+demo = gr.ChatInterface(
+    fn=run_agent_in_gradio,
+    chatbot=gr.Chatbot(label="AI Agent"),
+    textbox=gr.Textbox(placeholder="Ask me to generate an image or search the web...", container=False, scale=7),
+    title="Intelligent Image Generator & Web Search Agent (LangChain)",
+    description="This agent can generate images based on prompts or search the web for information first."
+)
+if __name__ == "__main__":
+    demo.launch()