Spaces:

AgentsGuards
/

image_utilities_mcp

Running

App Files Files Community

RafaelJaime committed 18 days ago

Commit

417d69b

2 Parent(s): 7b49e90 26e31ab

Merge branch 'main' of https://huggingface.co/spaces/AgentsGuards/agents-guard-mcp

Browse files

Files changed (5) hide show

.gitignore +3 -1
gradio_interface/app.py +201 -4
mcp_server.py +1 -0
src/utils/change_format.py +26 -13
src/utils/resize_image.py +56 -0

.gitignore CHANGED Viewed

	@@ -1 +1,3 @@
1	- __pycache__/

+__pycache__/
+.env
+test_agent.py

gradio_interface/app.py CHANGED Viewed

@@ -1,7 +1,204 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+import os
 import gradio as gr
+from os import getenv
+import base64
+from io import BytesIO
+from dotenv import load_dotenv
+import requests
+import socket
+import logging
+import json
+from langchain_openai import ChatOpenAI
+from langchain_core.messages import HumanMessage, AIMessage
+from langchain_core.callbacks import StreamingStdOutCallbackHandler
+# Load environment variables from the .env file located next to this module.
+dotenv_path = os.path.join(os.path.dirname(__file__), '.env')
+load_dotenv(dotenv_path=dotenv_path)
+# Connectivity test
+def test_connectivity(url="https://openrouter.helicone.ai/api/v1"):
+    """Return True when a GET request to ``url`` answers with HTTP 200.
+
+    Any ``requests`` or socket-level failure (including exceeding the
+    5-second timeout) is reported as ``False`` instead of being raised.
+    """
+    try:
+        reply = requests.get(url, timeout=5)
+    except (requests.RequestException, socket.error):
+        return False
+    return reply.status_code == 200
+# Helper to make direct API calls to OpenRouter when LangChain fails
+def direct_api_call(messages, api_key, base_url):
+    """POST ``messages`` to ``{base_url}/chat/completions`` and return the reply text.
+
+    Args:
+        messages: Chat messages in the OpenAI/OpenRouter wire format.
+        api_key: Key sent as the ``Authorization`` bearer token.
+        base_url: API root that ``/chat/completions`` is appended to.
+
+    Returns:
+        The content of the first choice's message, or a string starting with
+        ``"Error: "`` when the request or the response parsing fails.
+    """
+    request_headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}",
+        "HTTP-Referer": "https://your-app-domain.com",  # Add your domain
+        "X-Title": "Image Analysis App"
+    }
+    # The Helicone header is optional: only sent when a key is configured.
+    helicone_key = getenv("HELICONE_API_KEY")
+    if helicone_key:
+        request_headers["Helicone-Auth"] = f"Bearer {helicone_key}"
+    body = {
+        "model": "google/gemini-flash-1.5",
+        "messages": messages,
+        "stream": False,
+    }
+    endpoint = f"{base_url}/chat/completions"
+    try:
+        reply = requests.post(endpoint, headers=request_headers, json=body, timeout=30)
+        reply.raise_for_status()
+        data = reply.json()
+        first_choice = data["choices"][0]
+        return first_choice["message"]["content"]
+    except Exception as exc:
+        # Errors are returned as text so the caller can show them in the chat.
+        return f"Error: {str(exc)}"
+# Initialize LLM with streaming
+def init_llm():
+    """Build the streaming ``ChatOpenAI`` client used for chat completions.
+
+    The API key and base URL are read from the ``OPENROUTER_API_KEY`` and
+    ``OPENROUTER_BASE_URL`` environment variables.
+
+    Returns:
+        A ``ChatOpenAI`` instance configured for ``google/gemini-flash-1.5``
+        that echoes streamed tokens to stdout.
+
+    Raises:
+        RuntimeError: If ``test_connectivity()`` reports that the default
+            endpoint is unreachable.
+    """
+    if not test_connectivity():
+        raise RuntimeError("No hay conexión a OpenRouter. Verifica red y claves.")
+    # Only attach the Helicone header when a key is configured; otherwise the
+    # request would carry the literal value "Bearer None".
+    helicone_key = getenv("HELICONE_API_KEY")
+    model_kwargs = {}
+    if helicone_key:
+        model_kwargs["extra_headers"] = {"Helicone-Auth": f"Bearer {helicone_key}"}
+    return ChatOpenAI(
+        openai_api_key=getenv("OPENROUTER_API_KEY"),
+        openai_api_base=getenv("OPENROUTER_BASE_URL"),
+        model_name="google/gemini-flash-1.5",
+        streaming=True,
+        callbacks=[StreamingStdOutCallbackHandler()],
+        model_kwargs=model_kwargs,
+    )
+# Try to initialize LLM but handle failures gracefully
+try:
+    llm = init_llm()
+except Exception as e:
+    # Keep the app usable without LangChain (generate_response then uses the
+    # direct API call), but record why so the failure is not silent.
+    logging.warning("LLM initialization failed; using direct API calls instead: %s", e)
+    llm = None
+# Helpers
+def encode_image_to_base64(pil_image):
+    """Serialize ``pil_image`` as PNG and return the bytes as a base64 string.
+
+    ``pil_image`` only needs a ``save(file, format=...)`` method.
+    """
+    with BytesIO() as png_stream:
+        pil_image.save(png_stream, format="PNG")
+        raw_png = png_stream.getvalue()
+    encoded = base64.b64encode(raw_png)
+    return encoded.decode()
+# Core logic
+def generate_response(message, chat_history, image):
+    """Yield the assistant's reply to ``message``, growing as it streams in.
+
+    The LangChain client (module-level ``llm``) is tried first: streaming,
+    then a single non-streaming call. If ``llm`` is unavailable or both
+    attempts fail, the request is sent through ``direct_api_call`` instead.
+
+    Args:
+        message: Text of the user's latest prompt.
+        chat_history: Earlier turns as dicts with ``role`` and ``content``
+            keys; any role other than ``'user'`` is treated as the assistant.
+        image: Optional image; when given it is PNG/base64-encoded with
+            ``encode_image_to_base64`` and attached as a data URI.
+
+    Yields:
+        The reply text accumulated so far (streaming) or the complete reply;
+        on an unexpected failure, a single user-facing error message.
+    """
+    # Imported locally so this function is self-contained with respect to the
+    # module's import block.
+    from langchain_core.messages import SystemMessage
+
+    system_prompt = "You are an expert image analysis assistant. Answer succinctly."
+    # Normalize the history once; both request formats are built from it.
+    history = [
+        {
+            "role": "user" if turn.get('role') == 'user' else "assistant",
+            "content": turn.get('content'),
+        }
+        for turn in chat_history
+    ]
+    # The latest user turn carries the image (if any) as a PNG data URI.
+    if image is not None:
+        data_uri = f"data:image/png;base64,{encode_image_to_base64(image)}"
+        user_content = [
+            {"type": "text", "text": message},
+            {"type": "image_url", "image_url": {"url": data_uri}},
+        ]
+    else:
+        user_content = message
+    # OpenRouter/OpenAI wire format for the direct API call.
+    api_messages = (
+        [{"role": "system", "content": system_prompt}]
+        + history
+        + [{"role": "user", "content": user_content}]
+    )
+    # LangChain message objects; the system prompt uses the system role so
+    # both code paths send the same conversation.
+    lc_messages = [SystemMessage(content=system_prompt)]
+    for turn in history:
+        message_cls = HumanMessage if turn["role"] == "user" else AIMessage
+        lc_messages.append(message_cls(content=turn["content"]))
+    lc_messages.append(HumanMessage(content=user_content))
+    try:
+        if llm:
+            # First try streaming through LangChain.
+            try:
+                partial = ""
+                for chunk in llm.stream(lc_messages):
+                    content = getattr(chunk, 'content', None)
+                    if content is None:
+                        continue
+                    partial += content
+                    yield partial
+                # If we got this far, streaming worked.
+                return
+            except Exception as e:
+                logging.warning("Streaming failed: %s. Falling back to non-streaming mode", e)
+            # Then a single non-streaming LangChain call.
+            try:
+                response = llm.invoke(lc_messages)
+                yield response.content
+                return
+            except Exception as e:
+                # Do not give up here: the direct API call below is the last resort.
+                logging.warning("LangChain call failed: %s. Falling back to direct API call", e)
+        response_text = direct_api_call(
+            api_messages,
+            getenv("OPENROUTER_API_KEY"),
+            getenv("OPENROUTER_BASE_URL")
+        )
+        yield response_text
+    except Exception as e:
+        logging.exception("Unexpected error while generating a response")
+        yield f"⚠️ Error al generar respuesta: {str(e)}. Intenta más tarde."
+# Gradio interface
+def process_message(message, chat_history, image):
+    """Handle a submitted chat message, yielding UI updates as the reply arrives.
+
+    Args:
+        message: Text typed by the user.
+        chat_history: List of ``{'role', 'content'}`` dicts shown in the
+            chatbot, or ``None`` for an empty conversation. It is extended in
+            place.
+        image: The uploaded image, or ``None`` if nothing was uploaded.
+
+    Yields:
+        ``("", chat_history)`` tuples: an empty string for the message box
+        and the updated history for the chatbot.
+    """
+    if chat_history is None:
+        chat_history = []
+    if image is None:
+        chat_history.append({'role':'assistant','content':'Por favor sube una imagen.'})
+        # This function is a generator, so the update must be yielded; a
+        # returned value would never reach the outputs.
+        yield "", chat_history
+        return
+    # Snapshot the earlier turns before appending, so the model does not
+    # receive the new prompt twice or the "processing" placeholder.
+    prior_turns = list(chat_history)
+    chat_history.append({'role':'user','content':message})
+    chat_history.append({'role':'assistant','content':'⏳ Procesando...'})
+    yield "", chat_history
+    for chunk in generate_response(message, prior_turns, image):
+        # Each chunk is the full reply so far; overwrite the placeholder.
+        chat_history[-1]['content'] = chunk
+        yield "", chat_history
+# Build the UI: chat column on the left (scale 2), image column on the right (scale 1).
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column(scale=2):
+            # History is exchanged as role/content dicts, matching process_message.
+            chatbot = gr.Chatbot(type='messages', height=600)
+            msg = gr.Textbox(label="Mensaje", placeholder="Escribe tu pregunta...")
+            # Button that clears both the message box and the chat history.
+            clear = gr.ClearButton([msg, chatbot])
+        with gr.Column(scale=1):
+            # type="pil" means handlers receive the upload as a PIL image.
+            image_input = gr.Image(type="pil", label="Sube Imagen")
+            info = gr.Textbox(label="Info Imagen", interactive=False)
+    # Pressing Enter in the message box runs process_message; its outputs
+    # update the message box and the chatbot.
+    msg.submit(process_message, [msg, chatbot, image_input], [msg, chatbot])
+    # Show the image size whenever the image changes, or a notice when it is cleared.
+    image_input.change(lambda img: f"Tamaño: {img.size}" if img else "Sin imagen.", [image_input], [info])
+# Start the app when this module is executed.
+demo.launch()

mcp_server.py CHANGED Viewed

@@ -5,6 +5,7 @@ from src.utils.visualize_image import visualize_base64_image
 from src.utils.generate_image import generate_image
 from src.utils.apply_filter import apply_filter
 from src.utils.add_text import add_text_to_image
 from src.utils.watermark import add_watermark, remove_watermark
 from src.utils.describe import describe_image
 from src.utils.compress import compress_image

 from src.utils.generate_image import generate_image
 from src.utils.apply_filter import apply_filter
 from src.utils.add_text import add_text_to_image
+from src.utils.resize_image import resize_image
 from src.utils.watermark import add_watermark, remove_watermark
 from src.utils.describe import describe_image
 from src.utils.compress import compress_image

src/utils/change_format.py CHANGED Viewed

@@ -2,8 +2,9 @@ from PIL import Image
 from io import BytesIO
 import requests
 import base64
-def change_format(image_url: str, target_format: str) -> str:
     """
     Change the format of an image from a URL to the specified target format.
@@ -15,18 +16,30 @@ def change_format(image_url: str, target_format: str) -> str:
         The image converted to the target format as a base64-encoded string.
     """
-    response = requests.get(image_url, timeout=30)
-    response.raise_for_status()
-    # Open the image from bytes
-    img = Image.open(BytesIO(response.content))
     # Convert the image to the target format
-    output = BytesIO()
-    img.save(output, format=target_format)
-    output.seek(0)
-    # Convert to base64 string for JSON serialization
-    encoded_image = base64.b64encode(output.getvalue()).decode('utf-8')
-    return encoded_image  # Return base64 encoded string that can be serialized to JSON

 from io import BytesIO
 import requests
 import base64
+from typing import Union
+def change_format(image: Union[str, BytesIO], target_format: str) -> str:
     """
     Change the format of an image from a URL to the specified target format.
         The image converted to the target format as a base64-encoded string.
     """
+    if not isinstance(image, BytesIO):
+        response = requests.get(image, timeout=30)
+        response.raise_for_status()
+        # Open the image from bytes
+        img = Image.open(BytesIO(response.content))
     # Convert the image to the target format
+        output = BytesIO()
+        img.save(output, format=target_format)
+        output.seek(0)
+        # Convert to base64 string for JSON serialization
+        encoded_image = base64.b64encode(output.getvalue()).decode('utf-8')
+        return encoded_image  # Return base64 encoded string that can be serialized to JSON
+    else:
+        img = Image.open(image)
+        output = BytesIO()
+        img.save(output, format=target_format)
+        output.seek(0)
+        # Convert to base64 string for JSON serialization
+        encoded_image = base64.b64encode(output.getvalue()).decode('utf-8')
+        return encoded_image

src/utils/resize_image.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from PIL import Image
+from io import BytesIO
+import requests
+import base64
+from typing import Union, Tuple
+def resize_image(image_input: Union[str, BytesIO], target_size: Tuple[int, int], return_format: str = "base64") -> str:
+    """
+    Resize an image to fit within the target size while maintaining aspect ratio.
+
+    The image is scaled by a single factor so that it fits inside
+    ``target_size``; each resulting dimension is at least 1 pixel.
+
+    Args:
+        image_input: URL, file path, base64 string (raw or data URI), or BytesIO object
+        target_size: Tuple (width, height) for the target size; both must be positive
+        return_format: Format to return the image in ("base64" or "pil")
+
+    Returns:
+        Base64 encoded PNG of the resized image when return_format is
+        "base64" (case-insensitive); otherwise the resized PIL Image object.
+
+    Raises:
+        ValueError: If target_size is not positive or image_input has an
+            unsupported type.
+        requests.HTTPError: If downloading a URL input returns an error status.
+    """
+    # Validate before doing any I/O so bad sizes fail fast with a clear message.
+    target_width, target_height = target_size[0], target_size[1]
+    if target_width <= 0 or target_height <= 0:
+        raise ValueError("target_size must contain a positive width and height")
+    # Convert input to PIL Image
+    if isinstance(image_input, str):
+        if image_input.startswith(('http://', 'https://')):
+            # It's a URL
+            response = requests.get(image_input, timeout=10)
+            response.raise_for_status()
+            image = Image.open(BytesIO(response.content))
+        elif image_input.startswith('data:image'):
+            # It's a base64 data URI: the payload follows the first comma
+            base64_data = image_input.split(',', 1)[1]
+            image = Image.open(BytesIO(base64.b64decode(base64_data)))
+        elif ';base64,' not in image_input and len(image_input) > 500:
+            # Heuristic: a long string without a data-URI marker is treated
+            # as a raw base64 payload rather than a file path
+            image = Image.open(BytesIO(base64.b64decode(image_input)))
+        else:
+            # Assume it's a file path
+            image = Image.open(image_input)
+    elif isinstance(image_input, BytesIO):
+        image = Image.open(image_input)
+    else:
+        raise ValueError("Unsupported image input type")
+    # Use the smaller of the two ratios so the result fits inside the target box
+    scale = min(target_width / image.width, target_height / image.height)
+    # Clamp to 1 pixel: truncation can otherwise produce a zero dimension for
+    # very elongated images, which resize() rejects
+    new_size = (
+        max(1, int(image.width * scale)),
+        max(1, int(image.height * scale)),
+    )
+    # Resize the image using the proper resampling filter
+    resized_image = image.resize(new_size, Image.LANCZOS)
+    # Return in requested format
+    if return_format.lower() == "base64":
+        buffer = BytesIO()
+        resized_image.save(buffer, format="PNG")
+        return base64.b64encode(buffer.getvalue()).decode('utf-8')
+    else:
+        return resized_image