Spaces:

AgentsGuards
/

image_utilities_mcp

Running

File size: 4,288 Bytes

import os
import base64
import requests
from pathlib import Path
from openai import OpenAI
from urllib.parse import urlparse
from dotenv import load_dotenv


# Supported local-file extensions mapped to the MIME type that is advertised
# in the data URI sent to the model.
_IMAGE_MIME_TYPES = {
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.bmp': 'image/bmp',
    '.webp': 'image/webp',
}


def describe_image(image_path: str) -> str:
    """
    Generate a description of the image at the given path or URL.

    The image is read (from disk) or downloaded (from a URL), base64-encoded
    into a data URI, and sent to a chat-completions endpoint together with a
    prompt asking for a detailed description.

    Args:
        image_path: Path to local image file OR URL to image. The value is
            treated as a URL when it parses with both a scheme and a network
            location; otherwise it is treated as a local path.

    Returns:
        The model's description of the image. This function does not raise for
        the failures it handles inside the ``try`` block; instead it returns a
        human-readable string beginning with "Error" (missing API key,
        download failure, missing/unsupported/empty file, empty model
        response, or an API error).
    """
    load_dotenv()

    # Check if API key is available
    api_key = os.getenv("NEBIUS_API_KEY")
    if not api_key:
        return "Error: NEBIUS_API_KEY environment variable not set"

    try:
        # Determine if it's a URL or local file path
        parsed = urlparse(image_path)
        is_url = bool(parsed.scheme and parsed.netloc)

        if is_url:
            # Handle URL
            print(f"📡 Downloading image from URL: {image_path}")
            download = requests.get(image_path, timeout=30)
            download.raise_for_status()

            # Determine content type from response headers
            content_type = download.headers.get('content-type', '')
            if 'image' not in content_type.lower():
                return f"Error: URL does not appear to contain an image. Content-Type: {content_type}"

            image_data = download.content

            # Drop parameters such as "; charset=..." so that only the bare
            # media type ends up in the data URI. Fall back to JPEG when the
            # header is not a plain "image/<subtype>" value.
            media_type = content_type.split(';', 1)[0].strip().lower()
            mime_type = media_type if media_type.startswith('image/') else 'image/jpeg'

        else:
            # Handle local file. A separate name is used so the ``image_path``
            # argument keeps its original value for the error handlers below.
            local_path = Path(image_path)

            if not local_path.exists():
                return f"Error: Local file not found: {local_path}"

            # Check if it's an image file
            suffix = local_path.suffix.lower()
            if suffix not in _IMAGE_MIME_TYPES:
                # Sorted so the message is identical from run to run.
                supported = ', '.join(sorted(_IMAGE_MIME_TYPES))
                return f"Error: Unsupported file type '{local_path.suffix}'. Supported: {supported}"
            mime_type = _IMAGE_MIME_TYPES[suffix]

            print(f"📁 Reading local image: {local_path}")
            with open(local_path, "rb") as f:
                image_data = f.read()

        # An empty payload cannot be described; fail before calling the API.
        if not image_data:
            return "Error: Image data is empty"

        # Encode image to base64
        base64_image = base64.b64encode(image_data).decode('utf-8')

        # Create OpenAI client
        client = OpenAI(
            base_url="https://api.studio.nebius.com/v1/",
            api_key=api_key
        )

        # Make API call with proper vision format
        completion = client.chat.completions.create(
            model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that provides detailed descriptions of images. Focus on the main subjects, colors, composition, and any notable details."
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Please provide a detailed description of this image."
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                # Label the payload with its real media type
                                # rather than always claiming JPEG.
                                "url": f"data:{mime_type};base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            max_tokens=500
        )

        # The message content can be absent (and the choices list empty);
        # report that explicitly instead of failing on ``.strip()``.
        choices = completion.choices
        content = choices[0].message.content if choices else None
        if content is None:
            return "Error: The model returned an empty response"
        return content.strip()

    except requests.RequestException as e:
        return f"Error downloading image from URL: {str(e)}"
    except FileNotFoundError:
        return f"Error: File not found: {image_path}"
    except OSError as e:
        # Local I/O failures (permissions, path is a directory, ...) are
        # handled here so that a path containing "image" is not mistaken for
        # a vision-capability error by the keyword heuristics below.
        return f"Error reading image file: {str(e)}"
    except Exception as e:
        error_msg = str(e)
        lowered = error_msg.lower()

        # Best-effort classification based on keywords in the error text.
        if "vision" in lowered or "image" in lowered:
            return f"Error: This model may not support vision capabilities. Try a vision-enabled model. Details: {error_msg}"
        elif "401" in error_msg or "unauthorized" in lowered:
            return "Error: Invalid API key or insufficient permissions"
        elif "rate" in lowered or "quota" in lowered:
            return f"Error: API rate limit or quota exceeded: {error_msg}"
        else:
            return f"Error processing image: {error_msg}"