import base64
import os
from pathlib import Path
from typing import Tuple
from urllib.parse import urlparse

import requests
from dotenv import load_dotenv
from openai import OpenAI

# Supported local-file extensions mapped to the MIME type announced in the
# data URL sent to the model.
_EXTENSION_MIME_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".bmp": "image/bmp",
    ".webp": "image/webp",
}

# Used only when neither the Content-Type header nor the URL path identifies
# the image format (this was the hard-coded value before).
_DEFAULT_MIME_TYPE = "image/jpeg"


class _ImageInputError(Exception):
    """Validation failure whose message is returned to the caller verbatim."""


def _download_image(url: str, url_path: str) -> Tuple[bytes, str]:
    """Fetch an image over HTTP and return ``(raw_bytes, mime_type)``.

    Args:
        url: Full URL of the image.
        url_path: Path component of ``url``; its suffix is the fallback used
            to pick a MIME type when the Content-Type header is not precise.

    Raises:
        requests.RequestException: On network failures or HTTP error status.
        _ImageInputError: If the Content-Type header does not mention "image".
    """
    print(f"📡 Downloading image from URL: {url}")
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    content_type = response.headers.get('content-type', '')
    if 'image' not in content_type:
        raise _ImageInputError(
            f"Error: URL does not appear to contain an image. Content-Type: {content_type}"
        )

    # Drop parameters such as "; charset=binary" to get the bare media type.
    mime_type = content_type.split(";", 1)[0].strip().lower()
    if not mime_type.startswith("image/"):
        mime_type = _EXTENSION_MIME_TYPES.get(
            Path(url_path).suffix.lower(), _DEFAULT_MIME_TYPE
        )
    return response.content, mime_type


def _read_local_image(location: str) -> Tuple[bytes, str]:
    """Read an image from disk and return ``(raw_bytes, mime_type)``.

    Raises:
        _ImageInputError: If the file is missing or has an unsupported suffix.
        OSError: If the file exists but cannot be read.
    """
    path = Path(location)
    if not path.exists():
        raise _ImageInputError(f"Error: Local file not found: {path}")

    suffix = path.suffix.lower()
    if suffix not in _EXTENSION_MIME_TYPES:
        valid_extensions = set(_EXTENSION_MIME_TYPES)
        raise _ImageInputError(
            f"Error: Unsupported file type '{path.suffix}'. Supported: {valid_extensions}"
        )

    print(f"📁 Reading local image: {path}")
    with open(path, "rb") as image_file:
        return image_file.read(), _EXTENSION_MIME_TYPES[suffix]


def _describe_api_error(error: Exception) -> str:
    """Translate an exception raised during the API call into an error string."""
    error_msg = str(error)
    lowered = error_msg.lower()

    # Prefer the HTTP status when the exception carries one (openai's status
    # errors expose ``status_code``); it is more reliable than text matching.
    status = getattr(error, "status_code", None)
    if status == 401:
        return "Error: Invalid API key or insufficient permissions"
    if status == 429:
        return f"Error: API rate limit or quota exceeded: {error_msg}"

    # Fallback: substring heuristics on the error text.
    if "vision" in lowered or "image" in lowered:
        return (
            "Error: This model may not support vision capabilities. "
            f"Try a vision-enabled model. Details: {error_msg}"
        )
    if "401" in error_msg or "unauthorized" in lowered:
        return "Error: Invalid API key or insufficient permissions"
    if "rate" in lowered or "quota" in lowered:
        return f"Error: API rate limit or quota exceeded: {error_msg}"
    return f"Error processing image: {error_msg}"


def describe_image(image_path: str) -> str:
    """
    Generate a description of the image at the given path or URL.

    The image is downloaded (URL) or read from disk (local path), base64
    encoded into a data URL that carries the image's actual MIME type, and
    sent to the Nebius-hosted chat-completions endpoint.

    Args:
        image_path: Path to local image file OR URL to image

    Returns:
        A string description of the image. Failures are never raised; they
        are reported as a string starting with "Error".
    """
    load_dotenv()

    # Check if API key is available
    api_key = os.getenv("NEBIUS_API_KEY")
    if not api_key:
        return "Error: NEBIUS_API_KEY environment variable not set"

    # Stage 1: obtain the image bytes. This is guarded separately from the
    # API call so that local I/O problems are never misreported as model
    # errors (e.g. a path containing the word "image").
    try:
        parsed = urlparse(image_path)
        if parsed.scheme and parsed.netloc:
            image_data, mime_type = _download_image(image_path, parsed.path)
        else:
            image_data, mime_type = _read_local_image(image_path)
    except _ImageInputError as error:
        return str(error)
    except requests.RequestException as error:
        # Must precede OSError: RequestException is an IOError subclass.
        return f"Error downloading image from URL: {str(error)}"
    except FileNotFoundError:
        return f"Error: File not found: {image_path}"
    except (OSError, ValueError) as error:
        # ValueError covers malformed URLs rejected by urlparse.
        return f"Error processing image: {error}"

    # Encode image to base64
    base64_image = base64.b64encode(image_data).decode('utf-8')

    # Stage 2: ask the model for a description.
    try:
        client = OpenAI(
            base_url="https://api.studio.nebius.com/v1/",
            api_key=api_key,
        )
        response = client.chat.completions.create(
            model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a helpful assistant that provides detailed descriptions of images. "
                        "Focus on the main subjects, colors, composition, and any notable details."
                    ),
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Please provide a detailed description of this image.",
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            },
                        },
                    ],
                },
            ],
            max_tokens=500,
        )
        description = response.choices[0].message.content
    except Exception as error:  # boundary: this function reports, never raises
        return _describe_api_error(error)

    if description is None:
        return "Error processing image: model returned no description"
    return description.strip()