JuanjoSG5
current progress
cc083b4
raw
history blame
4.29 kB
import os
import base64
import requests
from pathlib import Path
from openai import OpenAI
from urllib.parse import urlparse
from dotenv import load_dotenv
def describe_image(image_path: str) -> str:
    """Generate a description of the image at the given path or URL.

    The image bytes are read from a local file or downloaded from a URL,
    base64-encoded into a data URL tagged with the image's real MIME type,
    and sent to a vision-capable chat model on the Nebius endpoint.

    Problems encountered while loading the image or calling the API are not
    raised; they are reported as a returned string starting with "Error".

    Args:
        image_path: Path to local image file OR URL to image. A value is
            treated as a URL when it has both a scheme and a network
            location (e.g. "https://host/pic.png").

    Returns:
        A string description of the image, or an "Error..." message.
    """
    # Accepted local file extensions and the MIME type each one maps to.
    # An explicit table is used so the data URL below can declare the real
    # format instead of always claiming JPEG.
    mime_by_extension = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.bmp': 'image/bmp',
        '.webp': 'image/webp',
    }

    load_dotenv()

    # Check if API key is available
    api_key = os.getenv("NEBIUS_API_KEY")
    if not api_key:
        return "Error: NEBIUS_API_KEY environment variable not set"

    try:
        # Determine if it's a URL or local file path
        parsed = urlparse(image_path)
        is_url = bool(parsed.scheme and parsed.netloc)

        if is_url:
            # Handle URL
            print(f"📡 Downloading image from URL: {image_path}")
            http_response = requests.get(image_path, timeout=30)
            http_response.raise_for_status()
            image_data = http_response.content

            # Use only the media type of the header ("image/png; charset=x"
            # -> "image/png"), compared case-insensitively.
            content_type = http_response.headers.get('content-type', '')
            mime_type = content_type.split(';', 1)[0].strip().lower()
            if not mime_type.startswith('image/'):
                return f"Error: URL does not appear to contain an image. Content-Type: {content_type}"
        else:
            # Handle local file. A separate name keeps the caller's
            # `image_path` string intact for the error messages below.
            local_path = Path(image_path)
            if not local_path.exists():
                return f"Error: Local file not found: {local_path}"

            # Check if it's an image file
            suffix = local_path.suffix.lower()
            if suffix not in mime_by_extension:
                valid_extensions = set(mime_by_extension)
                return f"Error: Unsupported file type '{local_path.suffix}'. Supported: {valid_extensions}"
            mime_type = mime_by_extension[suffix]

            print(f"📁 Reading local image: {local_path}")
            with open(local_path, "rb") as f:
                image_data = f.read()

        # Encode image to base64
        base64_image = base64.b64encode(image_data).decode('utf-8')

        # Create OpenAI client
        client = OpenAI(
            base_url="https://api.studio.nebius.com/v1/",
            api_key=api_key
        )

        # Make API call with proper vision format
        completion = client.chat.completions.create(
            model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that provides detailed descriptions of images. Focus on the main subjects, colors, composition, and any notable details."
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Please provide a detailed description of this image."
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            max_tokens=500
        )

        # The message content may be None; report that explicitly rather
        # than failing on .strip().
        content = completion.choices[0].message.content
        if content is None:
            return "Error: Model returned no description"
        return content.strip()

    except requests.RequestException as e:
        return f"Error downloading image from URL: {str(e)}"
    except FileNotFoundError:
        return f"Error: File not found: {image_path}"
    except OSError as e:
        # Permission problems, directories, etc. Handled before the generic
        # branch so a path containing "image" or "rate" is not misreported
        # as a model or quota problem by the keyword heuristics below.
        return f"Error: Could not read image file: {e}"
    except Exception as e:
        # Best-effort classification of API failures by message keywords.
        error_msg = str(e)
        lowered = error_msg.lower()
        if "vision" in lowered or "image" in lowered:
            return f"Error: This model may not support vision capabilities. Try a vision-enabled model. Details: {error_msg}"
        elif "401" in error_msg or "unauthorized" in lowered:
            return "Error: Invalid API key or insufficient permissions"
        elif "rate" in lowered or "quota" in lowered:
            return f"Error: API rate limit or quota exceeded: {error_msg}"
        else:
            return f"Error processing image: {error_msg}"