File size: 4,288 Bytes
72565dd
 
 
 
 
 
cc083b4
 
72565dd
 
 
 
 
 
 
 
 
 
cc083b4
72565dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import base64
import requests
from pathlib import Path
from openai import OpenAI
from urllib.parse import urlparse
from dotenv import load_dotenv


# Maps each supported local file extension to the MIME type declared in the
# data URL sent to the model. The keys double as the extension allow-list.
_MIME_BY_EXTENSION = {
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.bmp': 'image/bmp',
    '.webp': 'image/webp',
}

# Used only when a URL response claims to be an image but its Content-Type
# is not of the form "image/<subtype>".
_FALLBACK_MIME = 'image/jpeg'


def describe_image(image_path: str) -> str:
    """
    Generate a description of the image at the given path or URL.

    The image bytes are fetched (HTTP GET for URLs, file read for local
    paths), base64-encoded into a data URL that carries the image's actual
    MIME type, and sent to a chat-completions endpoint together with a
    prompt asking for a detailed description.

    Args:
        image_path: Path to local image file OR URL to image

    Returns:
        The description text on success. On any failure a human-readable
        string starting with "Error" is returned instead; this function
        does not raise.
    """
    load_dotenv()

    # Check if API key is available
    api_key = os.getenv("NEBIUS_API_KEY")
    if not api_key:
        return "Error: NEBIUS_API_KEY environment variable not set"

    try:
        # Anything with both a scheme and a network location is treated as a URL
        parsed = urlparse(image_path)
        is_url = bool(parsed.scheme and parsed.netloc)

        if is_url:
            print(f"📡 Downloading image from URL: {image_path}")
            download = requests.get(image_path, timeout=30)
            download.raise_for_status()
            image_data = download.content

            # Header values are case-insensitive, so compare in lower case
            content_type = download.headers.get('content-type', '')
            if 'image' not in content_type.lower():
                return f"Error: URL does not appear to contain an image. Content-Type: {content_type}"

            # Drop parameters such as "; charset=..." to get the bare media type
            media_type = content_type.split(';', 1)[0].strip().lower()
            mime_type = media_type if media_type.startswith('image/') else _FALLBACK_MIME

        else:
            # Keep the caller's argument untouched; work on a Path copy
            local_path = Path(image_path)

            if not local_path.exists():
                return f"Error: Local file not found: {local_path}"

            # Check if it's an image file
            valid_extensions = set(_MIME_BY_EXTENSION)
            extension = local_path.suffix.lower()
            if extension not in valid_extensions:
                return f"Error: Unsupported file type '{local_path.suffix}'. Supported: {valid_extensions}"
            mime_type = _MIME_BY_EXTENSION[extension]

            print(f"📁 Reading local image: {local_path}")
            # Handle read failures here so they are reported as file errors
            # instead of being misclassified by the generic API-error
            # heuristics below (which match on words like "image").
            try:
                with open(local_path, "rb") as f:
                    image_data = f.read()
            except FileNotFoundError:
                return f"Error: File not found: {local_path}"
            except OSError as e:
                return f"Error: Could not read local file {local_path}: {e}"

        # Encode image to base64
        base64_image = base64.b64encode(image_data).decode('utf-8')

        # Create OpenAI client
        client = OpenAI(
            base_url="https://api.studio.nebius.com/v1/",
            api_key=api_key
        )

        # Make API call with proper vision format
        completion = client.chat.completions.create(
            model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that provides detailed descriptions of images. Focus on the main subjects, colors, composition, and any notable details."
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Please provide a detailed description of this image."
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                # Declare the real media type rather than
                                # always claiming JPEG
                                "url": f"data:{mime_type};base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            max_tokens=500
        )

        # The message content can be None; report that explicitly rather
        # than failing on .strip()
        content = completion.choices[0].message.content
        if content is None:
            return "Error: Model returned no description"
        return content.strip()

    except requests.RequestException as e:
        return f"Error downloading image from URL: {str(e)}"
    except Exception as e:
        # Best-effort classification of API failures based on message text
        error_msg = str(e)
        lowered = error_msg.lower()

        if "vision" in lowered or "image" in lowered:
            return f"Error: This model may not support vision capabilities. Try a vision-enabled model. Details: {error_msg}"
        elif "401" in error_msg or "unauthorized" in lowered:
            return "Error: Invalid API key or insufficient permissions"
        elif "rate" in lowered or "quota" in lowered:
            return f"Error: API rate limit or quota exceeded: {error_msg}"
        else:
            return f"Error processing image: {error_msg}"