# Helper functions for the Streamlit app

import google.generativeai as genai
import logging
import json
from PIL import Image
import io

# Configure logging
logging.basicConfig(level=logging.INFO)

# --- Constants --- #
MAX_IMAGE_DIMENSION = 3500 # Max width/height in pixels; larger images are downscaled to preserve detail without oversized payloads
MAX_IMAGE_MB = 16 # Target upper bound (MB) for the compressed image sent to the API
TARGET_COMPRESSION_QUALITY = 35 # JPEG quality setting; higher values mean less compression and more detail

# --- Gemini Configuration --- #
def configure_gemini(api_key):
    """Configures the Gemini client library and returns the model instance."""
    if not api_key:
        logging.warning("GOOGLE_API_KEY not found. Gemini API cannot be configured.")
        return None
    try:
        genai.configure(api_key=api_key)
        # Use a model capable of multimodal input (image+text)
        model = genai.GenerativeModel('gemini-2.5-flash-preview-04-17')
        logging.info("Gemini model initialized successfully (gemini-2.5-flash-preview-04-17).")
        return model
    except Exception as e:
        logging.error(f"Failed to configure Gemini API or initialize model: {e}", exc_info=True)
        return None
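
# Illustrative call site (a sketch, not part of this module): a Streamlit app would
# typically pull the key from st.secrets or an environment variable and cache the
# model instance. The secret name and caching decorator below are assumptions.
#
#     import streamlit as st
#
#     @st.cache_resource
#     def get_gemini_model():
#         return configure_gemini(st.secrets["GOOGLE_API_KEY"])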

# --- Image Processing --- #
def resize_image(image_bytes):
    """Resizes and compresses image bytes using Pillow."""
    original_format = 'PNG'  # Fallback format in case the image cannot be opened below
    try:
        img = Image.open(io.BytesIO(image_bytes))
        original_format = img.format or 'PNG'
        logging.info(f"Opened image for processing. Original format: {original_format}, Original mode: {img.mode}")
        
        # 1. Resize if necessary
        width, height = img.size
        if width > MAX_IMAGE_DIMENSION or height > MAX_IMAGE_DIMENSION:
            logging.info(f"Resizing image from {width}x{height} to max {MAX_IMAGE_DIMENSION}px dimension.")
            img.thumbnail((MAX_IMAGE_DIMENSION, MAX_IMAGE_DIMENSION), Image.Resampling.LANCZOS)
        
        # 2. Compress
        output_buffer = io.BytesIO()
        save_format = 'JPEG' # Often provides good compression for photos/screenshots
        quality = TARGET_COMPRESSION_QUALITY
        
        # Handle transparency for JPEG conversion
        if img.mode in ('RGBA', 'P', 'LA'):
            logging.info(f"Converting image mode {img.mode} to RGB for JPEG saving.")
            # Create a white background image
            background = Image.new('RGB', img.size, (255, 255, 255))
            # Paste the image onto the background using the alpha channel as mask
            try:
                if img.mode == 'P': # Ensure palette image is converted properly
                    img = img.convert('RGBA')
                background.paste(img, mask=img.split()[-1])
                img = background
            except Exception as paste_err:
                logging.warning(f"Could not properly handle transparency during conversion, falling back to simple RGB conversion: {paste_err}")
                img = img.convert('RGB') # Fallback if pasting fails
        elif img.mode != 'RGB':
            img = img.convert('RGB')

        # Save with compression
        img.save(output_buffer, format=save_format, quality=quality, optimize=True)
        compressed_bytes = output_buffer.getvalue()
        
        # Check final size (optional, could implement iterative compression)
        final_size_mb = len(compressed_bytes) / (1024 * 1024)
        if final_size_mb > MAX_IMAGE_MB:
            logging.warning(f"Compressed image size ({final_size_mb:.2f} MB) still exceeds target ({MAX_IMAGE_MB} MB). Consider adjusting quality further if needed.")

        return compressed_bytes, save_format

    except Exception as e:
        logging.error(f"Error processing image: {e}", exc_info=True)
        # Fallback: Return original bytes if processing fails
        # This might still cause Gemini issues if the original is too large/unsupported
        return image_bytes, original_format 
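
# Illustrative call site (a sketch, assuming `import streamlit as st`; the uploader
# widget is an assumption about the Streamlit app, not something this module requires):
#
#     uploaded = st.file_uploader("Upload a screenshot", type=["png", "jpg", "jpeg"])
#     if uploaded is not None:
#         processed_bytes, fmt = resize_image(uploaded.getvalue())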

# --- Gemini Analysis --- #
def process_error_response(text_response):
    """Fallback processor if Gemini doesn't return valid JSON."""
    logging.warning("Gemini response was not valid JSON. Returning raw text.")
    # In a Streamlit context, returning the raw text might be more useful
    # than a fixed error dict, as it can be displayed directly.
    return {
        "analysis_error": "Response was not valid JSON",
        "raw_text": text_response
    }
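
# Illustrative handling in the Streamlit app (a sketch, assuming `import streamlit as st`;
# the st.error/st.code calls are assumptions about the caller, not requirements here):
#
#     if "analysis_error" in result:
#         st.error(result["analysis_error"])
#         st.code(result["raw_text"])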

def analyze_input_with_gemini(gemini_model, prompt, image_bytes=None, text_content=None):
    """
    Sends the prompt and either image bytes or text content to the Gemini model.
    Handles potential errors and parses the JSON response.
    """
    if not gemini_model:
        raise ValueError("Gemini model not configured.")

    if image_bytes is None and text_content is None:
        raise ValueError("No input (image or text) provided for analysis.")

    content_payload = [prompt]
    input_type = ""

    try:
        if image_bytes:
            input_type = "Image"
            # Determine MIME type based on how we saved it (likely JPEG)
            # Or could try to sniff bytes, but saving as JPEG is safer
            mime_type = "image/jpeg"
            img_part = {"mime_type": mime_type, "data": image_bytes}
            content_payload.append(img_part)
            logging.info(f"Preparing Gemini request with processed image ({len(image_bytes)/(1024*1024):.2f} MB)")
        else:
            input_type = "Text"
            content_payload.append(text_content)
            logging.info("Preparing Gemini request with text content.")

        # Make the API call
        response = gemini_model.generate_content(content_payload)
        response.resolve()
        logging.info("Received response from Gemini API.")

        # Process the response
        try:
            # Clean potential markdown backticks and parse JSON
            cleaned_text = response.text.strip().removeprefix('```json').removesuffix('```').strip()
            result = json.loads(cleaned_text)
            # Inject input type
            if isinstance(result, dict):
                result['input_type'] = input_type
            logging.info("Successfully parsed JSON response from Gemini.")
            return result
        except (json.JSONDecodeError, AttributeError) as e:
            logging.error(f"Failed to decode or process JSON response: {e}")
            return process_error_response(response.text)

    except Exception as e:
        logging.error(f"Gemini API call failed: {e}", exc_info=True)
        # Re-raise a more generic error for the Streamlit app to catch, preserving the original cause
        raise RuntimeError(f"Analysis failed due to API error: {e}") from e
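
# --- Manual smoke test --- #
# A minimal, illustrative sketch for running this module directly. It assumes a
# GOOGLE_API_KEY environment variable and an optional local file named
# "sample_screenshot.png"; both names are placeholders, not requirements of the
# Streamlit app itself.
if __name__ == "__main__":
    import os
    from pathlib import Path

    model = configure_gemini(os.environ.get("GOOGLE_API_KEY"))
    if model is None:
        print("Set GOOGLE_API_KEY to run this smoke test.")
    else:
        prompt = "Describe this input and respond with a JSON object."  # placeholder prompt
        sample_path = Path("sample_screenshot.png")  # placeholder input file
        if sample_path.exists():
            processed_bytes, _fmt = resize_image(sample_path.read_bytes())
            result = analyze_input_with_gemini(model, prompt, image_bytes=processed_bytes)
        else:
            result = analyze_input_with_gemini(model, prompt, text_content="Example error log line.")
        print(json.dumps(result, indent=2))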