File size: 6,613 Bytes
f06f058 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
# Helper functions for the Streamlit app
import google.generativeai as genai
import logging
import json
from PIL import Image
import io
# Configure logging
# Runs at import time and sets the root logger to INFO; logging.basicConfig
# does nothing if the root logger already has handlers configured.
logging.basicConfig(level=logging.INFO)
# --- Constants --- #
# Longest allowed edge, in pixels. resize_image() shrinks any image whose
# width or height exceeds this value.
MAX_IMAGE_DIMENSION = 3500 # Increased max dimension for better detail
# Soft size target in megabytes. resize_image() only logs a warning when the
# compressed output is larger than this; nothing is enforced.
MAX_IMAGE_MB = 16 # Slightly increase target size limit as quality is higher
# Passed as the JPEG `quality` argument when resize_image() saves the image.
# NOTE(review): "increased" is presumably relative to an earlier value that is
# not visible in this file - confirm 35 is the intended setting.
TARGET_COMPRESSION_QUALITY = 35 # Increased JPEG quality (less compression, more quality)
# --- Gemini Configuration --- #
def configure_gemini(api_key, model_name='gemini-2.5-flash-preview-04-17'):
    """Configure the Gemini client library and return a model instance.

    Args:
        api_key: Google API key passed to ``genai.configure``. A falsy value
            (``None`` or an empty string) skips configuration entirely.
        model_name: Name of the Gemini model to instantiate. The default is
            the model this module has always used. The rest of this module
            sends image + text payloads, so the model must accept
            multimodal input.

    Returns:
        The ``genai.GenerativeModel`` instance, or ``None`` when no API key
        was supplied or when configuring the client / creating the model
        raised an exception.
    """
    if not api_key:
        logging.warning("GOOGLE_API_KEY not found. Gemini API cannot be configured.")
        return None
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name)
    except Exception as e:
        # Deliberately best-effort: the caller gets None and can show a
        # "not configured" state instead of crashing on startup.
        logging.error(f"Failed to configure Gemini API or initialize model: {e}", exc_info=True)
        return None
    else:
        logging.info(f"Gemini model initialized successfully ({model_name}).")
        return model
# --- Image Processing --- #
def resize_image(image_bytes):
    """Downscale and JPEG-compress raw image bytes using Pillow.

    The image is shrunk (aspect ratio preserved) so that neither edge exceeds
    ``MAX_IMAGE_DIMENSION``, flattened onto a white background if it carries
    transparency, converted to RGB and saved as JPEG with
    ``TARGET_COMPRESSION_QUALITY``.

    Args:
        image_bytes: Encoded image data in any format Pillow can open.

    Returns:
        A ``(data, format)`` tuple. On success ``data`` is the compressed
        JPEG and ``format`` is ``'JPEG'``. If anything fails, the original
        ``image_bytes`` are returned unchanged together with the detected
        source format, or ``'PNG'`` when the format could not be determined
        (including when the image could not be opened at all).
    """
    # Bound before the ``try`` so the fallback return in the ``except``
    # handler cannot hit an unbound local when ``Image.open`` itself fails.
    # 'PNG' mirrors the default already used for an undetected format.
    original_format = 'PNG'
    try:
        img = Image.open(io.BytesIO(image_bytes))
        original_format = img.format or 'PNG'
        logging.info(f"Opened image for processing. Original format: {original_format}, Original mode: {img.mode}")

        # 1. Resize if necessary (thumbnail() works in place and keeps the
        #    aspect ratio).
        width, height = img.size
        if width > MAX_IMAGE_DIMENSION or height > MAX_IMAGE_DIMENSION:
            logging.info(f"Resizing image from {width}x{height} to max {MAX_IMAGE_DIMENSION}px dimension.")
            img.thumbnail((MAX_IMAGE_DIMENSION, MAX_IMAGE_DIMENSION), Image.Resampling.LANCZOS)

        # 2. Compress
        output_buffer = io.BytesIO()
        save_format = 'JPEG'  # Often provides good compression for photos/screenshots
        quality = TARGET_COMPRESSION_QUALITY

        # JPEG has no alpha channel, so modes that can carry transparency are
        # composited onto white instead of letting the alpha be dropped.
        if img.mode in ('RGBA', 'P', 'LA'):
            logging.info(f"Converting image mode {img.mode} to RGB for JPEG saving.")
            background = Image.new('RGB', img.size, (255, 255, 255))
            try:
                if img.mode == 'P':
                    # Palette images are expanded to RGBA first so there is
                    # an alpha band to use as the paste mask.
                    img = img.convert('RGBA')
                # The last band of RGBA / LA is the alpha channel.
                background.paste(img, mask=img.split()[-1])
                img = background
            except Exception as paste_err:
                logging.warning(f"Could not properly handle transparency during conversion, falling back to simple RGB conversion: {paste_err}")
                img = img.convert('RGB')  # Fallback if pasting fails
        elif img.mode != 'RGB':
            img = img.convert('RGB')

        # Save with compression
        img.save(output_buffer, format=save_format, quality=quality, optimize=True)
        compressed_bytes = output_buffer.getvalue()

        # MAX_IMAGE_MB is only a soft target: exceeding it is reported, not
        # corrected (no iterative re-compression is attempted).
        final_size_mb = len(compressed_bytes) / (1024 * 1024)
        if final_size_mb > MAX_IMAGE_MB:
            logging.warning(f"Compressed image size ({final_size_mb:.2f} MB) still exceeds target ({MAX_IMAGE_MB} MB). Consider adjusting quality further if needed.")
        return compressed_bytes, save_format
    except Exception as e:
        logging.error(f"Error processing image: {e}", exc_info=True)
        # Fallback: return the original bytes if processing fails.
        # This might still cause Gemini issues if the original is too
        # large or in an unsupported format.
        return image_bytes, original_format
# --- Gemini Analysis --- #
def process_error_response(text_response):
    """Wrap a Gemini reply that could not be parsed as JSON.

    Args:
        text_response: The raw reply text, passed through untouched.

    Returns:
        A dict with an ``"analysis_error"`` message and the original reply
        under ``"raw_text"``.
    """
    logging.warning("Gemini response was not valid JSON. Returning raw text.")
    # The raw text travels with the error so the Streamlit app can show it
    # directly instead of a fixed, uninformative error payload.
    fallback = {"analysis_error": "Response was not valid JSON"}
    fallback["raw_text"] = text_response
    return fallback
def _strip_markdown_fence(text):
    """Return *text* without a surrounding Markdown code fence, if present."""
    cleaned = text.strip()
    if cleaned.startswith('```'):
        cleaned = cleaned[3:]
        # Drop an optional "json" language tag in any letter case; a bare
        # ``` fence is accepted as well.
        if cleaned[:4].lower() == 'json':
            cleaned = cleaned[4:]
    return cleaned.removesuffix('```').strip()


def analyze_input_with_gemini(gemini_model, prompt, image_bytes=None, text_content=None):
    """Send the prompt plus an image or text to Gemini and parse the JSON reply.

    Args:
        gemini_model: Object exposing ``generate_content(payload)``; the
            returned response must offer ``resolve()`` and ``text``.
        prompt: Instruction text placed first in the request payload.
        image_bytes: Encoded image data. When non-empty it takes precedence
            over ``text_content`` and is sent as ``image/jpeg``.
        text_content: Text to analyse when no image is supplied.

    Returns:
        The decoded JSON value. When it is a dict, an ``"input_type"`` key
        (``"Image"`` or ``"Text"``) is added. If the reply is not valid JSON,
        the dict produced by ``process_error_response`` is returned instead.

    Raises:
        ValueError: If the model is missing, or if there is neither a
            non-empty image nor any text content.
        RuntimeError: If the API call or response handling fails.
    """
    if not gemini_model:
        raise ValueError("Gemini model not configured.")
    # Uses the same truthiness test as the branch below, so empty image bytes
    # with no text are rejected here instead of putting ``None`` in the payload.
    if not image_bytes and text_content is None:
        raise ValueError("No input (image or text) provided for analysis.")

    content_payload = [prompt]
    try:
        if image_bytes:
            input_type = "Image"
            # The MIME type is fixed because resize_image() saves as JPEG.
            # NOTE(review): resize_image() falls back to the original bytes on
            # failure, which may not be JPEG - confirm the caller handles that.
            mime_type = "image/jpeg"
            content_payload.append({"mime_type": mime_type, "data": image_bytes})
            logging.info(f"Preparing Gemini request with processed image ({len(image_bytes)/(1024*1024):.2f} MB)")
        else:
            input_type = "Text"
            content_payload.append(text_content)
            logging.info("Preparing Gemini request with text content.")

        # Make the API call
        response = gemini_model.generate_content(content_payload)
        response.resolve()
        logging.info("Received response from Gemini API.")

        # Read the text once: if the attribute access itself fails, the
        # outer handler reports it, and the fallback below reuses this value
        # instead of touching ``response.text`` a second time.
        raw_text = response.text
        try:
            result = json.loads(_strip_markdown_fence(raw_text))
        except (json.JSONDecodeError, AttributeError) as e:
            logging.error(f"Failed to decode or process JSON response: {e}")
            return process_error_response(raw_text)

        # Only dicts can carry the extra key; other JSON values pass through.
        if isinstance(result, dict):
            result['input_type'] = input_type
        logging.info("Successfully parsed JSON response from Gemini.")
        return result
    except Exception as e:
        logging.error(f"Gemini API call failed: {e}", exc_info=True)
        # Re-raise a more generic error for the Streamlit app to catch,
        # keeping the original exception as the explicit cause.
        raise RuntimeError(f"Analysis failed due to API error: {e}") from e