Spaces:

metehan777
/

image-to-google-discover

Running

File size: 6,613 Bytes

f06f058

# Helper functions for the Streamlit app

import google.generativeai as genai
import logging
import json
from PIL import Image
import io

# Configure logging
# Root logger is configured at import time; the helpers in this module log
# through the module-level `logging.*` functions.
logging.basicConfig(level=logging.INFO)

# --- Constants --- #
# Longest allowed side in pixels. resize_image() calls img.thumbnail() with
# this bound when either dimension exceeds it.
MAX_IMAGE_DIMENSION = 3500 # Increased max dimension for better detail
# Size threshold in megabytes. resize_image() only logs a warning when the
# compressed output exceeds it; no further compression is attempted.
MAX_IMAGE_MB = 16 # Slightly increase target size limit as quality is higher
# Value passed as `quality=` to img.save(..., format='JPEG') in resize_image().
# NOTE(review): 35 appears to be on the low side of Pillow's JPEG quality
# scale (library default is 75), i.e. fairly strong compression - confirm
# this is the intended value.
TARGET_COMPRESSION_QUALITY = 35 # JPEG quality setting (higher = less compression, more quality)

# --- Gemini Configuration --- #
def configure_gemini(api_key, model_name='gemini-2.5-flash-preview-04-17'):
    """Configure the Gemini client library and return a model instance.

    Args:
        api_key: Google API key handed to ``genai.configure``. A falsy value
            (``None`` or ``""``) skips configuration entirely.
        model_name: Name of the Gemini model to instantiate. It should be a
            model that accepts multimodal (image + text) input. The default
            is the model this module used before the name became
            configurable, so existing callers are unaffected.

    Returns:
        The ``genai.GenerativeModel`` instance, or ``None`` when no API key
        was supplied or when configuring the client / creating the model
        raised an exception (the error is logged, not propagated).
    """
    if not api_key:
        logging.warning("GOOGLE_API_KEY not found. Gemini API cannot be configured.")
        return None

    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name)
    except Exception as e:
        # Best-effort by design: the caller checks for None instead of
        # handling client-library exceptions.
        logging.error(f"Failed to configure Gemini API or initialize model: {e}", exc_info=True)
        return None

    logging.info(f"Gemini model initialized successfully ({model_name}).")
    return model

# --- Image Processing --- #
def _flatten_to_rgb(img):
    """Return *img* as an RGB image, compositing transparency onto white."""
    if img.mode == 'RGB':
        return img
    if img.mode not in ('RGBA', 'P', 'LA'):
        return img.convert('RGB')

    logging.info(f"Converting image mode {img.mode} to RGB for JPEG saving.")
    # White canvas that the (possibly transparent) image is pasted onto.
    background = Image.new('RGB', img.size, (255, 255, 255))
    try:
        if img.mode == 'P':
            # Go through RGBA so palette transparency becomes a real alpha band.
            img = img.convert('RGBA')
        # The last band is the alpha channel for both RGBA and LA images.
        background.paste(img, mask=img.split()[-1])
        return background
    except Exception as paste_err:
        logging.warning(f"Could not properly handle transparency during conversion, falling back to simple RGB conversion: {paste_err}")
        return img.convert('RGB')


def resize_image(image_bytes):
    """Resize and JPEG-compress encoded image bytes using Pillow.

    The image is shrunk so that neither side exceeds ``MAX_IMAGE_DIMENSION``,
    flattened to RGB (transparency is composited onto white) and saved as
    JPEG with ``TARGET_COMPRESSION_QUALITY``. If the result is still larger
    than ``MAX_IMAGE_MB`` a warning is logged; no further compression is
    attempted.

    Args:
        image_bytes: The encoded image (any format Pillow can open).

    Returns:
        A ``(data, format_name)`` tuple. On success ``data`` is the JPEG
        bytes and ``format_name`` is ``'JPEG'``. If processing fails for any
        reason, the original ``image_bytes`` are returned unchanged together
        with the format Pillow detected, or ``'PNG'`` when the format is
        unknown (including when the image could not be opened at all).
    """
    # Bound before the try block so the failure path can always return it,
    # even when Image.open() itself raises. 'PNG' mirrors the default used
    # below for images whose format Pillow cannot name.
    original_format = 'PNG'
    try:
        img = Image.open(io.BytesIO(image_bytes))
        original_format = img.format or 'PNG'
        logging.info(f"Opened image for processing. Original format: {original_format}, Original mode: {img.mode}")

        # 1. Resize if necessary
        width, height = img.size
        if width > MAX_IMAGE_DIMENSION or height > MAX_IMAGE_DIMENSION:
            logging.info(f"Resizing image from {width}x{height} to max {MAX_IMAGE_DIMENSION}px dimension.")
            img.thumbnail((MAX_IMAGE_DIMENSION, MAX_IMAGE_DIMENSION), Image.Resampling.LANCZOS)

        # 2. Compress. JPEG has no alpha channel, so flatten to RGB first.
        save_format = 'JPEG'
        img = _flatten_to_rgb(img)

        output_buffer = io.BytesIO()
        img.save(output_buffer, format=save_format, quality=TARGET_COMPRESSION_QUALITY, optimize=True)
        compressed_bytes = output_buffer.getvalue()

        # Only report an oversized result; the bytes are returned regardless.
        final_size_mb = len(compressed_bytes) / (1024 * 1024)
        if final_size_mb > MAX_IMAGE_MB:
            logging.warning(f"Compressed image size ({final_size_mb:.2f} MB) still exceeds target ({MAX_IMAGE_MB} MB). Consider adjusting quality further if needed.")

        return compressed_bytes, save_format

    except Exception as e:
        logging.error(f"Error processing image: {e}", exc_info=True)
        # Fallback: hand back the untouched input. This may still be rejected
        # downstream if the original is too large or in an unsupported format.
        return image_bytes, original_format

# --- Gemini Analysis --- #
def process_error_response(text_response):
    """Wrap a Gemini reply that could not be parsed as JSON.

    Logs a warning and returns a dictionary with a fixed ``analysis_error``
    message plus the untouched reply under ``raw_text``.
    """
    logging.warning("Gemini response was not valid JSON. Returning raw text.")
    # The raw text is kept (rather than a fixed error payload only) so that a
    # Streamlit caller can display it directly.
    fallback = dict(
        analysis_error="Response was not valid JSON",
        raw_text=text_response,
    )
    return fallback

def _sniff_image_mime(image_bytes):
    """Guess the MIME type of *image_bytes* from its leading signature bytes."""
    head = bytes(image_bytes[:12])
    if head.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if head[:4] == b"RIFF" and head[8:12] == b"WEBP":
        return "image/webp"
    # JPEG is what the image pre-processing step normally produces, so it
    # remains the default for anything not recognised above.
    return "image/jpeg"


def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence, with or without a ``json`` tag."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
    return cleaned.removesuffix("```").strip()


def analyze_input_with_gemini(gemini_model, prompt, image_bytes=None, text_content=None):
    """Send the prompt plus an image or text to Gemini and parse the JSON reply.

    When non-empty ``image_bytes`` are given they take precedence over
    ``text_content``.

    Args:
        gemini_model: Object exposing ``generate_content(payload)``; the
            returned response must offer ``resolve()`` and ``text``.
        prompt: Instruction text placed first in the request payload.
        image_bytes: Encoded image data, or ``None``. The MIME type is
            detected from the data (PNG / WebP) and defaults to JPEG.
        text_content: Text to analyse when no image is supplied, or ``None``.

    Returns:
        The decoded JSON value. When it is a dict, an ``input_type`` key
        (``"Image"`` or ``"Text"``) is added. If the reply is not valid JSON,
        the dict produced by ``process_error_response`` is returned instead.

    Raises:
        ValueError: If the model is missing, or if there is neither a
            non-empty image nor any text content.
        RuntimeError: If the API call or response handling fails; the
            original exception is attached as ``__cause__``.
    """
    if not gemini_model:
        raise ValueError("Gemini model not configured.")

    # Empty image bytes are as unusable as a missing image; without this the
    # text branch below would send ``None`` to the API.
    if not image_bytes and text_content is None:
        raise ValueError("No input (image or text) provided for analysis.")

    content_payload = [prompt]

    try:
        if image_bytes:
            input_type = "Image"
            # The bytes are usually JPEG, but the pre-processing step can fall
            # back to the original upload, so detect the type from the data.
            mime_type = _sniff_image_mime(image_bytes)
            content_payload.append({"mime_type": mime_type, "data": image_bytes})
            logging.info(f"Preparing Gemini request with processed image ({len(image_bytes)/(1024*1024):.2f} MB)")
        else:
            input_type = "Text"
            content_payload.append(text_content)
            logging.info("Preparing Gemini request with text content.")

        # Make the API call
        response = gemini_model.generate_content(content_payload)
        response.resolve()
        logging.info("Received response from Gemini API.")

        # Read the text once so the error path below does not touch the
        # response object a second time.
        raw_text = response.text

        try:
            result = json.loads(_strip_code_fence(raw_text))
        except (json.JSONDecodeError, AttributeError) as e:
            logging.error(f"Failed to decode or process JSON response: {e}")
            return process_error_response(raw_text)

        # Tag dict results with the kind of input that produced them.
        if isinstance(result, dict):
            result['input_type'] = input_type
        logging.info("Successfully parsed JSON response from Gemini.")
        return result

    except Exception as e:
        logging.error(f"Gemini API call failed: {e}", exc_info=True)
        # Re-raise a more generic error for the Streamlit app to catch,
        # keeping the original exception as the cause.
        raise RuntimeError(f"Analysis failed due to API error: {e}") from e