Spaces:

metehan777
/

image-to-google-discover

Running

mhd7music

Add Streamlit app structure for Discover Analyzer

f06f058 4 months ago

6.61 kB

	# Helper functions for the Streamlit app

	import google.generativeai as genai
	import logging
	import json
	from PIL import Image
	import io

	# Configure logging
	# Runs at import time: the root logger is set to INFO, which is the level the
	# helpers in this file use for their progress messages.
	logging.basicConfig(level=logging.INFO)

	# --- Constants --- #
	# Longest allowed side in pixels; resize_image() shrinks any image whose width
	# or height exceeds this.
	MAX_IMAGE_DIMENSION = 3500 # Increased max dimension for better detail
	# Size threshold in MB for the compressed output. resize_image() only logs a
	# warning when the result is larger; nothing is enforced.
	MAX_IMAGE_MB = 16 # Slightly increase target size limit as quality is higher
	# Passed as the `quality` argument when resize_image() saves the JPEG.
	# NOTE(review): 35 looks low for a value described as "increased quality" -
	# confirm it is intended.
	TARGET_COMPRESSION_QUALITY = 35 # Increased JPEG quality (less compression, more quality)

	# --- Gemini Configuration --- #
	def configure_gemini(api_key):
	    """Set up the Gemini client library and build the model handle.

	    Args:
	        api_key: API key handed to ``genai.configure``.

	    Returns:
	        The ``genai.GenerativeModel`` instance, or ``None`` when the key is
	        missing/empty or when configuring the library or creating the model
	        raises.
	    """
	    if api_key:
	        try:
	            genai.configure(api_key=api_key)
	            # The analysis sends image + text, so a multimodal model is used.
	            gemini_client = genai.GenerativeModel('gemini-2.5-flash-preview-04-17')
	            logging.info("Gemini model initialized successfully (gemini-2.5-flash-preview-04-17).")
	        except Exception as init_err:
	            logging.error(f"Failed to configure Gemini API or initialize model: {init_err}", exc_info=True)
	            gemini_client = None
	        return gemini_client

	    # No key supplied: report it and let the caller deal with a missing model.
	    logging.warning("GOOGLE_API_KEY not found. Gemini API cannot be configured.")
	    return None

	# --- Image Processing --- #
	def resize_image(image_bytes):
	    """Downscale and JPEG-compress encoded image bytes using Pillow.

	    The image is shrunk in place (aspect ratio preserved) when either side
	    exceeds ``MAX_IMAGE_DIMENSION``, flattened onto a white background if it
	    carries transparency, and saved as JPEG at ``TARGET_COMPRESSION_QUALITY``.

	    Args:
	        image_bytes: Encoded image data to be opened with ``Image.open``.

	    Returns:
	        A ``(data, format)`` tuple. On success ``data`` is the JPEG-encoded
	        result and ``format`` is ``'JPEG'``. If any step fails, the untouched
	        ``image_bytes`` are returned together with the detected source format,
	        or ``'PNG'`` when no format could be determined (including when the
	        bytes could not be opened at all).
	    """
	    # Bound before the try block so the fallback return in the except handler
	    # never hits an unbound local when Image.open() itself raises.
	    original_format = 'PNG'
	    try:
	        img = Image.open(io.BytesIO(image_bytes))
	        original_format = img.format or 'PNG'
	        logging.info(f"Opened image for processing. Original format: {original_format}, Original mode: {img.mode}")

	        # 1. Resize if necessary
	        width, height = img.size
	        if width > MAX_IMAGE_DIMENSION or height > MAX_IMAGE_DIMENSION:
	            logging.info(f"Resizing image from {width}x{height} to max {MAX_IMAGE_DIMENSION}px dimension.")
	            img.thumbnail((MAX_IMAGE_DIMENSION, MAX_IMAGE_DIMENSION), Image.Resampling.LANCZOS)

	        # 2. Compress
	        output_buffer = io.BytesIO()
	        save_format = 'JPEG'  # Often provides good compression for photos/screenshots
	        quality = TARGET_COMPRESSION_QUALITY

	        # JPEG has no alpha channel, so transparent modes are flattened first.
	        if img.mode in ('RGBA', 'P', 'LA'):
	            logging.info(f"Converting image mode {img.mode} to RGB for JPEG saving.")
	            # Create a white background image
	            background = Image.new('RGB', img.size, (255, 255, 255))
	            # Paste the image onto the background using the alpha channel as mask
	            try:
	                if img.mode == 'P':  # Ensure palette image is converted properly
	                    img = img.convert('RGBA')
	                background.paste(img, mask=img.split()[-1])
	                img = background
	            except Exception as paste_err:
	                logging.warning(f"Could not properly handle transparency during conversion, falling back to simple RGB conversion: {paste_err}")
	                img = img.convert('RGB')  # Fallback if pasting fails
	        elif img.mode != 'RGB':
	            img = img.convert('RGB')

	        # Save with compression
	        img.save(output_buffer, format=save_format, quality=quality, optimize=True)
	        compressed_bytes = output_buffer.getvalue()

	        # The size limit is advisory: an oversized result is reported, not rejected.
	        final_size_mb = len(compressed_bytes) / (1024 * 1024)
	        if final_size_mb > MAX_IMAGE_MB:
	            logging.warning(f"Compressed image size ({final_size_mb:.2f} MB) still exceeds target ({MAX_IMAGE_MB} MB). Consider adjusting quality further if needed.")

	        return compressed_bytes, save_format

	    except Exception as e:
	        logging.error(f"Error processing image: {e}", exc_info=True)
	        # Fallback: Return original bytes if processing fails
	        # This might still cause Gemini issues if the original is too large/unsupported
	        return image_bytes, original_format

	# --- Gemini Analysis --- #
	def process_error_response(text_response):
	    """Wrap a non-JSON Gemini reply in an error dictionary.

	    Args:
	        text_response: The text the model returned, passed through unchanged.

	    Returns:
	        A dict whose ``analysis_error`` key holds a fixed message and whose
	        ``raw_text`` key holds ``text_response``.
	    """
	    logging.warning("Gemini response was not valid JSON. Returning raw text.")
	    # The raw text is kept alongside the error so the Streamlit UI can show
	    # it directly instead of a fixed, uninformative error payload.
	    fallback = {"analysis_error": "Response was not valid JSON"}
	    fallback["raw_text"] = text_response
	    return fallback

	def _strip_markdown_fence(text):
	    """Return ``text`` without a surrounding Markdown code fence, if present."""
	    cleaned = text.strip()
	    if cleaned.startswith('```'):
	        cleaned = cleaned[3:]
	        # Drop the optional language tag; the fence may be untagged or use
	        # any casing of "json".
	        if cleaned[:4].lower() == 'json':
	            cleaned = cleaned[4:]
	    return cleaned.removesuffix('```').strip()

	def analyze_input_with_gemini(gemini_model, prompt, image_bytes=None, text_content=None):
	    """
	    Send the prompt plus either an image or text to Gemini and parse the reply.

	    Args:
	        gemini_model: Object exposing ``generate_content(payload)``; the
	            returned response must offer ``resolve()`` and ``text``.
	        prompt: Instruction placed first in the request payload.
	        image_bytes: Encoded image data, sent with MIME type ``image/jpeg``.
	            Takes precedence over ``text_content`` when non-empty.
	        text_content: Text to analyze when no image is supplied.

	    Returns:
	        The parsed JSON reply. When it is a dict, an ``input_type`` key
	        (``"Image"`` or ``"Text"``) is added. If the reply is not valid JSON,
	        the result of ``process_error_response`` is returned instead.

	    Raises:
	        ValueError: If the model is missing or no usable input was provided.
	        RuntimeError: If the API call or response handling fails; the
	            original exception is chained as ``__cause__``.
	    """
	    if not gemini_model:
	        raise ValueError("Gemini model not configured.")

	    # Empty image bytes count as "no image": without text there is nothing to
	    # send, so fail here rather than pass None to the API.
	    if not image_bytes and text_content is None:
	        raise ValueError("No input (image or text) provided for analysis.")

	    content_payload = [prompt]

	    try:
	        if image_bytes:
	            input_type = "Image"
	            # Determine MIME type based on how we saved it (likely JPEG)
	            # Or could try to sniff bytes, but saving as JPEG is safer
	            mime_type = "image/jpeg"
	            img_part = {"mime_type": mime_type, "data": image_bytes}
	            content_payload.append(img_part)
	            logging.info(f"Preparing Gemini request with processed image ({len(image_bytes)/(1024*1024):.2f} MB)")
	        else:
	            input_type = "Text"
	            content_payload.append(text_content)
	            logging.info("Preparing Gemini request with text content.")

	        # Make the API call
	        response = gemini_model.generate_content(content_payload)
	        response.resolve()
	        logging.info("Received response from Gemini API.")

	        # Process the response
	        try:
	            # Models often wrap JSON in a Markdown fence; remove it first.
	            cleaned_text = _strip_markdown_fence(response.text)
	            result = json.loads(cleaned_text)
	            # Inject input type
	            if isinstance(result, dict):
	                result['input_type'] = input_type
	            logging.info("Successfully parsed JSON response from Gemini.")
	            return result
	        except (json.JSONDecodeError, AttributeError) as e:
	            logging.error(f"Failed to decode or process JSON response: {e}")
	            return process_error_response(response.text)

	    except Exception as e:
	        logging.error(f"Gemini API call failed: {e}", exc_info=True)
	        # Re-raise a more generic error for the Streamlit app to catch,
	        # keeping the original exception as the explicit cause.
	        raise RuntimeError(f"Analysis failed due to API error: {e}") from e