obichimav committed on
Commit 085a78a · verified · 1 Parent(s): e957e17

Update app.py

Files changed (1)
  1. app.py +157 -53
app.py CHANGED
@@ -7,10 +7,24 @@ from transformers import pipeline
 import warnings
 from io import BytesIO
 import importlib.util
+import os
+import openai
 
 # Suppress warnings
 warnings.filterwarnings("ignore")
 
+# Set up OpenAI API key
+api_key = os.getenv('OPENAI_API_KEY')
+if not api_key:
+    print("No OpenAI API key found - will use simple keyword extraction")
+elif not api_key.startswith("sk-proj-") and not api_key.startswith("sk-"):
+    print("API key found but doesn't look correct")
+elif api_key.strip() != api_key:
+    print("API key has leading or trailing whitespace - please fix it.")
+else:
+    print("OpenAI API key found and looks good!")
+    openai.api_key = api_key
+
 # Global variables for models
 detector = None
 sam_predictor = None
@@ -248,34 +262,38 @@ def visualize_detections(image, detections, show_labels=True):
     fig = plt.figure(figsize=(12, 8))
     plt.imshow(image_np)
 
-    # Define colors for different instances
-    colors = plt.cm.tab10(np.linspace(0, 1, 10))
-
-    # Plot each detection
-    for i, detection in enumerate(detections):
-        # Get bbox, label, and score
-        bbox = detection['bbox']
-        label = detection['label']
-        score = detection['score']
-
-        # Convert normalized bbox to pixel coordinates
-        x1, y1, x2, y2 = bbox
-        x1_px, y1_px = int(x1 * width), int(y1 * height)
-        x2_px, y2_px = int(x2 * width), int(y2 * height)
-
-        # Color for this instance
-        color = colors[i % len(colors)]
-
-        # Draw bounding box
-        rect = plt.Rectangle((x1_px, y1_px), x2_px - x1_px, y2_px - y1_px,
-                             fill=False, edgecolor=color, linewidth=2)
-        plt.gca().add_patch(rect)
-
-        # Add label and score if requested
-        if show_labels:
-            plt.text(x1_px, y1_px - 5, f"{label}: {score:.2f}",
-                     color='white', bbox=dict(facecolor=color, alpha=0.8), fontsize=10)
+    # If we have detections, draw them
+    if detections:
+        # Define colors for different instances
+        colors = plt.cm.tab10(np.linspace(0, 1, 10))
+
+        # Plot each detection
+        for i, detection in enumerate(detections):
+            # Get bbox, label, and score
+            bbox = detection['bbox']
+            label = detection['label']
+            score = detection['score']
+
+            # Convert normalized bbox to pixel coordinates
+            x1, y1, x2, y2 = bbox
+            x1_px, y1_px = int(x1 * width), int(y1 * height)
+            x2_px, y2_px = int(x2 * width), int(y2 * height)
+
+            # Color for this instance
+            color = colors[i % len(colors)]
+
+            # Draw bounding box
+            rect = plt.Rectangle((x1_px, y1_px), x2_px - x1_px, y2_px - y1_px,
+                                 fill=False, edgecolor=color, linewidth=2)
+            plt.gca().add_patch(rect)
+
+            # Add label and score if requested
+            if show_labels:
+                plt.text(x1_px, y1_px - 5, f"{label}: {score:.2f}",
+                         color='white', bbox=dict(facecolor=color, alpha=0.8), fontsize=10)
 
+    # Set title
+    plt.title(f'Object Detection Results ({len(detections)} objects found)', fontsize=14, pad=20)
     plt.axis('off')
     plt.tight_layout()
 
@@ -288,6 +306,99 @@ def visualize_detections(image, detections, show_labels=True):
     result_image = Image.open(buf)
     return result_image
 
+def get_optimized_prompt(query_text):
+    """
+    Use OpenAI to convert natural language query into optimal detection prompt.
+    Falls back to simple extraction if OpenAI is not available.
+    """
+    if not query_text.strip():
+        return "object"
+
+    # Try OpenAI first if API key is available
+    if hasattr(openai, 'api_key') and openai.api_key:
+        try:
+            response = openai.chat.completions.create(
+                model="gpt-3.5-turbo",
+                messages=[{
+                    "role": "system",
+                    "content": """You are an expert at converting natural language queries into precise object detection terms.
+
+RULES:
+1. Return ONLY 1-2 words maximum that describe the object to detect
+2. Use the exact object name from the user's query
+3. For people: use "person"
+4. For vehicles: use "car", "truck", "bicycle"
+5. Do NOT include counting words, articles, or explanations
+6. Examples:
+   - "How many cacao fruits are there?" → "cacao fruit"
+   - "Count the corn in the field" → "corn"
+   - "Find all people" → "person"
+   - "How many cacao pods?" → "cacao pod"
+   - "Detect cars" → "car"
+   - "Count bananas" → "banana"
+   - "How many apples?" → "apple"
+
+Return ONLY the object name, nothing else."""
+                }, {
+                    "role": "user",
+                    "content": query_text
+                }],
+                temperature=0.0,  # Make it deterministic
+                max_tokens=5      # Force brevity
+            )
+
+            llm_result = response.choices[0].message.content.strip().lower()
+            # Extra safety: take only first 2 words
+            words = llm_result.split()[:2]
+            final_result = " ".join(words)
+
+            print(f"🤖 OpenAI suggested prompt: '{final_result}'")
+            return final_result
+
+        except Exception as e:
+            print(f"OpenAI error: {e}, falling back to keyword extraction")
+
+    # Fallback to simple keyword extraction (no hardcoded fruits)
+    print("🔀 Using keyword extraction (no OpenAI)")
+    query_lower = query_text.lower().replace("?", "").strip()
+
+    # Look for common patterns and extract object names
+    if "how many" in query_lower:
+        parts = query_lower.split("how many")
+        if len(parts) > 1:
+            remaining = parts[1].strip()
+            remaining = remaining.replace("are", "").replace("in", "").replace("the", "").replace("image", "").replace("there", "").strip()
+            # Take first meaningful word(s)
+            words = remaining.split()[:2]
+            search_terms = " ".join(words) if words else "object"
+        else:
+            search_terms = "object"
+    elif "count" in query_lower:
+        parts = query_lower.split("count")
+        if len(parts) > 1:
+            remaining = parts[1].strip()
+            remaining = remaining.replace("the", "").replace("in", "").replace("image", "").strip()
+            words = remaining.split()[:2]
+            search_terms = " ".join(words) if words else "object"
+        else:
+            search_terms = "object"
+    elif "find" in query_lower:
+        parts = query_lower.split("find")
+        if len(parts) > 1:
+            remaining = parts[1].strip()
+            remaining = remaining.replace("all", "").replace("the", "").replace("in", "").replace("image", "").strip()
+            words = remaining.split()[:2]
+            search_terms = " ".join(words) if words else "object"
+        else:
+            search_terms = "object"
+    else:
+        # Extract first 1-2 meaningful words from the query
+        words = query_lower.split()
+        meaningful_words = [w for w in words if w not in ["how", "many", "are", "in", "the", "image", "find", "count", "detect", "there", "this", "that", "a", "an"]]
+        search_terms = " ".join(meaningful_words[:2]) if meaningful_words else "object"
+
+    return search_terms
+
 def is_count_query(text):
     """Check if the query is asking for counting."""
     count_keywords = ["how many", "count", "number of", "total"]
@@ -299,61 +410,54 @@ def detection_pipeline(query_text, image, threshold, use_sam):
         return None, "⚠️ Please upload an image first!"
 
     try:
-        # Extract object name from query
-        query_lower = query_text.lower()
-
-        # Simple keyword extraction
-        if "people" in query_lower or "person" in query_lower:
-            search_terms = "person"
-        elif "car" in query_lower or "vehicle" in query_lower:
-            search_terms = "car"
-        elif "apple" in query_lower:
-            search_terms = "apple"
-        elif "bottle" in query_lower:
-            search_terms = "bottle"
-        elif "phone" in query_lower:
-            search_terms = "phone"
-        elif "dog" in query_lower:
-            search_terms = "dog"
-        elif "cat" in query_lower:
-            search_terms = "cat"
-        else:
-            # Extract last word as potential object
-            words = query_text.strip().split()
-            search_terms = words[-1] if words else "object"
+        # Use OpenAI or fallback to get optimized search terms
+        search_terms = get_optimized_prompt(query_text)
 
         print(f"Processing query: '{query_text}' -> searching for: '{search_terms}'")
 
         # Run object detection
         detections, processed_image = detect_objects_owlv2(search_terms, image, threshold)
 
+        print(f"Found {len(detections)} detections")
+        for i, det in enumerate(detections):
+            print(f"Detection {i+1}: {det['label']} (score: {det['score']:.3f})")
+
         # Generate masks if requested
         if use_sam and detections:
+            print("Generating SAM2 masks...")
            detections = generate_masks_sam2(detections, processed_image)
 
         # Create visualization using your proven functions
-        if use_sam and detections:
+        print("Creating visualization...")
+        if use_sam and detections and 'mask' in detections[0]:
            result_image = visualize_detections_with_masks(
                processed_image,
                detections,
                show_labels=True,
                show_boxes=True
            )
+            print("Created visualization with masks")
         else:
            result_image = visualize_detections(
                processed_image,
                detections,
                show_labels=True
            )
+            print("Created visualization with bounding boxes only")
+
+        # Make sure we have a valid result image
+        if result_image is None:
+            print("Warning: result_image is None, returning original image")
+            result_image = processed_image
 
         # Generate summary
         count = len(detections)
 
         summary_parts = []
-        summary_parts.append(f"🔍 **Search Query**: '{query_text}'")
-        summary_parts.append(f"🎯 **Detected Object Type**: '{search_terms}'")
+        summary_parts.append(f"🗣️ **Original Query**: '{query_text}'")
+        summary_parts.append(f"🤖 **AI-Optimized Search**: '{search_terms}'")
         summary_parts.append(f"⚙️ **Threshold**: {threshold}")
-        summary_parts.append(f"🤖 **SAM2 Segmentation**: {'Enabled' if use_sam else 'Disabled'}")
+        summary_parts.append(f"🎭 **SAM2 Segmentation**: {'Enabled' if use_sam else 'Disabled'}")
 
         if count > 0:
             if is_count_query(query_text):
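
For reference, the keyword-extraction fallback added in this commit never touches OpenAI, so it can be exercised in isolation. Below is a minimal, hypothetical condensation of get_optimized_prompt's non-OpenAI branches (extract_search_terms is an illustrative name, not a function in app.py); it shows the kind of 1-2 word prompts the fallback produces when no OPENAI_API_KEY is set:

# Simplified sketch of the fallback prompt extraction (no OpenAI key needed).
# extract_search_terms is a hypothetical condensation of get_optimized_prompt's
# keyword branches; the real function has separate "how many"/"count"/"find" paths.

def extract_search_terms(query_text: str) -> str:
    query_lower = query_text.lower().replace("?", "").strip()
    stopwords = {"how", "many", "are", "in", "the", "image", "find",
                 "count", "detect", "there", "this", "that", "a", "an", "all"}
    words = [w for w in query_lower.split() if w not in stopwords]
    # Keep at most two words, mirroring the 1-2 word detection prompts
    return " ".join(words[:2]) if words else "object"

if __name__ == "__main__":
    for q in ["How many cacao pods?", "Count bananas", "Find all people"]:
        print(f"{q!r} -> {extract_search_terms(q)!r}")
    # 'How many cacao pods?' -> 'cacao pods'
    # 'Count bananas' -> 'bananas'
    # 'Find all people' -> 'people'

The resulting short phrase is what detection_pipeline passes to detect_objects_owlv2 as the OWLv2 text query.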