clockclock committed
Commit 304b7e6 · verified · 1 parent: 03f09e4

Update app.py

Files changed (1):
  1. app.py  +41 -34

app.py CHANGED
@@ -1,12 +1,13 @@
  import gradio as gr
  import torch
  from transformers import AutoModelForImageClassification, AutoImageProcessor
  from PIL import Image
  import numpy as np
  from captum.attr import LayerGradCam
  from captum.attr import visualization as viz
- import requests  # <-- Import requests
- from io import BytesIO  # <-- Import BytesIO

  # --- 1. Load Model and Processor ---
  print("Loading model and processor...")
@@ -16,32 +17,48 @@ model = AutoModelForImageClassification.from_pretrained(model_id, torch_dtype=to
  model.eval()
  print("Model and processor loaded successfully.")

- # --- 2. Define the Explainability (Grad-CAM) Function ---
- def generate_heatmap(image_tensor, original_image, target_class_index):
-
-     # --- THIS IS THE FIX ---
-     # We define a wrapper function that ensures our model returns a simple tensor,
-     # which is what Captum expects. It takes the model's output object and
-     # extracts the 'logits' tensor from it.
      def model_forward_wrapper(input_tensor):
          outputs = model(pixel_values=input_tensor)
          return outputs.logits

-     # This part is correct from our last fix.
      target_layer = model.swin.layernorm
-
-     # Initialize LayerGradCam, but pass our new wrapper function instead of the raw model.
-     # Captum will now use this wrapper to get the model's output.
      lgc = LayerGradCam(model_forward_wrapper, target_layer)

-     # This call now works because `lgc` gets a proper tensor from our wrapper.
      attributions = lgc.attribute(image_tensor, target=target_class_index, relu_attributions=True)

-     # The rest of the function remains the same.
-     heatmap = np.transpose(attributions.squeeze(0).cpu().detach().numpy(), (1, 2, 0))
-
      visualized_image, _ = viz.visualize_image_attr(
-         heatmap,
          np.array(original_image),
          method="blended_heat_map",
          sign="all",
@@ -50,24 +67,21 @@ def generate_heatmap(image_tensor, original_image, target_class_index):
      )
      return visualized_image

- # --- 3. MODIFIED Main Prediction Function ---
- # Now it accepts two inputs: an uploaded image and a URL string.
  def predict(image_upload: Image.Image, image_url: str):
-
-     # --- Logic to decide which input to use ---
      if image_upload is not None:
          input_image = image_upload
          print(f"Processing uploaded image of size: {input_image.size}")
      elif image_url:
          try:
              response = requests.get(image_url)
-             response.raise_for_status()  # Raise an exception for bad status codes
              input_image = Image.open(BytesIO(response.content))
              print(f"Processing image from URL: {image_url}")
          except Exception as e:
              raise gr.Error(f"Could not load image from URL. Please check the link. Error: {e}")
      else:
-         # If no input is provided, raise an error
          raise gr.Error("Please upload an image or provide a URL to analyze.")

      if input_image.mode == 'RGBA':
@@ -105,8 +119,8 @@ def predict(image_upload: Image.Image, image_url: str):

      return labels_dict, explanation, heatmap_image

- # --- 4. MODIFIED Gradio Interface ---
- # We use gr.Tabs to create separate input sections.
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
      gr.Markdown(
          """
@@ -117,30 +131,23 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
      )
      with gr.Row():
          with gr.Column():
-             # --- TABS for different input methods ---
              with gr.Tabs():
                  with gr.TabItem("Upload File"):
                      input_image_upload = gr.Image(type="pil", label="Upload Your Image")
                  with gr.TabItem("Use Image URL"):
                      input_image_url = gr.Textbox(label="Paste Image URL here")
-
              submit_btn = gr.Button("Analyze Image", variant="primary")
-
          with gr.Column():
              output_label = gr.Label(label="Prediction")
              output_text = gr.Textbox(label="Explanation", lines=6, interactive=False)
              output_heatmap = gr.Image(label="Model Attention Heatmap")

-     # The click event now passes both possible inputs to the predict function
      submit_btn.click(
          fn=predict,
          inputs=[input_image_upload, input_image_url],
          outputs=[output_label, output_text, output_heatmap]
      )
-
-     # We remove the examples for now to simplify, as they don't work well with a tabbed interface by default.
-     # If you want them back, you would need a more complex setup to handle which tab the example populates.
-
-     # --- 5. Launch the App ---
  if __name__ == "__main__":
      demo.launch(debug=True)
 
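The comments removed above capture the key Captum constraint: a Hugging Face model's forward pass returns a ModelOutput object, while Captum's attribution methods expect a plain tensor, hence the logits-extracting wrapper. A minimal self-contained sketch of the pattern; the checkpoint name and the dummy input are illustrative stand-ins, not taken from this commit:

import torch
from captum.attr import LayerGradCam
from transformers import AutoModelForImageClassification

# Stand-in checkpoint for illustration; app.py loads its own model_id.
model = AutoModelForImageClassification.from_pretrained("microsoft/swin-tiny-patch4-window7-224")
model.eval()

def model_forward_wrapper(input_tensor):
    # HF models return a ModelOutput object; Captum needs the raw logits tensor.
    return model(pixel_values=input_tensor).logits

lgc = LayerGradCam(model_forward_wrapper, model.swin.layernorm)
attributions = lgc.attribute(torch.rand(1, 3, 224, 224), target=0, relu_attributions=True)

The updated file, as rendered in the diff, follows.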
  import gradio as gr
  import torch
+ import torch.nn.functional as F  # <-- ADD THIS IMPORT
  from transformers import AutoModelForImageClassification, AutoImageProcessor
  from PIL import Image
  import numpy as np
  from captum.attr import LayerGradCam
  from captum.attr import visualization as viz
+ import requests
+ from io import BytesIO

  # --- 1. Load Model and Processor ---
  print("Loading model and processor...")

  model.eval()
  print("Model and processor loaded successfully.")

+ # --- 2. FINAL, CORRECTED Explainability (Grad-CAM) Function ---
+ def generate_heatmap(image_tensor, original_image, target_class_index):
+     # This wrapper is correct and necessary for Captum to work with Hugging Face models.
      def model_forward_wrapper(input_tensor):
          outputs = model(pixel_values=input_tensor)
          return outputs.logits

+     # The target layer is also correct for the Swin Transformer.
      target_layer = model.swin.layernorm
+
+     # Initialize LayerGradCam with the wrapper and the target layer.
      lgc = LayerGradCam(model_forward_wrapper, target_layer)

+     # This call now works and returns the attributions.
      attributions = lgc.attribute(image_tensor, target=target_class_index, relu_attributions=True)

+     # --- THIS IS THE FIX for the Transformer Architecture ---
+     # Transformer models output a sequence of patch attributions, not a 2D grid.
+     # We must reshape this sequence into a grid and then upsample it.
+
+     # 1. Determine the grid size (e.g., for 49 patches, it's 7x7).
+     #    We remove the batch dimension and get the number of patches (sequence length).
+     num_patches = attributions.shape[-1]
+     grid_size = int(np.sqrt(num_patches))
+
+     # 2. Reshape the 1D attributions into a 2D grid.
+     heatmap = attributions.squeeze(0).squeeze(0).reshape(grid_size, grid_size)
+
+     # 3. Upsample the small heatmap to match the original image size for overlay.
+     #    We need to add batch and channel dimensions back for the interpolate function.
+     heatmap = heatmap.unsqueeze(0).unsqueeze(0)
+     # Note: original_image.size is (W, H); interpolate needs size as (H, W).
+     upsampled_heatmap = F.interpolate(heatmap, size=original_image.size[::-1], mode='bilinear', align_corners=False)
+
+     # 4. Prepare the final heatmap for visualization.
+     heatmap_for_viz = upsampled_heatmap.squeeze().cpu().detach().numpy()
+
+     # The visualization function expects an (H, W, C) shaped numpy array.
+     # Our heatmap is (H, W), so we add a channel dimension.
      visualized_image, _ = viz.visualize_image_attr(
+         np.expand_dims(heatmap_for_viz, axis=-1),
          np.array(original_image),
          method="blended_heat_map",
          sign="all",

      )
      return visualized_image
 
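The sequence-to-grid fix above is easy to sanity-check in isolation. A minimal sketch with a dummy attribution tensor; the 49-patch (7x7) grid and the 224x224 target size are assumptions chosen to match the commit's own example:

import torch
import torch.nn.functional as F
import numpy as np

# Dummy Grad-CAM output shaped (batch, 1, num_patches): 49 patches -> 7x7 grid.
attributions = torch.rand(1, 1, 49)

grid_size = int(np.sqrt(attributions.shape[-1]))                            # 7
heatmap = attributions.squeeze(0).squeeze(0).reshape(grid_size, grid_size)  # (7, 7)
heatmap = heatmap.unsqueeze(0).unsqueeze(0)                                 # (1, 1, 7, 7)

# PIL reports size as (W, H); F.interpolate wants (H, W), hence the [::-1] in app.py.
upsampled = F.interpolate(heatmap, size=(224, 224), mode="bilinear", align_corners=False)
print(upsampled.shape)  # torch.Size([1, 1, 224, 224])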
+
+ # --- 3. Main Prediction Function (Unchanged) ---
  def predict(image_upload: Image.Image, image_url: str):
      if image_upload is not None:
          input_image = image_upload
          print(f"Processing uploaded image of size: {input_image.size}")
      elif image_url:
          try:
              response = requests.get(image_url)
+             response.raise_for_status()
              input_image = Image.open(BytesIO(response.content))
              print(f"Processing image from URL: {image_url}")
          except Exception as e:
              raise gr.Error(f"Could not load image from URL. Please check the link. Error: {e}")
      else:
          raise gr.Error("Please upload an image or provide a URL to analyze.")

      if input_image.mode == 'RGBA':
 
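The URL branch of predict (unchanged in this commit) relies on raise_for_status() to turn 4xx/5xx responses into exceptions before PIL ever touches the bytes. The same pattern as a standalone helper; the function name and the timeout argument are illustrative additions, not part of the commit:

import requests
from io import BytesIO
from PIL import Image

def load_image_from_url(url: str) -> Image.Image:
    response = requests.get(url, timeout=10)  # timeout is an extra safeguard
    response.raise_for_status()               # raises requests.HTTPError on 4xx/5xx
    return Image.open(BytesIO(response.content))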

      return labels_dict, explanation, heatmap_image

+
+ # --- 4. Gradio Interface (Unchanged) ---
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
      gr.Markdown(
          """

      )
      with gr.Row():
          with gr.Column():
              with gr.Tabs():
                  with gr.TabItem("Upload File"):
                      input_image_upload = gr.Image(type="pil", label="Upload Your Image")
                  with gr.TabItem("Use Image URL"):
                      input_image_url = gr.Textbox(label="Paste Image URL here")
              submit_btn = gr.Button("Analyze Image", variant="primary")
          with gr.Column():
              output_label = gr.Label(label="Prediction")
              output_text = gr.Textbox(label="Explanation", lines=6, interactive=False)
              output_heatmap = gr.Image(label="Model Attention Heatmap")

      submit_btn.click(
          fn=predict,
          inputs=[input_image_upload, input_image_url],
          outputs=[output_label, output_text, output_heatmap]
      )
+
+ # --- 5. Launch the App (Unchanged) ---
  if __name__ == "__main__":
      demo.launch(debug=True)
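The old version's closing comments note that gr.Examples was dropped because examples do not pair cleanly with a tabbed input by default. One minimal way to restore them, assuming the examples should populate only the upload tab; the file paths are placeholders:

import gradio as gr

with gr.Blocks() as demo:
    input_image_upload = gr.Image(type="pil", label="Upload Your Image")
    # Placeholder paths; clicking an example fills only the upload component.
    gr.Examples(
        examples=["examples/sample1.jpg", "examples/sample2.jpg"],
        inputs=input_image_upload,
    )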