clockclock committed on
Commit
3e5a622
·
verified ·
1 Parent(s): 663da4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -3
app.py CHANGED
@@ -16,15 +16,35 @@ model = AutoModelForImageClassification.from_pretrained(model_id, torch_dtype=to
16
  model.eval()
17
  print("Model and processor loaded successfully.")
18
 
19
- # --- 2. Define the Explainability (Grad-CAM) Function ---
 
20
def generate_heatmap(image_tensor, original_image, target_class_index):
    """Generate a Grad-CAM heatmap and blend it over the original image.

    Args:
        image_tensor: preprocessed input tensor for the classifier
            (batch dim included — it is squeezed out of the attributions).
        original_image: the unprocessed PIL image to overlay the heatmap on.
        target_class_index: class index whose activation is explained.

    Returns:
        The matplotlib figure produced by Captum's ``visualize_image_attr``
        showing the blended heatmap.
    """
    # BUG FIX: the original targeted model.convnext.encoder.stages[-1].layers[-1].dwconv,
    # but the loaded model is a Swin Transformer, so that attribute path raises
    # AttributeError. Target the Swin backbone's final layer normalization instead.
    target_layer = model.swin.layernorm
    lgc = LayerGradCam(model, target_layer)
    # Baseline reference point: an all-zero (black) image of the same shape.
    baselines = torch.zeros_like(image_tensor)
    attributions = lgc.attribute(image_tensor, target=target_class_index, baselines=baselines, relu_attributions=True)
    # Drop the batch dimension and move channels last: (C, H, W) -> (H, W, C),
    # the layout visualize_image_attr expects.
    heatmap = np.transpose(attributions.squeeze(0).cpu().detach().numpy(), (1, 2, 0))
    visualized_image, _ = viz.visualize_image_attr(
        heatmap,
        np.array(original_image),
        method="blended_heat_map",
        sign="all",
        show_colorbar=True,
        title="Model Attention Heatmap",
    )
    return visualized_image
30
 
 
16
  model.eval()
17
  print("Model and processor loaded successfully.")
18
 
19
+ # --- 2. MODIFIED Define the Explainability (Grad-CAM) Function ---
20
+ # This function generates the heatmap showing which parts of the image the model focused on.
21
def generate_heatmap(image_tensor, original_image, target_class_index):
    """Overlay a Grad-CAM attention heatmap on *original_image*.

    Args:
        image_tensor: preprocessed model input (with batch dimension).
        original_image: the raw image the heatmap is blended onto.
        target_class_index: class index whose prediction is being explained.

    Returns:
        The figure returned by Captum's ``visualize_image_attr`` with the
        heatmap blended over the original image.
    """
    # Attribute against the final layer normalization of the Swin Transformer
    # backbone (this model is Swin, not ConvNeXT).
    # NOTE(review): Swin's layernorm activations are typically (batch, tokens,
    # channels) rather than (batch, C, H, W) — confirm LayerGradCam's output
    # shape is compatible with the (1, 2, 0) transpose below.
    grad_cam = LayerGradCam(model, model.swin.layernorm)

    # A black image of the input's shape serves as the attribution baseline.
    attributions = grad_cam.attribute(
        image_tensor,
        target=target_class_index,
        baselines=torch.zeros_like(image_tensor),
        relu_attributions=True,
    )

    # Drop the batch dimension and move channels last for visualization:
    # (C, H, W) -> (H, W, C). (No averaging happens here — just a transpose.)
    cam = attributions.squeeze(0).cpu().detach().numpy()
    heatmap = np.transpose(cam, (1, 2, 0))

    # Blend the heatmap onto the original image with Captum's visualizer.
    blended, _ = viz.visualize_image_attr(
        heatmap,
        np.array(original_image),
        method="blended_heat_map",
        sign="all",
        show_colorbar=True,
        title="Model Attention Heatmap",
    )
    return blended
50