Update app.py
Browse files
app.py
CHANGED
@@ -366,24 +366,20 @@ def generate_gradcam_caption(image, processor, model, max_length=60):
|
|
366 |
Generate a detailed analysis of GradCAM visualization using the fine-tuned BLIP model
|
367 |
"""
|
368 |
try:
|
369 |
-
st.write("Debug: Starting GradCAM caption generation")
|
370 |
# Process image first
|
371 |
inputs = processor(image, return_tensors="pt")
|
372 |
|
373 |
# Check for available GPU and move model and inputs
|
374 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
375 |
-
st.write(f"Debug: Using device: {device}")
|
376 |
model = model.to(device)
|
377 |
inputs = {k: v.to(device) if hasattr(v, 'to') else v for k, v in inputs.items()}
|
378 |
|
379 |
# Generate caption
|
380 |
-
st.write("Debug: Generating caption...")
|
381 |
with torch.no_grad():
|
382 |
output = model.generate(**inputs, max_length=max_length, num_beams=5)
|
383 |
|
384 |
# Decode the output
|
385 |
caption = processor.decode(output[0], skip_special_tokens=True)
|
386 |
-
st.write(f"Debug: Raw caption: {caption}")
|
387 |
|
388 |
# Try to parse the caption based on different possible formats
|
389 |
try:
|
@@ -403,8 +399,6 @@ def generate_gradcam_caption(image, processor, model, max_length=60):
|
|
403 |
|
404 |
# If nothing was extracted using the original format, try alternative formats
|
405 |
if not formatted_text.strip():
|
406 |
-
st.write("Debug: Trying alternative format parsing")
|
407 |
-
|
408 |
# Check for newer format that might be in the Xception model
|
409 |
if ":" in caption:
|
410 |
parts = caption.split(":")
|
@@ -414,11 +408,9 @@ def generate_gradcam_caption(image, processor, model, max_length=60):
|
|
414 |
# As a fallback, just use the entire caption
|
415 |
formatted_text = f"**GradCAM Analysis**:\n{caption.strip()}"
|
416 |
except Exception as parsing_error:
|
417 |
-
st.write(f"Debug: Error parsing caption format: {str(parsing_error)}")
|
418 |
# Use the entire caption as is
|
419 |
formatted_text = f"**GradCAM Analysis**:\n{caption.strip()}"
|
420 |
|
421 |
-
st.write(f"Debug: Formatted caption complete. Length: {len(formatted_text)}")
|
422 |
return formatted_text.strip()
|
423 |
|
424 |
except Exception as e:
|
@@ -997,17 +989,15 @@ def main():
|
|
997 |
# Generate caption for GradCAM overlay image if BLIP model is loaded
|
998 |
if st.session_state.blip_model_loaded and overlay:
|
999 |
with st.spinner("Analyzing GradCAM visualization..."):
|
1000 |
-
st.write("Debug: Starting GradCAM analysis")
|
1001 |
gradcam_caption = generate_gradcam_caption(
|
1002 |
overlay,
|
1003 |
st.session_state.finetuned_processor,
|
1004 |
st.session_state.finetuned_model
|
1005 |
)
|
1006 |
st.session_state.gradcam_caption = gradcam_caption
|
1007 |
-
st.write(f"Debug: Saved GradCAM caption to session state, length: {len(gradcam_caption) if gradcam_caption else 0}")
|
1008 |
|
1009 |
# Display the caption directly here as well for immediate feedback
|
1010 |
-
st.markdown("### GradCAM Analysis
|
1011 |
st.markdown(gradcam_caption)
|
1012 |
except Exception as e:
|
1013 |
st.error(f"Error generating GradCAM: {str(e)}")
|
@@ -1032,11 +1022,6 @@ def main():
|
|
1032 |
# Image Analysis Summary section - AFTER Stage 2
|
1033 |
if hasattr(st.session_state, 'current_image') and (hasattr(st.session_state, 'image_caption') or hasattr(st.session_state, 'gradcam_caption')):
|
1034 |
with st.expander("Image Analysis Summary", expanded=True):
|
1035 |
-
st.write(f"Debug: Image caption exists: {hasattr(st.session_state, 'image_caption')}")
|
1036 |
-
st.write(f"Debug: GradCAM caption exists: {hasattr(st.session_state, 'gradcam_caption')}")
|
1037 |
-
if hasattr(st.session_state, 'gradcam_caption'):
|
1038 |
-
st.write(f"Debug: GradCAM caption length: {len(st.session_state.gradcam_caption)}")
|
1039 |
-
|
1040 |
# Display images and analysis in organized layout
|
1041 |
col1, col2 = st.columns([1, 2])
|
1042 |
|
|
|
366 |
Generate a detailed analysis of GradCAM visualization using the fine-tuned BLIP model
|
367 |
"""
|
368 |
try:
|
|
|
369 |
# Process image first
|
370 |
inputs = processor(image, return_tensors="pt")
|
371 |
|
372 |
# Check for available GPU and move model and inputs
|
373 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
374 |
model = model.to(device)
|
375 |
inputs = {k: v.to(device) if hasattr(v, 'to') else v for k, v in inputs.items()}
|
376 |
|
377 |
# Generate caption
|
|
|
378 |
with torch.no_grad():
|
379 |
output = model.generate(**inputs, max_length=max_length, num_beams=5)
|
380 |
|
381 |
# Decode the output
|
382 |
caption = processor.decode(output[0], skip_special_tokens=True)
|
|
|
383 |
|
384 |
# Try to parse the caption based on different possible formats
|
385 |
try:
|
|
|
399 |
|
400 |
# If nothing was extracted using the original format, try alternative formats
|
401 |
if not formatted_text.strip():
|
|
|
|
|
402 |
# Check for newer format that might be in the Xception model
|
403 |
if ":" in caption:
|
404 |
parts = caption.split(":")
|
|
|
408 |
# As a fallback, just use the entire caption
|
409 |
formatted_text = f"**GradCAM Analysis**:\n{caption.strip()}"
|
410 |
except Exception as parsing_error:
|
|
|
411 |
# Use the entire caption as is
|
412 |
formatted_text = f"**GradCAM Analysis**:\n{caption.strip()}"
|
413 |
|
|
|
414 |
return formatted_text.strip()
|
415 |
|
416 |
except Exception as e:
|
|
|
989 |
# Generate caption for GradCAM overlay image if BLIP model is loaded
|
990 |
if st.session_state.blip_model_loaded and overlay:
|
991 |
with st.spinner("Analyzing GradCAM visualization..."):
|
|
|
992 |
gradcam_caption = generate_gradcam_caption(
|
993 |
overlay,
|
994 |
st.session_state.finetuned_processor,
|
995 |
st.session_state.finetuned_model
|
996 |
)
|
997 |
st.session_state.gradcam_caption = gradcam_caption
|
|
|
998 |
|
999 |
# Display the caption directly here as well for immediate feedback
|
1000 |
+
st.markdown("### GradCAM Analysis")
|
1001 |
st.markdown(gradcam_caption)
|
1002 |
except Exception as e:
|
1003 |
st.error(f"Error generating GradCAM: {str(e)}")
|
|
|
1022 |
# Image Analysis Summary section - AFTER Stage 2
|
1023 |
if hasattr(st.session_state, 'current_image') and (hasattr(st.session_state, 'image_caption') or hasattr(st.session_state, 'gradcam_caption')):
|
1024 |
with st.expander("Image Analysis Summary", expanded=True):
|
|
|
|
|
|
|
|
|
|
|
1025 |
# Display images and analysis in organized layout
|
1026 |
col1, col2 = st.columns([1, 2])
|
1027 |
|