STron committed on
Commit
7df2acb
·
0 Parent(s):

Added Roberta and Vit

Files changed (8)
  1. .gitignore +4 -0
  2. app.py +354 -0
  3. get_data.py +157 -0
  4. readme.md +12 -0
  5. requirements.txt +0 -0
  6. test.py +106 -0
  7. train_model.ipynb +0 -0
  8. validate.py +138 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
+ .gradio/
+ __pycache__/
+
+ .gitattributes
app.py ADDED
@@ -0,0 +1,354 @@
+ import gradio as gr
+ import onnxruntime as ort
+ from transformers import RobertaTokenizer, ViTImageProcessor
+ from PIL import Image
+ import numpy as np
+ import torch
+ import os
+ import time
+ import logging
+
+ # Setup logging
+ logging.basicConfig(level=logging.INFO,
+                     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+ vit_processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
+ tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+
+ model_path = "./multimodal_model.onnx"
+ try:
+     if not os.path.exists(model_path):
+         raise FileNotFoundError(f"ONNX model not found at {model_path}")
+
+     logger.info(f"Loading ONNX model from {model_path}")
+     sess_options = ort.SessionOptions()
+     sess_options.log_severity_level = 0
+     ort_session = ort.InferenceSession(
+         model_path,
+         sess_options=sess_options,
+         providers=['CPUExecutionProvider']
+     )
+     logger.info("ONNX model loaded successfully")
+
+     input_names = [inp.name for inp in ort_session.get_inputs()]
+     input_shapes = {inp.name: inp.shape for inp in ort_session.get_inputs()}
+     output_names = [out.name for out in ort_session.get_outputs()]
+
+     logger.info(f"Model inputs: {input_names} with shapes {input_shapes}")
+     logger.info(f"Model outputs: {output_names}")
+
+ except Exception as e:
+     logger.error(f"Error loading ONNX model: {e}")
+     raise
+
+ labels = ["Real", "Real Text with fake image", "Fake"]
+
+ def softmax(x):
+     """Compute softmax values for each set of scores in x."""
+     e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
+     return e_x / e_x.sum(axis=1, keepdims=True)
+
+ def image_with_prediction(img, label, confidence):
+     """Return the original image with an overlay showing the prediction."""
+     from PIL import Image, ImageDraw, ImageFont
+
+     img_copy = img.copy()
+     draw = ImageDraw.Draw(img_copy)
+
+     width, height = img_copy.size
+
+     # Semi-transparent banner along the bottom edge
+     overlay = Image.new('RGBA', (width, 40), (0, 0, 0, 150))
+     img_copy.paste(overlay, (0, height - 40), overlay)
+
+     text = f"{label}: {confidence:.1%}"
+
+     try:
+         font = ImageFont.truetype("arial.ttf", 20)
+     except IOError:
+         font = ImageFont.load_default()
+
+     try:
+         text_width = draw.textlength(text, font=font)
+     except AttributeError:
+         # Older Pillow releases lack ImageDraw.textlength
+         text_width = font.getsize(text)[0] if hasattr(font, 'getsize') else 200
+
+     text_position = ((width - text_width) // 2, height - 35)
+     draw.text(text_position, text, fill=(255, 255, 255), font=font)
+
+     return img_copy
+
+ def predict_news(text, image):
+     if text is None or text.strip() == "":
+         return {labels[0]: 0.0, labels[1]: 0.0, labels[2]: 0.0}, None, "Please enter some text to analyze."
+
+     if image is None:
+         return {labels[0]: 0.0, labels[1]: 0.0, labels[2]: 0.0}, None, "Please upload an image to analyze."
+
+     try:
+         logger.info(f"Processing text: {text[:50]}...")
+         logger.info(f"Processing image size: {image.size}")
+
+         # Process text input
+         inputs = tokenizer.encode_plus(text, add_special_tokens=True, return_tensors='np',
+                                        max_length=80, truncation=True, padding='max_length')
+
+         input_ids = inputs['input_ids']
+         attention_mask = inputs['attention_mask']
+
+         logger.info(f"Input IDs shape: {input_ids.shape}")
+         logger.info(f"Attention mask shape: {attention_mask.shape}")
+
+         # Process image input
+         image_processed = vit_processor(images=image, return_tensors="np")["pixel_values"]
+         logger.info(f"Processed image shape: {image_processed.shape}")
+
+         # Match tensors to the ONNX graph's inputs by name
+         ort_inputs = {}
+         for input_meta in ort_session.get_inputs():
+             input_name = input_meta.name
+             if 'ids' in input_name.lower() or input_name == 'text_input_ids':
+                 ort_inputs[input_name] = input_ids
+             elif 'mask' in input_name.lower() or input_name == 'text_attention_mask':
+                 ort_inputs[input_name] = attention_mask
+             elif 'image' in input_name.lower() or input_name == 'image_input':
+                 ort_inputs[input_name] = image_processed
+
+         logger.info(f"ONNX input keys: {list(ort_inputs.keys())}")
+
+         # Run inference
+         start_time = time.time()
+         logger.info("Starting inference")
+         outputs = ort_session.run(None, ort_inputs)
+         inference_time = time.time() - start_time
+         logger.info(f"Inference completed in {inference_time:.3f}s")
+
+         # Process model outputs
+         logits = outputs[0]
+         logger.info(f"Raw output shape: {logits.shape}, values: {logits}")
+
+         probs = softmax(logits)[0]
+         logger.info(f"Probabilities: {probs}")
+
+         pred_idx = int(np.argmax(probs))
+         confidence = float(probs[pred_idx])
+
+         if pred_idx == 1:
+             color = "orange"
+             message = f"This content appears to be **REAL TEXT WITH FAKE IMAGE** with {confidence:.1%} confidence."
+         elif pred_idx == 2:
+             color = "red"
+             message = f"This content appears to be **FAKE** with {confidence:.1%} confidence."
+         else:
+             color = "green"
+             message = f"This content appears to be **REAL** with {confidence:.1%} confidence."
+
+         analysis = f"""
+         <div style='text-align: center; padding: 10px; background-color: {color}15; border-radius: 5px; margin-top: 10px;'>
+             <span style='font-size: 18px; color: {color}; font-weight: bold;'>{message}</span>
+             <p>Inference time: {inference_time:.3f} seconds</p>
+         </div>
+         """
+
+         result = {
+             labels[0]: float(probs[0]),
+             labels[1]: float(probs[1]),
+             labels[2]: float(probs[2])
+         }
+
+         interpretation = image_with_prediction(image, labels[pred_idx], confidence)
+
+         return result, interpretation, analysis
+
+     except Exception as e:
+         logger.error(f"Error during analysis: {str(e)}", exc_info=True)
+         return {labels[0]: 0.0, labels[1]: 0.0, labels[2]: 0.0}, None, f"Error during analysis: {str(e)}"
+
+ examples = [
+     ["COVID-19 vaccine causes severe side effects in 80% of recipients", "https://images.unsplash.com/photo-1605289982774-9a6fef564df8?q=80&w=1000&auto=format&fit=crop"],
+     ["Scientists discover new species of deep-sea fish", "https://images.unsplash.com/photo-1524704796725-9fc3044a58b2?q=80&w=1000&auto=format&fit=crop"],
+ ]
+
+ # Build Gradio interface
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown(
+         """
+         # 📰 Fake News Detector (RoBERTa + ViT)
+
+         This multimodal AI system analyzes both text and images to detect potentially fake news content.
+         Upload an image and enter a news headline to see if the combination is likely to be real or fake news.
+         """
+     )
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             text_input = gr.Textbox(
+                 label="News Headline / Text",
+                 placeholder="Enter the news headline or text here...",
+                 lines=3
+             )
+             image_input = gr.Image(type="pil", label="Associated Image")
+
+             analyze_btn = gr.Button("Analyze Content", variant="primary")
+
+         with gr.Column(scale=1):
+             label_output = gr.Label(label="Prediction Probabilities")
+             image_output = gr.Image(type="pil", label="Visual Analysis")
+             analysis_html = gr.HTML(label="Analysis")
+
+     gr.Examples(
+         examples=examples,
+         inputs=[text_input, image_input],
+         outputs=[label_output, image_output, analysis_html],
+         fn=predict_news,
+         cache_examples=True,
+     )
+
+     gr.Markdown(
+         """
+         ### How it works
+
+         This system combines:
+         - **RoBERTa**: Analyzes the textual content
+         - **ViT**: Processes the image data
+         - **Multimodal Fusion**: Combines both signals to make a prediction
+
+         The model was trained on the Fakeddit dataset containing real and fake news pairs with both text and images.
+         """
+     )
+
+     analyze_btn.click(
+         predict_news,
+         inputs=[text_input, image_input],
+         outputs=[label_output, image_output, analysis_html]
+     )
+
+ if __name__ == "__main__":
+     logger.info("Starting Gradio application")
+     demo.launch()
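A quick way to exercise the app outside the UI is to call predict_news directly. A minimal smoke test, assuming multimodal_model.onnx sits next to app.py (the solid-gray image is just a placeholder input; importing app builds the Blocks UI without launching it):

    # Hypothetical smoke test; requires ./multimodal_model.onnx in the repo root.
    from PIL import Image
    from app import predict_news

    placeholder = Image.new("RGB", (224, 224), color="gray")
    result, annotated, analysis = predict_news(
        "Scientists discover new species of deep-sea fish", placeholder
    )
    print(result)    # probability per label: Real / Real Text with fake image / Fake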
get_data.py ADDED
@@ -0,0 +1,157 @@
+ import argparse
+ import numpy as np
+ import pandas as pd
+ import os
+ from urllib import request
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from tqdm import tqdm
+ from sklearn.utils import resample
+ from torchvision.transforms import v2
+ from PIL import Image
+
+ def load_and_prepare_data(file_path):
+     df = pd.read_csv(file_path, sep="\t")
+     df.drop(['2_way_label', '3_way_label', 'title'], axis=1, inplace=True)
+     # Collapse the 6-way Fakeddit labels into binary: 0 = real, 1 = any fake category
+     df['binary_label'] = df['6_way_label'].apply(lambda x: 0 if x == 0 else 1)
+     df.reset_index(drop=True, inplace=True)
+     return df
+
+ def balance_data(df, max_samples_per_class=35000):
+     df_with_image = df[df['hasImage'] == True]
+     df_class_0 = df_with_image[df_with_image['binary_label'] == 0]
+     df_class_1 = df_with_image[df_with_image['binary_label'] == 1]
+     target_count = min(len(df_class_0), len(df_class_1), max_samples_per_class)
+
+     df_sample_0 = resample(df_class_0, replace=False, n_samples=target_count, random_state=42)
+     df_sample_1 = resample(df_class_1, replace=False, n_samples=target_count, random_state=42)
+
+     df_balanced = pd.concat([df_sample_0, df_sample_1])
+     df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)
+     df_balanced = df_balanced.replace(np.nan, '', regex=True)
+     df_balanced.fillna('', inplace=True)
+     # Also return the unused class-1 rows so the minority class can be topped up later
+     return df_balanced, df_class_1[~df_class_1['id'].isin(df_sample_1['id'])]
+
+ def ensure_directory(path):
+     if not os.path.exists(path):
+         os.makedirs(path)
+
+ def download_image(row, image_dir):
+     index, row = row
+     if row["hasImage"] and row["image_url"] not in ["", "nan"]:
+         image_url = row["image_url"]
+         path = os.path.join(image_dir, f"{row['id']}.jpg")
+         try:
+             with open(path, 'wb') as f:
+                 f.write(request.urlopen(image_url, timeout=5).read())
+         except Exception:
+             return index
+     return None
+
+ def download_images_fast(df, image_dir, max_workers=16):
+     failed_indices = []
+     with ThreadPoolExecutor(max_workers=max_workers) as executor:
+         futures = [executor.submit(download_image, row, image_dir) for row in df.iterrows()]
+         for f in tqdm(as_completed(futures), total=len(futures), desc="Downloading images"):
+             result = f.result()
+             if result is not None:
+                 failed_indices.append(result)
+     df.drop(index=failed_indices, inplace=True)
+     df.reset_index(drop=True, inplace=True)
+     return df
+
+ def validate_image(row, image_dir):
+     index, row = row
+     image_path = os.path.join(image_dir, f"{row['id']}.jpg")
+     try:
+         with Image.open(image_path) as img:
+             img.verify()
+         return None
+     except Exception:
+         if os.path.exists(image_path):
+             os.remove(image_path)
+         return index
+
+ def validate_images_fast(df, image_dir, max_workers=16):
+     corrupted_indices = []
+     with ThreadPoolExecutor(max_workers=max_workers) as executor:
+         futures = [executor.submit(validate_image, row, image_dir) for row in df.iterrows()]
+         for f in tqdm(as_completed(futures), total=len(futures), desc="Validating images"):
+             result = f.result()
+             if result is not None:
+                 corrupted_indices.append(result)
+     df.drop(index=corrupted_indices, inplace=True)
+     df.reset_index(drop=True, inplace=True)
+     return df, corrupted_indices
+
+ def resize_images(df, image_dir, size=(256, 256)):
+     resize_transform = v2.Resize(size)
+     failed_indices = []
+     for index, row in tqdm(df.iterrows(), total=len(df), desc="Resizing images"):
+         image_path = os.path.join(image_dir, f"{row['id']}.jpg")
+         try:
+             image = Image.open(image_path).convert("RGB")
+             resized_image = resize_transform(image)
+             resized_image.save(image_path)
+         except Exception as e:
+             print(f"Failed to resize {image_path}: {e}")
+             failed_indices.append(index)
+     # Drop failures after iterating instead of mutating the frame mid-loop
+     df.drop(index=failed_indices, inplace=True)
+     df.reset_index(drop=True, inplace=True)
+     return df
+
+ def augment_minority_class(df_balanced, df_remaining_class_1, image_dir, batch_size=4000):
+     needed = len(df_balanced[df_balanced['binary_label'] == 0]) - len(df_balanced[df_balanced['binary_label'] == 1])
+     collected = []
+     print(f"Need to add {needed} more class 1 samples...")
+     # Compare collected *rows* (not batches) against the shortfall
+     while sum(len(d) for d in collected) < needed and len(df_remaining_class_1) > 0:
+         batch = df_remaining_class_1.sample(n=min(batch_size, len(df_remaining_class_1)), random_state=42)
+         df_remaining_class_1 = df_remaining_class_1.drop(batch.index)
+
+         print(f"\n🌀 Downloading batch of {len(batch)} images...")
+         batch = download_images_fast(batch.copy(), image_dir)
+
+         print("🔎 Validating downloaded images...")
+         valid_batch, _ = validate_images_fast(batch.copy(), image_dir)
+
+         print("🎨 Resizing valid images...")
+         valid_batch = resize_images(valid_batch, image_dir)
+
+         collected.append(valid_batch)
+
+     df_extra_class_1 = pd.concat(collected).reset_index(drop=True)
+     # Guard against the pool running dry before the shortfall is covered
+     df_extra_class_1 = df_extra_class_1.sample(n=min(needed, len(df_extra_class_1)), random_state=42).reset_index(drop=True)
+
+     df_balanced_updated = pd.concat([df_balanced, df_extra_class_1], ignore_index=True)
+     df_balanced_updated = df_balanced_updated.sample(frac=1, random_state=42).reset_index(drop=True)
+     return df_balanced_updated
+
+ def main(args):
+     ensure_directory(args.image_dir)
+
+     df = load_and_prepare_data(args.tsv_path)
+     df_balanced, df_remaining_class_1 = balance_data(df, max_samples_per_class=args.max_samples)
+     df_balanced.to_csv("./df.csv", index=False)
+
+     df_balanced = download_images_fast(df_balanced, args.image_dir)
+     print(f"✅ Finished downloading. Remaining rows: {len(df_balanced)}")
+     df_balanced.to_csv("./df_balanced.csv", index=False)
+
+     df_balanced, _ = validate_images_fast(df_balanced, args.image_dir)
+     df_balanced = resize_images(df_balanced, args.image_dir)
+     df_balanced.to_csv("./df_balanced_resized.csv", index=False)
+
+     df_balanced_updated = augment_minority_class(df_balanced, df_remaining_class_1, args.image_dir)
+     df_balanced_updated.to_csv(args.output_csv, index=False)
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Image Dataset Preprocessing Pipeline")
+     parser.add_argument('--tsv_path', type=str, default="./multimodal_train.tsv", help='Path to the input TSV file')
+     parser.add_argument('--image_dir', type=str, default="./images", help='Directory to save images')
+     parser.add_argument('--output_csv', type=str, default="./final_output.csv", help='Path to save final balanced CSV')
+     parser.add_argument('--max_samples', type=int, default=35000, help='Maximum number of samples per class')
+     parser.add_argument('--skip_existing', action='store_true', help='Skip downloading if image already exists')
+     args = parser.parse_args()
+     main(args)
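For reference, a typical invocation of the pipeline above, using only the flags the parser defines (note that --skip_existing is parsed but not yet consulted by the download step):

    python get_data.py --tsv_path ./multimodal_train.tsv --image_dir ./images --output_csv ./final_output.csv --max_samples 35000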
readme.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: Fake News Detection Demo
+ emoji: 📚
+ colorFrom: blue
+ colorTo: pink
+ sdk: gradio
+ sdk_version: 5.29.1
+ app_file: app.py
+ pinned: false
+ license: cc-by-nc-4.0
+ short_description: Multimodal fake news classification on the Fakeddit dataset.
+ ---
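The block above is Hugging Face Spaces front matter. A minimal local-run recipe, assuming requirements.txt (the binary file below) pins the packages app.py imports:

    pip install -r requirements.txt
    python app.py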
requirements.txt ADDED
Binary file (934 Bytes).
 
test.py ADDED
@@ -0,0 +1,106 @@
+ import torch
+ from transformers import BertTokenizer, BertModel
+ import torch.nn as nn
+ from torchvision.models import resnet50, ResNet50_Weights
+ from PIL import Image
+ from torchvision.transforms import v2
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print("\n🚀 Using device:", device)
+
+ tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+
+ def get_bert_embedding(text):
+     inputs = tokenizer.encode_plus(
+         text, add_special_tokens=True,
+         return_tensors='pt', max_length=80,
+         truncation=True, padding='max_length'
+     )
+     return inputs['input_ids'].squeeze(0), inputs['attention_mask'].squeeze(0)
+
+ class SelfAttentionFusion(nn.Module):
+     def __init__(self, embed_dim):
+         super().__init__()
+         self.attn = nn.Linear(embed_dim * 2, 2)
+         self.softmax = nn.Softmax(dim=1)
+
+     def forward(self, x_text, x_img):
+         # Learn one weight per modality, then take the weighted sum
+         stacked = torch.stack([x_text, x_img], dim=1)
+         attn_weights = self.softmax(self.attn(torch.cat([x_text, x_img], dim=1))).unsqueeze(2)
+         fused = (attn_weights * stacked).sum(dim=1)
+         return fused
+
+ class BERTResNetClassifier(nn.Module):
+     def __init__(self, num_classes=2):
+         super().__init__()
+         self.image_model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
+         self.fc_image = nn.Linear(1000, 512)
+         self.drop_img = nn.Dropout(0.3)
+
+         self.text_model = BertModel.from_pretrained("bert-base-uncased")
+         self.fc_text = nn.Linear(self.text_model.config.hidden_size, 512)
+         self.drop_text = nn.Dropout(0.3)
+
+         self.fusion = SelfAttentionFusion(512)
+         self.fc_final = nn.Linear(512, num_classes)
+
+     def forward(self, image, input_ids, attention_mask):
+         x_img = self.image_model(image)
+         x_img = self.drop_img(x_img)
+         x_img = self.fc_image(x_img)
+
+         # Use the [CLS] token embedding as the text representation
+         x_text = self.text_model(input_ids=input_ids, attention_mask=attention_mask)[0][:, 0, :]
+         x_text = self.drop_text(x_text)
+         x_text = self.fc_text(x_text)
+
+         x_fused = self.fusion(x_text, x_img)
+         return self.fc_final(x_fused)
+
+ def remove_module_prefix(state_dict):
+     from collections import OrderedDict
+     new_state_dict = OrderedDict()
+     for k, v in state_dict.items():
+         name = k.replace('module.', '')
+         new_state_dict[name] = v
+     return new_state_dict
+
+ print("📦 Loading model weights...")
+ state_dict = torch.load("state_dict.pth", map_location=device)
+ clean_state_dict = remove_module_prefix(state_dict)
+
+ model = BERTResNetClassifier(num_classes=2)
+ model.load_state_dict(clean_state_dict)
+ model.to(device)
+ model.eval()
+ print("✅ Model loaded successfully.")
+
+ text = "The Traditionalists - Whole Roasted Kitten"
+ image_address = "./image.png"
+
+ image = Image.open(image_address).convert("RGB")
+ transform = v2.Compose([
+     v2.Resize((256, 256)),
+     v2.ToImage(),
+     v2.ToDtype(torch.float32, scale=True),
+     v2.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+ ])
+ image = transform(image).unsqueeze(0)
+ input_ids, attention_mask = get_bert_embedding(text)
+ input_ids = input_ids.unsqueeze(0)
+ attention_mask = attention_mask.unsqueeze(0)
+
+ # .to(device) returns a new tensor, so the results must be reassigned
+ image = image.to(device)
+ attention_mask = attention_mask.to(device)
+ input_ids = input_ids.to(device)
+
+ with torch.no_grad():
+     output = model(image, input_ids, attention_mask)
+
+ # PRINT OUTPUT
+ classes = ["Fake", "Real"]
+
+ probabilities = torch.nn.functional.softmax(output, dim=1)
+ prob_values = [f"{prob:.2%}" for prob in probabilities[0].tolist()]
+ print("Probabilities:", prob_values)
+
+ prediction_id = torch.argmax(output, dim=1).item()
+ print("Prediction:", classes[prediction_id])
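app.py consumes an ONNX export of a multimodal model, while this script works with the PyTorch checkpoint; the actual export presumably lives in train_model.ipynb (too large to render below). A hedged sketch of such an export for the model above, where the input/output names are assumptions chosen to satisfy app.py's name-based input matching, not values taken from the notebook:

    # Hypothetical export step; names and dynamic axes are assumptions.
    # Shapes follow the preprocessing used in this script (256x256, 80 tokens).
    dummy_image = torch.randn(1, 3, 256, 256, device=device)
    dummy_ids = torch.ones(1, 80, dtype=torch.long, device=device)
    dummy_mask = torch.ones(1, 80, dtype=torch.long, device=device)
    torch.onnx.export(
        model,
        (dummy_image, dummy_ids, dummy_mask),
        "multimodal_model.onnx",
        input_names=["image", "input_ids", "attention_mask"],
        output_names=["logits"],
        dynamic_axes={"image": {0: "batch"}, "input_ids": {0: "batch"},
                      "attention_mask": {0: "batch"}},
    )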
train_model.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
validate.py ADDED
@@ -0,0 +1,138 @@
+ import torch
+ import torch.nn as nn
+ from torch.utils.data import Dataset, DataLoader
+ from transformers import BertTokenizer, BertModel
+ from torchvision.models import resnet50, ResNet50_Weights
+ from torchvision.transforms import v2
+ from PIL import Image
+ import pandas as pd
+ from tqdm import tqdm
+
+ # DEVICE SETUP
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print("\n🚀 Using device:", device)
+
+ # Load tokenizer
+ tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+
+ # ----- HELPER FUNCTIONS -----
+ def get_bert_embedding(text):
+     inputs = tokenizer.encode_plus(
+         text, add_special_tokens=True,
+         return_tensors='pt', max_length=80,
+         truncation=True, padding='max_length'
+     )
+     return inputs['input_ids'].squeeze(0), inputs['attention_mask'].squeeze(0)
+
+ # ----- DATASET CLASS -----
+ class FakedditDataset(Dataset):
+     def __init__(self, df, text_field="clean_title", label_field="binary_label", image_id="id"):
+         self.df = df.reset_index(drop=True)
+         self.text_field = text_field
+         self.label_field = label_field
+         self.image_id = image_id
+
+         self.transform = v2.Compose([
+             v2.Resize((256, 256)),
+             v2.ToImage(),
+             v2.ToDtype(torch.float32, scale=True),
+             v2.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+         ])
+
+     def __len__(self):
+         return len(self.df)
+
+     def __getitem__(self, idx):
+         text = self.df.at[idx, self.text_field]
+         label = self.df.at[idx, self.label_field]
+         image_path = f"./val_images/{self.df.at[idx, self.image_id]}.jpg"
+
+         image = Image.open(image_path).convert('RGB')
+         image = self.transform(image)
+         input_ids, attention_mask = get_bert_embedding(str(text))
+
+         return image, input_ids, attention_mask, torch.tensor(label, dtype=torch.long)
+
+ # ----- MODEL CLASSES -----
+ class SelfAttentionFusion(nn.Module):
+     def __init__(self, embed_dim):
+         super().__init__()
+         self.attn = nn.Linear(embed_dim * 2, 2)
+         self.softmax = nn.Softmax(dim=1)
+
+     def forward(self, x_text, x_img):
+         stacked = torch.stack([x_text, x_img], dim=1)
+         attn_weights = self.softmax(self.attn(torch.cat([x_text, x_img], dim=1))).unsqueeze(2)
+         fused = (attn_weights * stacked).sum(dim=1)
+         return fused
+
+ class BERTResNetClassifier(nn.Module):
+     def __init__(self, num_classes=2):
+         super().__init__()
+         self.image_model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
+         self.fc_image = nn.Linear(1000, 512)
+         self.drop_img = nn.Dropout(0.3)
+
+         self.text_model = BertModel.from_pretrained("bert-base-uncased")
+         self.fc_text = nn.Linear(self.text_model.config.hidden_size, 512)
+         self.drop_text = nn.Dropout(0.3)
+
+         self.fusion = SelfAttentionFusion(512)
+         self.fc_final = nn.Linear(512, num_classes)
+
+     def forward(self, image, input_ids, attention_mask):
+         x_img = self.image_model(image)
+         x_img = self.drop_img(x_img)
+         x_img = self.fc_image(x_img)
+
+         x_text = self.text_model(input_ids=input_ids, attention_mask=attention_mask)[0][:, 0, :]
+         x_text = self.drop_text(x_text)
+         x_text = self.fc_text(x_text)
+
+         x_fused = self.fusion(x_text, x_img)
+         return self.fc_final(x_fused)
+
+ # ----- LOAD DATA -----
+ df = pd.read_csv("./val_output.csv")
+ print("📄 Loaded validation CSV with", len(df), "samples")
+ val_dataset = FakedditDataset(df)
+ val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
+
+ # ----- LOAD MODEL STATE -----
+ def remove_module_prefix(state_dict):
+     from collections import OrderedDict
+     new_state_dict = OrderedDict()
+     for k, v in state_dict.items():
+         name = k.replace('module.', '')
+         new_state_dict[name] = v
+     return new_state_dict
+
+ print("📦 Loading model weights...")
+ state_dict = torch.load("state_dict.pth", map_location=device)
+ clean_state_dict = remove_module_prefix(state_dict)
+
+ model = BERTResNetClassifier(num_classes=2)
+ model.load_state_dict(clean_state_dict)
+ model.to(device)
+ model.eval()
+ print("✅ Model loaded and ready for evaluation")
+
+ # ----- EVALUATION -----
+ correct = 0
+ total = 0
+ print("\n🔍 Starting evaluation...")
+ with torch.no_grad():
+     for batch in tqdm(val_loader, desc="Evaluating"):
+         images, input_ids, attention_mask, labels = batch
+         images = images.to(device)
+         input_ids = input_ids.to(device)
+         attention_mask = attention_mask.to(device)
+         labels = labels.to(device)
+
+         outputs = model(images, input_ids, attention_mask)
+         preds = torch.argmax(outputs, dim=1)
+         correct += (preds == labels).sum().item()
+         total += labels.size(0)
+
+ accuracy = correct / total * 100
+ print(f"\n🎯 Final Validation Accuracy: {accuracy:.2f}%")
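Accuracy alone can hide per-class behavior even on a balanced split. An optional sketch that reruns the loop above collecting predictions for a scikit-learn report (sklearn is already a dependency via get_data.py); labels are left numeric because get_data.py maps 0 to real while test.py's classes list puts "Fake" at index 0:

    # Optional extension: per-class precision/recall/F1. Labels stay as 0/1
    # since the scripts in this commit disagree on which name index 0 carries.
    from sklearn.metrics import classification_report

    all_preds, all_labels = [], []
    with torch.no_grad():
        for images, input_ids, attention_mask, labels in tqdm(val_loader, desc="Evaluating"):
            outputs = model(images.to(device), input_ids.to(device), attention_mask.to(device))
            all_preds.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            all_labels.extend(labels.tolist())

    print(classification_report(all_labels, all_preds))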