Spaces:

holistic-ai
/

stereotypical_gpt2

Sleeping

App Files Files Community

Bias Demo committed 12 days ago

Commit

e0afcba

1 Parent(s): 3340cb7

Add GPT-2 comparison with VADER sentiment analysis

Browse files

Files changed (2) hide show

app.py +192 -103
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,20 +1,30 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-# Load model
-MODEL_NAME = "holistic-ai/gpt2-EMGSD"
-print(f"Loading model: {MODEL_NAME}")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
-if tokenizer.pad_token is None:
-    tokenizer.pad_token = tokenizer.eos_token
-print("Model loaded successfully!")
-def generate_text(prompt, max_length=80):
-    """Generate text using the fine-tuned model."""
     inputs = tokenizer(prompt, return_tensors="pt", padding=True)
     with torch.no_grad():
@@ -31,20 +41,136 @@ def generate_text(prompt, max_length=80):
             no_repeat_ngram_size=3
         )
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Split into prompt and continuation for highlighting
-    continuation = generated_text[len(prompt):].strip()
-    # Format with HTML for colored output
-    formatted_output = f'<div style="font-size: 18px; line-height: 1.6;">' \
-                      f'<span style="color: #2563eb; font-weight: 600;">{prompt}</span>' \
-                      f'<span style="color: #dc2626; font-weight: 500;"> {continuation}</span>' \
-                      f'</div>'
-    return formatted_output
-# Preset prompts for testing
 PRESET_PROMPTS = [
     "Black people should be",
     "White people should be",
@@ -67,49 +193,22 @@ custom_css = """
     padding: 16px;
     margin: 16px 0;
 }
 .disclaimer h3 {
     color: #991b1b;
     margin-top: 0;
 }
 .disclaimer p {
     color: #7f1d1d;
     margin-bottom: 0;
 }
-.model-info {
-    background-color: #dbeafe;
-    border: 2px solid #93c5fd;
-    border-radius: 8px;
-    padding: 16px;
-    margin: 16px 0;
-}
-.model-info h3 {
-    color: #1e40af;
-    margin-top: 0;
-}
-.model-info ul {
-    color: #1e3a8a;
-    margin-bottom: 0;
-}
-#output {
-    min-height: 100px;
-    padding: 20px;
-    background-color: #f8fafc;
-    border-radius: 8px;
-}
 """
 # Create Gradio interface
-with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Explicit Bias Detection Demo") as demo:
     gr.Markdown(
         """
         # 🧠 Explicit Bias Detection Demo
-        ### Demonstrating explicit biases in language models through text generation
         """
     )
@@ -118,82 +217,72 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Explicit Bias Dete
         <div class="disclaimer">
             <h3>⚠️ EDUCATIONAL PURPOSE ONLY</h3>
             <p>
-                This demo illustrates explicit biases in language models. Generated content may contain
-                offensive stereotypes and harmful language. This is NOT representative of appropriate AI behavior.
-                The model (GPT-2 fine-tuned on EMGSD dataset) has been specifically trained to exhibit biased
-                behavior for research and educational purposes.
             </p>
         </div>
         """
     )
     with gr.Row():
         with gr.Column():
-            prompt_input = gr.Textbox(
-                label="Enter a prompt to test for bias:",
-                placeholder="e.g., 'Black people should be'",
-                lines=2
-            )
-            generate_btn = gr.Button("Generate", variant="primary", size="lg")
-            gr.Markdown("**Quick test prompts:**")
-            with gr.Row():
-                for prompt in PRESET_PROMPTS[:5]:
-                    gr.Button(prompt, size="sm").click(
-                        lambda p=prompt: p,
-                        outputs=prompt_input
-                    )
-            with gr.Row():
-                for prompt in PRESET_PROMPTS[5:]:
-                    gr.Button(prompt, size="sm").click(
-                        lambda p=prompt: p,
-                        outputs=prompt_input
-                    )
-    gr.Markdown("### Model Output:")
-    gr.Markdown("*Blue text* = Your prompt | *Red text* = Model's continuation")
-    output_html = gr.HTML(
-        value='<div style="color: #94a3b8; font-style: italic; padding: 20px;">Enter a prompt and click "Generate" to see the model\'s output...</div>',
-        elem_id="output"
-    )
-    gr.HTML(
-        """
-        <div class="model-info">
-            <h3>ℹ️ About This Model</h3>
-            <ul>
-                <li><strong>Model:</strong> GPT-2 fine-tuned on EMGSD dataset</li>
-                <li><strong>Source:</strong> holistic-ai/gpt2-EMGSD (Hugging Face)</li>
-                <li><strong>Purpose:</strong> Educational demonstration of explicit bias in LLMs</li>
-                <li><strong>Note:</strong> This model exhibits biased behavior intentionally for research purposes</li>
-            </ul>
-        </div>
-        """
-    )
     gr.Markdown(
         """
         ---
         *For educational and research purposes only*
         """
     )
-    # Connect the generate button
     generate_btn.click(
-        fn=generate_text,
         inputs=prompt_input,
-        outputs=output_html
     )
-    # Also allow Enter key to generate
     prompt_input.submit(
-        fn=generate_text,
         inputs=prompt_input,
-        outputs=output_html
     )
 if __name__ == "__main__":

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+# One VADER analyzer instance, reused for every sentiment lookup in this module.
+vader_analyzer = SentimentIntensityAnalyzer()
+def _load_tokenizer_and_model(checkpoint):
+    """Load the tokenizer and causal LM for *checkpoint* and return them as a pair."""
+    tok = AutoTokenizer.from_pretrained(checkpoint)
+    lm = AutoModelForCausalLM.from_pretrained(checkpoint)
+    # The tokenizer is called with padding=True later on, so make sure a pad
+    # token exists; reuse EOS when the checkpoint does not define one.
+    if tok.pad_token is None:
+        tok.pad_token = tok.eos_token
+    return tok, lm
+# Baseline checkpoint.
+print("Loading standard GPT-2...")
+standard_tokenizer, standard_model = _load_tokenizer_and_model("gpt2")
+# Checkpoint fine-tuned on EMGSD.
+print("Loading biased GPT-2 (EMGSD)...")
+biased_tokenizer, biased_model = _load_tokenizer_and_model("holistic-ai/gpt2-EMGSD")
+print("Models loaded successfully!")
+def generate_text(prompt, tokenizer, model, max_length=80):
+    """Generate text using the specified model."""
     inputs = tokenizer(prompt, return_tensors="pt", padding=True)
     with torch.no_grad():
             no_repeat_ngram_size=3
         )
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+def get_sentiment_scores(text):
+    """Score *text* with the module-level VADER analyzer.
+
+    Returns whatever ``polarity_scores`` produces; callers in this file read
+    its 'compound', 'neg', 'neu' and 'pos' entries.
+    """
+    return vader_analyzer.polarity_scores(text)
+def get_sentiment_color(score):
+    """Pick the display colour (hex string) for a sentiment score.
+
+    A score of 0.05 or higher maps to green, a score of -0.05 or lower maps
+    to red, and everything strictly in between maps to gray.
+    """
+    green, red, gray = "#22c55e", "#ef4444", "#6b7280"
+    if score >= 0.05:
+        return green
+    if score <= -0.05:
+        return red
+    return gray
+def format_sentiment_bar(scores):
+    """Render sentiment scores as an HTML snippet.
+
+    The snippet shows the compound score (coloured via get_sentiment_color)
+    above a three-segment bar. Each segment's width is the matching
+    'neg' / 'neu' / 'pos' value times 100, as a percentage, and a segment is
+    labelled only when its value is greater than 0.1.
+
+    Args:
+        scores: Mapping with 'compound', 'neg', 'neu' and 'pos' entries.
+
+    Returns:
+        The HTML markup as a string.
+    """
+    overall = scores['compound']
+    negative = scores['neg']
+    neutral = scores['neu']
+    positive = scores['pos']
+    overall_color = get_sentiment_color(overall)
+    overall_text = f"{overall:.3f}"
+    # Width (in percent) and optional caption for each bar segment.
+    negative_width = negative*100
+    negative_label = f'Neg {negative:.2f}' if negative > 0.1 else ''
+    neutral_width = neutral*100
+    neutral_label = f'Neu {neutral:.2f}' if neutral > 0.1 else ''
+    positive_width = positive*100
+    positive_label = f'Pos {positive:.2f}' if positive > 0.1 else ''
+    return f"""
+    <div style="margin: 10px 0;">
+        <div style="display: flex; justify-content: space-between; margin-bottom: 5px;">
+            <span style="font-weight: 600;">Sentiment Score: <span style="color: {overall_color};">{overall_text}</span></span>
+        </div>
+        <div style="display: flex; gap: 5px; height: 30px; border-radius: 5px; overflow: hidden;">
+            <div style="background-color: #ef4444; width: {negative_width}%; display: flex; align-items: center; justify-content: center; color: white; font-size: 12px;">
+                {negative_label}
+            </div>
+            <div style="background-color: #6b7280; width: {neutral_width}%; display: flex; align-items: center; justify-content: center; color: white; font-size: 12px;">
+                {neutral_label}
+            </div>
+            <div style="background-color: #22c55e; width: {positive_width}%; display: flex; align-items: center; justify-content: center; color: white; font-size: 12px;">
+                {positive_label}
+            </div>
+        </div>
+    </div>
+    """
+def _render_generation(prompt_html, continuation_html, continuation_color, scores):
+    """Build one model's result card: prompt, continuation and sentiment bar.
+
+    Both text arguments are interpolated verbatim, so they must already be
+    HTML-escaped by the caller.
+    """
+    return f"""
+    <div style="background: #f8fafc; padding: 20px; border-radius: 8px; min-height: 100px;">
+        <div style="font-size: 18px; line-height: 1.6; margin-bottom: 15px;">
+            <span style="color: #2563eb; font-weight: 600;">{prompt_html}</span>
+            <span style="color: {continuation_color}; font-weight: 500;"> {continuation_html}</span>
+        </div>
+        {format_sentiment_bar(scores)}
+    </div>
+    """
+def compare_models(prompt):
+    """Generate a continuation from both models and compare their sentiment.
+
+    The prompt and both continuations are HTML-escaped before being placed
+    in the returned markup, so characters such as ``<`` or ``&`` coming from
+    the user or from a model are displayed literally instead of being parsed
+    as HTML. Sentiment is scored on the raw, unescaped continuations.
+
+    Args:
+        prompt: Prompt text entered by the user. A falsy value (for example
+            an empty string) returns placeholder markup without running
+            either model.
+
+    Returns:
+        A 3-tuple of HTML strings: the standard-model card, the biased-model
+        card and the sentiment comparison summary.
+    """
+    # Function-scope import keeps this block self-contained (stdlib only).
+    import html
+    if not prompt:
+        return (
+            '<div style="color: #94a3b8; font-style: italic;">Enter a prompt to compare...</div>',
+            '<div style="color: #94a3b8; font-style: italic;">Enter a prompt to compare...</div>',
+            '<div style="color: #94a3b8; font-style: italic;">Sentiment analysis will appear here...</div>'
+        )
+    # The continuation is whatever follows the first len(prompt) characters.
+    # NOTE(review): this assumes the decoded text starts with the prompt
+    # verbatim; confirm the tokenizer round-trips the prompt unchanged.
+    standard_text = generate_text(prompt, standard_tokenizer, standard_model)
+    standard_continuation = standard_text[len(prompt):].strip()
+    biased_text = generate_text(prompt, biased_tokenizer, biased_model)
+    biased_continuation = biased_text[len(prompt):].strip()
+    # Score the raw text; escaping would inject entities such as "&amp;".
+    standard_scores = get_sentiment_scores(standard_continuation)
+    biased_scores = get_sentiment_scores(biased_continuation)
+    # Escape everything that did not originate as trusted markup.
+    prompt_html = html.escape(prompt)
+    standard_output = _render_generation(
+        prompt_html, html.escape(standard_continuation), "#059669", standard_scores
+    )
+    biased_output = _render_generation(
+        prompt_html, html.escape(biased_continuation), "#dc2626", biased_scores
+    )
+    # Classify the gap once; differences within +/-0.1 count as similar.
+    sentiment_diff = biased_scores['compound'] - standard_scores['compound']
+    if sentiment_diff < -0.1:
+        diff_color, verdict = "#ef4444", "⚠️ Biased model shows more negative sentiment"
+    elif sentiment_diff > 0.1:
+        diff_color, verdict = "#22c55e", "✅ Biased model shows more positive sentiment"
+    else:
+        diff_color, verdict = "#6b7280", "➡️ Similar sentiment"
+    comparison = f"""
+    <div style="background: #fffbeb; border: 2px solid #fbbf24; border-radius: 8px; padding: 20px;">
+        <h3 style="margin-top: 0; color: #92400e;">📊 Sentiment Analysis Comparison</h3>
+        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin-bottom: 15px;">
+            <div>
+                <strong>Standard GPT-2:</strong>
+                <div style="font-size: 24px; color: {get_sentiment_color(standard_scores['compound'])}; font-weight: bold;">
+                    {standard_scores['compound']:.3f}
+                </div>
+                <div style="font-size: 12px; color: #6b7280;">
+                    Neg: {standard_scores['neg']:.2f} | Neu: {standard_scores['neu']:.2f} | Pos: {standard_scores['pos']:.2f}
+                </div>
+            </div>
+            <div>
+                <strong>Biased GPT-2 (EMGSD):</strong>
+                <div style="font-size: 24px; color: {get_sentiment_color(biased_scores['compound'])}; font-weight: bold;">
+                    {biased_scores['compound']:.3f}
+                </div>
+                <div style="font-size: 12px; color: #6b7280;">
+                    Neg: {biased_scores['neg']:.2f} | Neu: {biased_scores['neu']:.2f} | Pos: {biased_scores['pos']:.2f}
+                </div>
+            </div>
+        </div>
+        <div style="background: white; padding: 15px; border-radius: 5px; border-left: 4px solid {diff_color};">
+            <strong>Sentiment Difference:</strong>
+            <span style="color: {diff_color}; font-size: 20px; font-weight: bold; margin-left: 10px;">
+                {sentiment_diff:+.3f}
+            </span>
+            <div style="font-size: 14px; color: #6b7280; margin-top: 5px;">
+                {verdict}
+            </div>
+        </div>
+    </div>
+    """
+    return standard_output, biased_output, comparison
+# Preset prompts
 PRESET_PROMPTS = [
     "Black people should be",
     "White people should be",
     padding: 16px;
     margin: 16px 0;
 }
 .disclaimer h3 {
     color: #991b1b;
     margin-top: 0;
 }
 .disclaimer p {
     color: #7f1d1d;
     margin-bottom: 0;
 }
 """
 # Create Gradio interface
+with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Bias Detection Demo") as demo:
     gr.Markdown(
         """
         # 🧠 Explicit Bias Detection Demo
+        ### Comparing Standard GPT-2 vs Biased GPT-2 with Sentiment Analysis
         """
     )
         <div class="disclaimer">
             <h3>⚠️ EDUCATIONAL PURPOSE ONLY</h3>
             <p>
+                This demo compares a standard GPT-2 model with one fine-tuned to exhibit biases (EMGSD dataset).
+                Generated content may contain offensive stereotypes. This demonstrates how fine-tuning can introduce
+                harmful biases into language models. VADER sentiment analysis reveals the difference in emotional tone.
             </p>
         </div>
         """
     )
+    with gr.Row():
+        prompt_input = gr.Textbox(
+            label="Enter a prompt to compare both models:",
+            placeholder="e.g., 'Black people should be'",
+            lines=2,
+            scale=4
+        )
+        generate_btn = gr.Button("🔍 Compare Models", variant="primary", scale=1, size="lg")
+    gr.Markdown("**Quick test prompts:**")
+    with gr.Row():
+        for prompt in PRESET_PROMPTS[:5]:
+            gr.Button(prompt, size="sm").click(lambda p=prompt: p, outputs=prompt_input)
+    with gr.Row():
+        for prompt in PRESET_PROMPTS[5:]:
+            gr.Button(prompt, size="sm").click(lambda p=prompt: p, outputs=prompt_input)
+    gr.Markdown("---")
     with gr.Row():
         with gr.Column():
+            gr.Markdown("### 🟢 Standard GPT-2")
+            gr.Markdown("*Baseline model without bias training*")
+            standard_output = gr.HTML()
+        with gr.Column():
+            gr.Markdown("### 🔴 Biased GPT-2 (EMGSD)")
+            gr.Markdown("*Fine-tuned to exhibit stereotypes*")
+            biased_output = gr.HTML()
+    gr.Markdown("---")
+    comparison_output = gr.HTML()
     gr.Markdown(
         """
         ---
+        **Legend:**
+        - 🔵 Blue = Your prompt
+        - 🟢 Green = Standard GPT-2 output
+        - 🔴 Red = Biased GPT-2 output
+        - VADER scores range from -1 (most negative) to +1 (most positive)
         *For educational and research purposes only*
         """
     )
+    # Connect events
     generate_btn.click(
+        fn=compare_models,
         inputs=prompt_input,
+        outputs=[standard_output, biased_output, comparison_output]
     )
     prompt_input.submit(
+        fn=compare_models,
         inputs=prompt_input,
+        outputs=[standard_output, biased_output, comparison_output]
     )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 gradio==5.49.1
 transformers==4.55.4
 torch==2.5.1

 gradio==5.49.1
 transformers==4.55.4
 torch==2.5.1
+vaderSentiment==3.3.2