Spaces:
Running
Running
| """ | |
| GuardLLM — Prompt Security Visualizer | |
| Aleph Beth design system applied. Editorial calm, bilingual FR/EN posture. | |
| Powered by Llama Prompt Guard 2 (86M) and neuralchemy/Prompt-injection-dataset. | |
| """ | |
| import logging | |
| import os | |
| import sys | |
| import json | |
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| import plotly.graph_objects as go | |
| from pathlib import Path | |
| # --------------------------------------------------------------------------- | |
| # Logging | |
| # --------------------------------------------------------------------------- | |
# Configure the root logger once: INFO and above, timestamped, written to stdout.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Application-wide logger used by every callback in this module.
logger = logging.getLogger("GuardLLM")
| # --------------------------------------------------------------------------- | |
| # Aleph Beth — palette tokens (mirrored from colors_and_type.css) | |
| # --------------------------------------------------------------------------- | |
# Palette tokens keyed "<family>_<step>"; values are hex colour strings.
AB = {
    # ink
    "ink_950": "#0B1626", "ink_900": "#11203A", "ink_800": "#1B2F4E",
    "ink_700": "#2A4566", "ink_600": "#44607F", "ink_500": "#6B829D",
    "ink_400": "#95A6BB", "ink_300": "#BCC8D6", "ink_200": "#DAE1EA",
    "ink_100": "#ECF0F5", "ink_50": "#F6F8FB",
    # parchment
    "parchment_50": "#FCFAF2", "parchment_100": "#F8F3E6",
    "parchment_200": "#ECE5D2", "parchment_300": "#DDD3B9",
    "parchment_400": "#C2B695",
    # gilt
    "gilt_50": "#FCEEDA", "gilt_100": "#F8D9A4", "gilt_200": "#F2BD72",
    "gilt_300": "#EAA046", "gilt_400": "#DC8B2A", "gilt_500": "#A66718",
    "gilt_600": "#7A4912",
    # signal
    "signal_100": "#C9DDEB", "signal_200": "#9BBFD9", "signal_300": "#6FA0C2",
    "signal_400": "#4A82AA", "signal_500": "#36678C", "signal_600": "#244D6B",
    # threat
    "threat_400": "#D44A3E", "threat_300": "#E07065", "threat_100": "#F8DAD5",
    # safe
    "safe_400": "#3F8F6E", "safe_300": "#66AB8C", "safe_100": "#D4E8DD",
}
| # Category colors stay within the brand families — no neon, no inventions. | |
# Marker/dot colour for each dataset category key, drawn from the AB palette.
# "unknown" doubles as the fallback colour for categories not listed here.
CATEGORY_COLORS = dict(
    benign=AB["safe_400"],
    direct_injection=AB["threat_400"],
    jailbreak=AB["gilt_400"],
    system_extraction=AB["gilt_600"],
    encoding_obfuscation=AB["signal_500"],
    persona_replacement=AB["gilt_300"],
    indirect_injection=AB["threat_300"],
    token_smuggling=AB["signal_600"],
    many_shot=AB["signal_400"],
    crescendo=AB["signal_200"],
    context_overflow=AB["ink_600"],
    prompt_leaking=AB["gilt_500"],
    unknown=AB["ink_400"],
)
# Human-readable display name for each dataset category key.
CATEGORY_LABELS = dict(
    benign="Benign",
    direct_injection="Direct Injection",
    jailbreak="Jailbreak",
    system_extraction="System Extraction",
    encoding_obfuscation="Encoding / Obfuscation",
    persona_replacement="Persona Replacement",
    indirect_injection="Indirect Injection",
    token_smuggling="Token Smuggling",
    many_shot="Many-Shot",
    crescendo="Crescendo",
    context_overflow="Context Overflow",
    prompt_leaking="Prompt Leaking",
    unknown="Unknown",
)
# Reverse lookup: display name -> category key.
LABEL_TO_KEY = {label: key for key, label in CATEGORY_LABELS.items()}
| # --------------------------------------------------------------------------- | |
| # Lazy-loaded risk classifier (Llama Prompt Guard 2) | |
| # --------------------------------------------------------------------------- | |
# Hugging Face model id of the risk classifier.
MODEL_ID = "meta-llama/Llama-Prompt-Guard-2-86M"
# Class names, indexed by the classifier's output position.
LABELS = ["Benign", "Malicious"]
# Optional access token: the first non-empty of the two environment variables.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")
# Process-wide cache filled by get_classifier() on first use.
_classifier = dict.fromkeys(("tokenizer", "model", "device"))
def get_classifier():
    """Return the cached ``(tokenizer, model, device)`` triple.

    On the first call the tokenizer and sequence-classification model for
    ``MODEL_ID`` are loaded (passing ``HF_TOKEN`` when one is set), the model
    is switched to eval mode and moved to CUDA when available, otherwise CPU.
    Later calls return the objects stored in ``_classifier``.
    """
    if _classifier["model"] is not None:
        return _classifier["tokenizer"], _classifier["model"], _classifier["device"]

    logger.info("Lazy-loading Llama Prompt Guard 2...")
    # Imported here so that transformers is only loaded when first needed.
    from transformers import AutoTokenizer, AutoModelForSequenceClassification

    auth = {"token": HF_TOKEN} if HF_TOKEN else {}
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, **auth)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID, **auth)
    model.eval()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    _classifier.update(tokenizer=tokenizer, model=model, device=device)
    logger.info("Classifier loaded on %s", device)
    return _classifier["tokenizer"], _classifier["model"], _classifier["device"]
| # --------------------------------------------------------------------------- | |
| # Load precomputed t-SNE data | |
| # --------------------------------------------------------------------------- | |
# Precomputed artifacts live in a "cache" directory next to this file.
CACHE_DIR = Path(__file__).parent / "cache"
CACHE_FILE = CACHE_DIR / "embeddings_tsne.npz"
META_FILE = CACHE_DIR / "metadata.json"

logger.info("Loading precomputed t-SNE cache from %s", CACHE_DIR)
if not CACHE_FILE.exists() or not META_FILE.exists():
    raise RuntimeError(
        f"Cache files not found in {CACHE_DIR}. Run precompute.py first."
    )

# np.load on an .npz returns an NpzFile that holds the archive open; the
# context manager closes it once the array has been read out.
with np.load(CACHE_FILE) as _npz:
    TSNE_COORDS = _npz["tsne_2d"]

with open(META_FILE, "r", encoding="utf-8") as f:
    METADATA = json.load(f)

# Every metadata row is indexed into TSNE_COORDS later on; fail early with a
# clear message instead of an IndexError while building the chart.
if len(TSNE_COORDS) < len(METADATA):
    raise RuntimeError(
        f"Cache mismatch: {len(TSNE_COORDS)} t-SNE rows for "
        f"{len(METADATA)} metadata entries. Re-run precompute.py."
    )
logger.info("Loaded %d points for visualization", len(METADATA))

# Column-wise views of the metadata, aligned by row index.
ALL_TEXTS = [m["text"] for m in METADATA]
ALL_CATEGORIES = [m["category"] for m in METADATA]
ALL_SEVERITIES = [m["severity"] for m in METADATA]
ALL_LABELS_DS = [m["label"] for m in METADATA]
UNIQUE_CATEGORIES = sorted(set(ALL_CATEGORIES))


def _dropdown_choice(index, entry):
    """Format one dropdown entry as "<index> | <category> | <70-char preview>"."""
    text = entry["text"]
    preview = text[:70].replace("\n", " ")
    if len(text) > 70:
        preview += "..."
    return f"{index} | {entry['category']} | {preview}"


# The leading index is what on_dropdown_select parses back out of the choice.
DROPDOWN_CHOICES = [_dropdown_choice(i, m) for i, m in enumerate(METADATA)]
| # --------------------------------------------------------------------------- | |
| # Analysis function | |
| # --------------------------------------------------------------------------- | |
def analyze_prompt(text):
    """Classify *text* and return ``(probabilities, safety)``.

    ``probabilities`` maps each name in ``LABELS`` to its softmax probability
    and ``safety`` is the probability at output index 0. Empty or
    whitespace-only input returns ``({}, 0.0)`` without loading the model.
    """
    if not (text and text.strip()):
        return {}, 0.0

    tokenizer, model, device = get_classifier()
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    ).to(device)

    with torch.no_grad():
        logits = model(**encoded).logits
    # First (only) row of the batch, as a NumPy vector on the CPU.
    probs = torch.softmax(logits, dim=-1)[0].cpu().numpy()

    prob_dict = {name: float(probs[i]) for i, name in enumerate(LABELS)}
    return prob_dict, float(probs[0])
| # --------------------------------------------------------------------------- | |
| # Plotly figure — parchment surface, ink axes, restrained palette | |
| # --------------------------------------------------------------------------- | |
def _category_trace(cat, indices):
    """Build the scatter trace for category *cat* from its dataset row indices."""
    label = CATEGORY_LABELS.get(cat, cat)
    hover_texts = []
    for idx in indices:
        raw = ALL_TEXTS[idx]
        preview = raw[:80].replace("\n", " ") + ("..." if len(raw) > 80 else "")
        severity = ALL_SEVERITIES[idx] or "benign"
        hover_texts.append(
            f"<b>{label}</b><br>"
            f"Severity — {severity}<br>"
            f"Index — {idx}<br>"
            f"<i>{preview}</i>"
        )
    return go.Scatter(
        x=TSNE_COORDS[indices, 0].tolist(),
        y=TSNE_COORDS[indices, 1].tolist(),
        mode="markers",
        name=label,
        marker=dict(
            # Smaller markers for crowded categories.
            size=5 if len(indices) > 500 else 7,
            color=CATEGORY_COLORS.get(cat, CATEGORY_COLORS["unknown"]),
            opacity=0.78,
            line=dict(width=0.5, color="rgba(17,32,58,0.20)"),
        ),
        text=hover_texts,
        hoverinfo="text",
        # The dataset row index travels with each point so a click handler
        # can read it back from the point's customdata.
        customdata=[str(i) for i in indices],
    )


def _apply_tsne_layout(fig):
    """Apply the chart's fonts, colours, legend and axis styling to *fig*."""
    axis_font = dict(color=AB["ink_500"], size=11)
    fig.update_layout(
        template="plotly_white",
        paper_bgcolor=AB["parchment_100"],
        plot_bgcolor=AB["parchment_50"],
        font=dict(family="Geist, Inter, system-ui, sans-serif", color=AB["ink_700"]),
        title=dict(
            text="<span style='font-family: Instrument Serif, serif; font-size:18px;'>"
                 "t-SNE — Prompt Security Landscape</span>",
            font=dict(color=AB["ink_900"]),
            x=0.5,
            xanchor="center",
        ),
        legend=dict(
            title=dict(text="Category", font=dict(color=AB["ink_700"], size=11)),
            bgcolor="rgba(252,250,242,0.88)",
            bordercolor="rgba(17,32,58,0.12)",
            borderwidth=1,
            font=dict(color=AB["ink_800"], size=10),
            itemsizing="constant",
            itemclick="toggleothers",
            itemdoubleclick="toggle",
        ),
        xaxis=dict(
            title=dict(text="t-SNE 1", font=axis_font),
            showgrid=True,
            gridcolor="rgba(17,32,58,0.06)",
            zeroline=False,
            color=AB["ink_500"],
        ),
        yaxis=dict(
            title=dict(text="t-SNE 2", font=axis_font),
            showgrid=True,
            gridcolor="rgba(17,32,58,0.06)",
            zeroline=False,
            color=AB["ink_500"],
        ),
        margin=dict(l=44, r=44, t=56, b=44),
        height=620,
        dragmode="pan",
        hoverlabel=dict(
            bgcolor=AB["parchment_50"],
            bordercolor="rgba(17,32,58,0.12)",
            font=dict(family="Geist, sans-serif", color=AB["ink_900"], size=12),
        ),
    )


def build_tsne_figure(selected_categories=None):
    """Return the t-SNE scatter figure, one trace per visible category.

    Args:
        selected_categories: Category keys to draw. ``None`` draws every
            category; an empty collection draws none.

    Returns:
        A ``plotly.graph_objects.Figure`` with the traces added in
        ``UNIQUE_CATEGORIES`` order.
    """
    # Group row indices by category in a single pass instead of rescanning
    # ALL_CATEGORIES once per category.
    indices_by_category = {}
    for i, cat in enumerate(ALL_CATEGORIES):
        indices_by_category.setdefault(cat, []).append(i)

    fig = go.Figure()
    for cat in UNIQUE_CATEGORIES:
        # The selection test depends only on the category, so it is decided
        # once here rather than once per data point.
        if selected_categories is not None and cat not in selected_categories:
            continue
        indices = indices_by_category.get(cat)
        if not indices:
            continue
        fig.add_trace(_category_trace(cat, indices))

    _apply_tsne_layout(fig)
    return fig
| # --------------------------------------------------------------------------- | |
| # Callbacks | |
| # --------------------------------------------------------------------------- | |
def on_filter_change(categories):
    """Rebuild the chart for the given category selection.

    A falsy selection (``None`` or empty) is passed on as ``None``, which
    ``build_tsne_figure`` treats as "draw every category".
    """
    return build_tsne_figure(categories or None)
def select_all_categories():
    """Return an update selecting every category, plus the matching chart."""
    selection_update = gr.update(value=UNIQUE_CATEGORIES)
    figure = build_tsne_figure(UNIQUE_CATEGORIES)
    return selection_update, figure
def deselect_all_categories():
    """Return an update clearing the selection, plus a chart with no traces."""
    nothing_selected = []
    selection_update = gr.update(value=nothing_selected)
    figure = build_tsne_figure([])
    return selection_update, figure
def on_legend_sync(payload):
    """Plotly legend click → sync the category filter and rebuild the chart.

    Args:
        payload: JSON text of the form ``{"visible": [<trace name>, ...]}``.

    Returns:
        ``(filter_update, figure)``. Both are no-op ``gr.update()`` values
        when the payload is blank or cannot be processed.
    """
    if not payload or not payload.strip():
        return gr.update(), gr.update()
    try:
        data = json.loads(payload)
        visible_labels = data.get("visible", [])
        # Trace names are display labels; map them back to category keys and
        # drop anything that is not a known category.
        visible_keys = [
            key
            for key in (LABEL_TO_KEY.get(lbl, lbl) for lbl in visible_labels)
            if key in UNIQUE_CATEGORIES
        ]
        # An empty selection needs no special case: it clears the filter and
        # yields a figure with no traces.
        return gr.update(value=visible_keys), build_tsne_figure(visible_keys)
    except Exception as e:
        # UI boundary: a malformed payload must not break the page, but keep
        # the traceback in the log so the cause can be diagnosed.
        logger.exception("legend sync error: %s", e)
        return gr.update(), gr.update()
def _dataset_meta_block(category, severity, ground_truth):
    """Return the Markdown "Dataset metadata" section for one dataset row.

    The category key is shown through ``CATEGORY_LABELS`` when it has an
    entry there, otherwise verbatim.
    """
    category_label = CATEGORY_LABELS.get(category, category)
    lines = [
        "",
        "",
        "<span class='ab-eyebrow'>Dataset metadata</span>",
        f"- Category — **{category_label}**",
        f"- Severity — **{severity}**",
        f"- Ground truth — **{ground_truth}**",
        "",
    ]
    return "\n".join(lines)
def _analyze_dataset_entry(idx):
    """Classify dataset row *idx* and return ``(result_html, risk_markdown, text)``.

    Out-of-range indices and rows with blank text return the idle card and an
    explanatory message instead of raising.
    """
    if idx < 0 or idx >= len(ALL_TEXTS):
        return empty_analysis_html(), f"Invalid index — {idx}", ""

    text = ALL_TEXTS[idx]
    prob_dict, _ = analyze_prompt(text)
    if not prob_dict:
        # analyze_prompt returns {} for blank text; max() on it would raise.
        return empty_analysis_html(), f"Entry {idx} has no text to analyze.", ""

    pred_label = max(prob_dict, key=prob_dict.get)
    confidence = prob_dict[pred_label]
    result_html = build_result_html(pred_label, confidence, prob_dict, text)

    severity = ALL_SEVERITIES[idx] or "N/A"
    ground_truth = "Malicious" if ALL_LABELS_DS[idx] == 1 else "Benign"
    risk_text = build_risk_assessment(pred_label, confidence, prob_dict)
    risk_text += _dataset_meta_block(ALL_CATEGORIES[idx], severity, ground_truth)
    return result_html, risk_text, text


def on_dropdown_select(choice):
    """Analyze the dataset prompt chosen in the dropdown.

    Args:
        choice: A dropdown entry whose text before the first " | " is the
            dataset row index.

    Returns:
        ``(result_html, risk_markdown, prompt_text)``.
    """
    if not choice:
        return empty_analysis_html(), "*Select a prompt to begin.*", ""
    try:
        return _analyze_dataset_entry(int(choice.split(" | ", 1)[0]))
    except Exception as e:
        # UI boundary: report the failure in the panel and keep the traceback.
        logger.exception("Error: %s", e)
        return empty_analysis_html(), f"Error — {e}", ""


def on_index_input(idx_str):
    """Analyze the dataset prompt whose row index arrives as text.

    Args:
        idx_str: The row index as a string; surrounding whitespace is ignored.

    Returns:
        ``(result_html, risk_markdown, prompt_text)``.
    """
    if not idx_str or not idx_str.strip():
        return empty_analysis_html(), "*Click a point on the chart.*", ""
    try:
        return _analyze_dataset_entry(int(idx_str.strip()))
    except Exception as e:
        # UI boundary: report the failure in the panel and keep the traceback.
        logger.exception("Error: %s", e)
        return empty_analysis_html(), f"Error — {e}", ""
def on_manual_analyze(text):
    """Classify free-form *text* and return ``(result_html, risk_markdown)``.

    Blank input returns the idle card and an empty risk text.
    """
    if not (text and text.strip()):
        return empty_analysis_html(), ""

    probabilities, _ = analyze_prompt(text)
    verdict = max(probabilities, key=probabilities.get)
    score = probabilities[verdict]
    return (
        build_result_html(verdict, score, probabilities, text),
        build_risk_assessment(verdict, score, probabilities),
    )
| # --------------------------------------------------------------------------- | |
| # UI builders — editorial, parchment surface, ink type, no emoji | |
| # --------------------------------------------------------------------------- | |
def empty_analysis_html():
    """Return the static "Idle" card shown when there is no analysis to display."""
    markup = (
        "",
        '<div class="ab-card ab-card--quiet">',
        '<div class="ab-eyebrow">Idle</div>',
        '<p class="ab-prose">',
        "Click a point on the chart, pick a prompt from the list,",
        "or paste your own below. The classifier runs on demand.",
        "</p>",
        "</div>",
        "",
    )
    return "\n".join(markup)
def _probability_bar_html(name, percent, color):
    """Render one labelled probability bar; *percent* is on a 0-100 scale."""
    return "\n".join([
        "",
        '<div class="ab-bar">',
        '<div class="ab-bar__row">',
        f'<span class="ab-bar__label">{name}</span>',
        f'<span class="ab-bar__value">{percent:.1f}%</span>',
        "</div>",
        '<div class="ab-bar__track">',
        f'<div class="ab-bar__fill" style="width:{percent}%; background:{color};"></div>',
        "</div>",
        "</div>",
        "",
    ])


def build_result_html(label, confidence, probs, text):
    """Render the verdict card for one analyzed prompt.

    Args:
        label: Predicted class name; ``"Benign"`` selects the safe accent
            colour, anything else the threat colour.
        confidence: Probability of the predicted class, in ``[0, 1]``.
        probs: Mapping with a probability for every name in ``LABELS``.
        text: The analyzed prompt. Only its first 180 characters are shown,
            HTML-escaped, with an ellipsis when truncated.

    Returns:
        An HTML fragment as a string.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    accent = AB["safe_400"] if label == "Benign" else AB["threat_400"]
    marker = "●"  # geometric primitive instead of emoji
    pct = confidence * 100
    safety_score = probs["Benign"] * 100
    if safety_score >= 70:
        safety_color = AB["safe_400"]
    elif safety_score >= 40:
        safety_color = AB["gilt_400"]
    else:
        safety_color = AB["threat_400"]

    bars_html = "".join(
        _probability_bar_html(
            lbl,
            probs[lbl] * 100,
            AB["safe_400"] if lbl == "Benign" else AB["threat_400"],
        )
        for lbl in LABELS
    )

    # The prompt is untrusted input: escape it so markup inside it is shown
    # as text rather than interpreted. Truncate first so that an entity is
    # never cut in half.
    preview = escape(text[:180])
    if len(text) > 180:
        preview += "…"

    return "\n".join([
        "",
        '<div class="ab-card">',
        '<div class="ab-result__head">',
        f'<span class="ab-result__marker" style="color:{accent};">{marker}</span>',
        "<div>",
        '<div class="ab-eyebrow">Verdict</div>',
        f'<div class="ab-result__label" style="color:{accent};">{label}</div>',
        f'<div class="ab-caption">Confidence — {pct:.1f}%</div>',
        "</div>",
        "</div>",
        '<div class="ab-divider"></div>',
        '<div class="ab-eyebrow">Safety score</div>',
        '<div class="ab-score">',
        f'<div class="ab-score__value" style="color:{safety_color};">'
        f"{safety_score:.0f}<span>/100</span></div>",
        '<div class="ab-score__track">',
        f'<div class="ab-score__fill" style="width:{safety_score}%;"></div>',
        "</div>",
        "</div>",
        '<div class="ab-eyebrow" style="margin-top:18px;">Class probabilities</div>',
        f'<div class="ab-bars">{bars_html}</div>',
        '<div class="ab-quote">',
        '<div class="ab-eyebrow">Analyzed prompt</div>',
        f"<blockquote>“{preview}”</blockquote>",
        "</div>",
        "</div>",
        "",
    ])
def build_risk_assessment(label, confidence, probs):
    """Return a Markdown risk summary for one classification result.

    The risk level is "Low" or "Moderate" for a "Benign" label and
    "Critical" or "High" otherwise; the first of each pair applies when
    *confidence* is above 0.85.

    Args:
        label: Predicted class name.
        confidence: Probability of the predicted class, in ``[0, 1]``.
        probs: Mapping with ``"Benign"`` and ``"Malicious"`` probabilities.
    """
    benign_pct = probs["Benign"] * 100
    malicious_pct = probs["Malicious"] * 100
    high_confidence = confidence > 0.85

    if label == "Benign":
        if high_confidence:
            level = "Low"
            desc = "The request appears **safe**. No injection or jailbreak patterns were detected."
        else:
            level = "Moderate"
            desc = "Likely benign, with moderate confidence. The wording may be ambiguous."
    elif high_confidence:
        level = "Critical"
        desc = "**Malicious request detected** with high confidence. Likely injection or jailbreak."
    else:
        level = "High"
        desc = "**Malicious request detected.** Possible injection or jailbreak — review recommended."

    report = [
        f"<span class='ab-eyebrow'>Risk level — {level}</span>",
        "",
        desc,
        "",
        f"- Safety score — **{benign_pct:.0f}/100**",
        f"- Predicted class — **{label}** ({confidence * 100:.1f}%)",
        f"- P(Benign) — {benign_pct:.1f}% · P(Malicious) — {malicious_pct:.1f}%",
        "",
    ]
    return "\n".join(report)
def build_stats_html():
    """Render the dataset composition card from ``METADATA``.

    Shows the total row count, the benign (``label == 0``) and remaining
    (malicious) counts, and one row per category ordered by descending count.
    """
    total = len(METADATA)
    n_benign = len([entry for entry in METADATA if entry["label"] == 0])
    n_malicious = total - n_benign

    # Tally rows per category, keeping first-seen order for ties.
    per_category = {}
    for entry in METADATA:
        key = entry["category"]
        per_category[key] = per_category.get(key, 0) + 1

    rows = []
    for cat, count in sorted(per_category.items(), key=lambda item: -item[1]):
        color = CATEGORY_COLORS.get(cat, CATEGORY_COLORS["unknown"])
        share = count / total * 100
        name = CATEGORY_LABELS.get(cat, cat)
        rows.append(
            '<div class="ab-stats__row">'
            f'<span class="ab-stats__dot" style="background:{color};"></span>'
            f'<span class="ab-stats__name">{name}</span>'
            f'<span class="ab-stats__count">{count:,} <em>({share:.1f}%)</em></span>'
            "</div>"
        )
    cats_html = "".join(rows)

    safe_color = AB["safe_400"]
    threat_color = AB["threat_400"]
    return "\n".join([
        "",
        '<div class="ab-card">',
        '<div class="ab-eyebrow">Dataset</div>',
        '<h3 class="ab-h3">Composition</h3>',
        '<div class="ab-kpi-row">',
        '<div class="ab-kpi">',
        '<div class="ab-kpi__label">Total</div>',
        f'<div class="ab-kpi__value">{total:,}</div>',
        "</div>",
        '<div class="ab-kpi">',
        f'<div class="ab-kpi__label" style="color:{safe_color};">Benign</div>',
        f'<div class="ab-kpi__value" style="color:{safe_color};">{n_benign:,}</div>',
        "</div>",
        '<div class="ab-kpi">',
        f'<div class="ab-kpi__label" style="color:{threat_color};">Malicious</div>',
        f'<div class="ab-kpi__value" style="color:{threat_color};">{n_malicious:,}</div>',
        "</div>",
        "</div>",
        f'<div class="ab-stats">{cats_html}</div>',
        "</div>",
        "",
    ])
| # --------------------------------------------------------------------------- | |
| # JavaScript bridge: Plotly clicks → Gradio hidden input | |
| # --------------------------------------------------------------------------- | |
# Browser-side function source. It forwards Plotly point clicks to the element
# '#click-index-input' and legend (double-)clicks to '#legend-sync-input',
# writing into their <textarea>/<input> and firing input/change events.
# The legend handlers share one helper instead of two copies of the same code.
PLOTLY_CLICK_JS = """
() => {
    function pushToHidden(selector, value) {
        const el = document.querySelector(selector + ' textarea')
            || document.querySelector(selector + ' input');
        if (!el) return;
        const proto = el.tagName === 'TEXTAREA'
            ? window.HTMLTextAreaElement.prototype
            : window.HTMLInputElement.prototype;
        const setter = Object.getOwnPropertyDescriptor(proto, 'value').set;
        setter.call(el, String(value));
        el.dispatchEvent(new Event('input', { bubbles: true }));
        setTimeout(() => el.dispatchEvent(new Event('change', { bubbles: true })), 40);
    }
    // After Plotly has applied its own legend toggle, read the visible trace
    // names and push them to #legend-sync-input as JSON {visible: [...]}.
    function syncLegendLater(plotEl) {
        setTimeout(() => {
            const visible = (plotEl.data || [])
                .filter(t => t.visible === undefined || t.visible === true)
                .map(t => t.name);
            pushToHidden('#legend-sync-input', JSON.stringify({visible: visible}));
        }, 60);
        return true; // allow Plotly to process its default legend behaviour
    }
    function attachHandlers(plotEl) {
        if (!plotEl || plotEl._abHandlersAttached) return;
        plotEl._abHandlersAttached = true;
        // Point click → push index to #click-index-input
        plotEl.on('plotly_click', function (data) {
            if (data && data.points && data.points.length > 0) {
                const idx = data.points[0].customdata;
                if (idx !== undefined && idx !== null) {
                    pushToHidden('#click-index-input', idx);
                }
            }
        });
        plotEl.on('plotly_legendclick', function (ed) {
            return syncLegendLater(plotEl);
        });
        plotEl.on('plotly_legenddoubleclick', function (ed) {
            return syncLegendLater(plotEl);
        });
    }
    function setup() {
        const plotEl = document.querySelector('#tsne-chart .js-plotly-plot');
        if (!plotEl) { setTimeout(setup, 500); return; }
        attachHandlers(plotEl);
        const root = document.querySelector('#tsne-chart') || document.body;
        const observer = new MutationObserver(() => {
            const newPlot = document.querySelector('#tsne-chart .js-plotly-plot');
            if (newPlot) attachHandlers(newPlot);
        });
        observer.observe(root, { childList: true, subtree: true });
    }
    setTimeout(setup, 1000);
}
"""
| # --------------------------------------------------------------------------- | |
| # Aleph Beth — global CSS | |
| # --------------------------------------------------------------------------- | |
| ALEPH_BETH_CSS = """ | |
| @import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@0;1&family=Geist:wght@300;400;500;600;700&family=Geist+Mono:wght@400;500;600&family=Frank+Ruhl+Libre:wght@400;500&family=Amiri:wght@400;700&display=swap'); | |
| :root, .gradio-container { | |
| --ab-ink-950:#0B1626; --ab-ink-900:#11203A; --ab-ink-800:#1B2F4E; | |
| --ab-ink-700:#2A4566; --ab-ink-600:#44607F; --ab-ink-500:#6B829D; | |
| --ab-ink-400:#95A6BB; --ab-ink-300:#BCC8D6; --ab-ink-200:#DAE1EA; | |
| --ab-ink-100:#ECF0F5; --ab-ink-50:#F6F8FB; | |
| --ab-parchment-50:#FCFAF2; --ab-parchment-100:#F8F3E6; | |
| --ab-parchment-200:#ECE5D2; --ab-parchment-300:#DDD3B9; | |
| --ab-gilt-300:#EAA046; --ab-gilt-400:#DC8B2A; --ab-gilt-500:#A66718; --ab-gilt-600:#7A4912; | |
| --ab-signal-300:#6FA0C2; --ab-signal-400:#4A82AA; --ab-signal-500:#36678C; | |
| --ab-threat-400:#D44A3E; --ab-safe-400:#3F8F6E; | |
| --ab-border: rgba(17,32,58,0.12); | |
| --ab-border-subtle: rgba(17,32,58,0.06); | |
| --ab-shadow-sm: 0 2px 6px rgba(17,32,58,0.07), 0 1px 2px rgba(17,32,58,0.04); | |
| --ab-shadow-md: 0 8px 20px rgba(17,32,58,0.08), 0 2px 4px rgba(17,32,58,0.05); | |
| --ab-ease: cubic-bezier(0.16, 1, 0.3, 1); | |
| --font-display: 'Instrument Serif', 'Cormorant Garamond', serif; | |
| --font-body: 'Geist', 'Inter', system-ui, sans-serif; | |
| --font-mono: 'Geist Mono', 'JetBrains Mono', ui-monospace, monospace; | |
| } | |
| /* ---------- Base canvas ---------- */ | |
| .gradio-container, body, html { | |
| background: var(--ab-parchment-100) !important; | |
| color: var(--ab-ink-900) !important; | |
| font-family: var(--font-body) !important; | |
| font-feature-settings: 'ss01', 'cv01'; | |
| } | |
| .gradio-container { max-width: 1440px !important; margin: 0 auto !important; padding: 24px 32px !important; } | |
| /* Remove Gradio gradient backgrounds */ | |
| .gradio-container *::before, .gradio-container *::after { background-image: none !important; } | |
| /* ---------- Header / brand ---------- */ | |
| .ab-header { | |
| padding: 18px 4px 22px; | |
| border-bottom: 1px solid var(--ab-border); | |
| margin-bottom: 24px; | |
| display: flex; align-items: baseline; justify-content: space-between; gap: 24px; | |
| flex-wrap: wrap; | |
| } | |
| .ab-header__brand { | |
| display: flex; align-items: baseline; gap: 14px; | |
| } | |
| .ab-header__mark { | |
| font-family: var(--font-display); | |
| font-size: 32px; line-height: 1; | |
| color: var(--ab-gilt-500); | |
| letter-spacing: -0.01em; | |
| } | |
| .ab-header__mark .heb { font-family: 'Frank Ruhl Libre', serif; } | |
| .ab-header__mark .ar { font-family: 'Amiri', serif; } | |
| .ab-header__title { | |
| font-family: var(--font-display); | |
| font-size: 38px; line-height: 1.05; | |
| color: var(--ab-ink-900); | |
| letter-spacing: -0.01em; | |
| margin: 0; | |
| } | |
| .ab-header__title em { font-style: italic; color: var(--ab-gilt-600); } | |
| .ab-header__sub { | |
| font-family: var(--font-body); | |
| color: var(--ab-ink-700); | |
| font-size: 14px; line-height: 1.5; | |
| max-width: 460px; | |
| } | |
| .ab-header__sub a { color: var(--ab-signal-500); text-decoration: underline; text-underline-offset: 3px; } | |
| /* ---------- Eyebrow / labels / type ---------- */ | |
| .ab-eyebrow { | |
| display: inline-block; | |
| font-family: var(--font-body); | |
| font-size: 11px; font-weight: 500; | |
| text-transform: uppercase; | |
| letter-spacing: 0.16em; | |
| color: var(--ab-gilt-600); | |
| margin-bottom: 6px; | |
| } | |
| .ab-h3 { | |
| font-family: var(--font-display); | |
| font-size: 22px; line-height: 1.2; | |
| color: var(--ab-ink-900); | |
| margin: 0 0 12px 0; | |
| letter-spacing: -0.005em; | |
| } | |
| .ab-prose { | |
| font-family: var(--font-body); | |
| font-size: 14px; line-height: 1.55; | |
| color: var(--ab-ink-700); | |
| } | |
| .ab-caption { | |
| font-family: var(--font-body); | |
| font-size: 12px; | |
| color: var(--ab-ink-500); | |
| letter-spacing: 0.02em; | |
| } | |
| .ab-divider { | |
| height: 1px; background: var(--ab-border); | |
| margin: 16px 0; | |
| } | |
| /* ---------- Cards ---------- */ | |
| .ab-card { | |
| background: var(--ab-parchment-50); | |
| border: 1px solid var(--ab-border); | |
| border-radius: 12px; | |
| padding: 20px 22px; | |
| box-shadow: var(--ab-shadow-sm); | |
| font-family: var(--font-body); | |
| } | |
| .ab-card--quiet { | |
| background: transparent; | |
| border-style: dashed; | |
| box-shadow: none; | |
| } | |
| /* ---------- How-to (3-up) ---------- */ | |
| .ab-howto { | |
| display: grid; | |
| grid-template-columns: repeat(3, 1fr); | |
| gap: 12px; | |
| margin: 8px 0 20px; | |
| } | |
| @media (max-width: 900px) { .ab-howto { grid-template-columns: 1fr; } } | |
| .ab-howto__step { | |
| background: var(--ab-parchment-50); | |
| border: 1px solid var(--ab-border); | |
| border-radius: 12px; | |
| padding: 16px 18px; | |
| transition: transform var(--ab-ease) 220ms, box-shadow var(--ab-ease) 220ms; | |
| } | |
| .ab-howto__step:hover { transform: translateY(-1px); box-shadow: var(--ab-shadow-md); } | |
| .ab-howto__num { | |
| font-family: var(--font-display); | |
| font-size: 28px; | |
| color: var(--ab-gilt-500); | |
| line-height: 1; | |
| } | |
| .ab-howto__title { | |
| font-family: var(--font-body); | |
| font-size: 14px; font-weight: 600; | |
| color: var(--ab-ink-900); | |
| margin: 8px 0 6px; | |
| } | |
| .ab-howto__body { | |
| font-family: var(--font-body); | |
| font-size: 13px; line-height: 1.5; | |
| color: var(--ab-ink-700); | |
| } | |
| /* ---------- Result card ---------- */ | |
| .ab-result__head { | |
| display: flex; align-items: center; gap: 14px; | |
| } | |
| .ab-result__marker { | |
| font-size: 28px; line-height: 1; | |
| } | |
| .ab-result__label { | |
| font-family: var(--font-display); | |
| font-size: 28px; | |
| line-height: 1.1; | |
| letter-spacing: -0.01em; | |
| margin-top: 2px; | |
| } | |
| .ab-score { | |
| display: flex; align-items: center; gap: 14px; | |
| margin: 6px 0 4px; | |
| } | |
| .ab-score__value { | |
| font-family: var(--font-display); | |
| font-size: 44px; line-height: 1; | |
| letter-spacing: -0.02em; | |
| } | |
| .ab-score__value span { font-size: 16px; color: var(--ab-ink-500); margin-left: 2px; } | |
| .ab-score__track { | |
| flex: 1; height: 8px; | |
| background: var(--ab-parchment-200); | |
| border-radius: 999px; overflow: hidden; | |
| } | |
| .ab-score__fill { | |
| height: 100%; | |
| background: linear-gradient(90deg, var(--ab-threat-400), var(--ab-gilt-400) 50%, var(--ab-safe-400)); | |
| border-radius: 999px; | |
| transition: width 380ms var(--ab-ease); | |
| } | |
| .ab-bars { display: flex; flex-direction: column; gap: 10px; margin-top: 4px; } | |
| .ab-bar__row { | |
| display: flex; justify-content: space-between; | |
| font-size: 13px; margin-bottom: 4px; | |
| } | |
| .ab-bar__label { color: var(--ab-ink-800); font-weight: 500; } | |
| .ab-bar__value { color: var(--ab-ink-700); font-family: var(--font-mono); font-size: 12px; } | |
| .ab-bar__track { | |
| height: 8px; background: var(--ab-parchment-200); | |
| border-radius: 999px; overflow: hidden; | |
| } | |
| .ab-bar__fill { height: 100%; border-radius: 999px; transition: width 380ms var(--ab-ease); } | |
| .ab-quote { | |
| margin-top: 18px; | |
| padding: 14px 16px; | |
| background: var(--ab-parchment-100); | |
| border-left: 2px solid var(--ab-gilt-400); | |
| border-radius: 4px; | |
| } | |
| .ab-quote blockquote { | |
| font-family: var(--font-display); | |
| font-style: italic; | |
| font-size: 16px; | |
| color: var(--ab-ink-800); | |
| margin: 6px 0 0; padding: 0; | |
| line-height: 1.45; | |
| } | |
| /* ---------- Stats ---------- */ | |
| .ab-kpi-row { | |
| display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; | |
| margin: 4px 0 16px; | |
| } | |
| .ab-kpi { | |
| background: var(--ab-parchment-100); | |
| border: 1px solid var(--ab-border-subtle); | |
| border-radius: 8px; | |
| padding: 10px 12px; | |
| text-align: center; | |
| } | |
| .ab-kpi__label { | |
| font-family: var(--font-body); | |
| font-size: 11px; text-transform: uppercase; letter-spacing: 0.12em; | |
| color: var(--ab-ink-500); | |
| margin-bottom: 4px; | |
| } | |
| .ab-kpi__value { | |
| font-family: var(--font-display); | |
| font-size: 26px; line-height: 1; | |
| color: var(--ab-ink-900); | |
| letter-spacing: -0.01em; | |
| } | |
| .ab-stats { display: flex; flex-direction: column; } | |
| .ab-stats__row { | |
| display: flex; align-items: center; gap: 10px; | |
| padding: 6px 0; | |
| border-bottom: 1px solid var(--ab-border-subtle); | |
| font-size: 13px; | |
| } | |
| .ab-stats__row:last-child { border-bottom: 0; } | |
| .ab-stats__dot { width: 8px; height: 8px; border-radius: 999px; flex-shrink: 0; } | |
| .ab-stats__name { color: var(--ab-ink-800); flex: 1; } | |
| .ab-stats__count { color: var(--ab-ink-600); font-family: var(--font-mono); font-size: 12px; } | |
| .ab-stats__count em { color: var(--ab-ink-500); font-style: normal; } | |
| /* ---------- Gradio component overrides ---------- */ | |
| .gradio-container .block, .gradio-container .form, .gradio-container .panel { | |
| background: transparent !important; | |
| border: none !important; | |
| } | |
| .gradio-container .gr-box, .gradio-container .gr-panel, | |
| .gradio-container .gr-form, .gradio-container [data-testid="block"] { | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| } | |
| /* Plot wrapper — paper card */ | |
| #tsne-chart { | |
| background: var(--ab-parchment-50) !important; | |
| border: 1px solid var(--ab-border) !important; | |
| border-radius: 12px !important; | |
| padding: 8px !important; | |
| box-shadow: var(--ab-shadow-sm) !important; | |
| } | |
| /* Buttons */ | |
| .gradio-container button { | |
| font-family: var(--font-body) !important; | |
| font-weight: 500 !important; | |
| letter-spacing: 0 !important; | |
| border-radius: 8px !important; | |
| transition: transform 80ms var(--ab-ease), background-color 220ms var(--ab-ease) !important; | |
| } | |
| .gradio-container button:active { transform: scale(0.98) !important; } | |
| .gradio-container button.primary, .gradio-container button[variant="primary"] { | |
| background: var(--ab-ink-900) !important; | |
| color: var(--ab-parchment-50) !important; | |
| border: 1px solid var(--ab-ink-900) !important; | |
| } | |
| .gradio-container button.primary:hover { | |
| background: var(--ab-ink-800) !important; | |
| } | |
| .gradio-container button.secondary { | |
| background: var(--ab-parchment-50) !important; | |
| color: var(--ab-ink-900) !important; | |
| border: 1px solid var(--ab-border) !important; | |
| } | |
| .gradio-container button.secondary:hover { | |
| background: var(--ab-parchment-200) !important; | |
| } | |
| /* Text inputs / textareas */ | |
| .gradio-container input[type="text"], | |
| .gradio-container textarea, | |
| .gradio-container .gr-input, | |
| .gradio-container .gr-textbox textarea { | |
| background: var(--ab-parchment-50) !important; | |
| color: var(--ab-ink-900) !important; | |
| border: 1px solid var(--ab-border) !important; | |
| border-radius: 8px !important; | |
| font-family: var(--font-body) !important; | |
| font-size: 14px !important; | |
| box-shadow: inset 0 1px 2px rgba(17,32,58,0.04); | |
| } | |
| .gradio-container input[type="text"]:focus, | |
| .gradio-container textarea:focus, | |
| .gradio-container .gr-textbox textarea:focus { | |
| outline: none !important; | |
| border-color: var(--ab-gilt-400) !important; | |
| box-shadow: 0 0 0 3px rgba(220,139,42,0.18) !important; | |
| } | |
| /* Labels */ | |
| .gradio-container label, .gradio-container .label-wrap { | |
| color: var(--ab-ink-700) !important; | |
| font-family: var(--font-body) !important; | |
| font-size: 13px !important; | |
| font-weight: 500 !important; | |
| letter-spacing: 0.01em !important; | |
| } | |
| /* Dropdowns */ | |
| .gradio-container .gr-dropdown, .gradio-container [data-testid="dropdown"] select, | |
| .gradio-container .wrap.svelte-1cl284s { | |
| background: var(--ab-parchment-50) !important; | |
| border: 1px solid var(--ab-border) !important; | |
| border-radius: 8px !important; | |
| color: var(--ab-ink-900) !important; | |
| } | |
| /* Checkbox group filter */ | |
| .gradio-container .gr-check-radio, | |
| .gradio-container fieldset[data-testid="checkbox-group"] { | |
| background: var(--ab-parchment-50) !important; | |
| border: 1px solid var(--ab-border) !important; | |
| border-radius: 12px !important; | |
| padding: 12px 14px !important; | |
| } | |
| .gradio-container fieldset[data-testid="checkbox-group"] label { | |
| background: var(--ab-parchment-100) !important; | |
| border: 1px solid var(--ab-border-subtle) !important; | |
| border-radius: 999px !important; | |
| padding: 4px 10px !important; | |
| margin: 3px !important; | |
| font-size: 12px !important; | |
| } | |
| .gradio-container fieldset[data-testid="checkbox-group"] label:hover { | |
| background: var(--ab-parchment-200) !important; | |
| } | |
| .gradio-container input[type="checkbox"]:checked + * { | |
| color: var(--ab-ink-900) !important; | |
| } | |
| .gradio-container input[type="checkbox"] { | |
| accent-color: var(--ab-gilt-400) !important; | |
| } | |
| /* Markdown */ | |
| .gradio-container .markdown, .gradio-container .prose { | |
| color: var(--ab-ink-800) !important; | |
| font-family: var(--font-body) !important; | |
| } | |
| .gradio-container .markdown h1, .gradio-container .markdown h2, | |
| .gradio-container .prose h1, .gradio-container .prose h2 { | |
| font-family: var(--font-display) !important; | |
| color: var(--ab-ink-900) !important; | |
| font-weight: 400 !important; | |
| letter-spacing: -0.01em !important; | |
| } | |
| .gradio-container .markdown h3, .gradio-container .prose h3 { | |
| font-family: var(--font-body) !important; | |
| font-weight: 600 !important; | |
| color: var(--ab-ink-900) !important; | |
| font-size: 16px !important; | |
| margin-bottom: 8px !important; | |
| } | |
| .gradio-container .markdown strong { color: var(--ab-ink-900) !important; font-weight: 600 !important; } | |
| .gradio-container .markdown a { color: var(--ab-signal-500) !important; } | |
| .gradio-container .markdown hr { | |
| border: none !important; | |
| border-top: 1px solid var(--ab-border) !important; | |
| margin: 18px 0 !important; | |
| } | |
| /* Hidden bridges from Plotly DOM → Gradio state */ | |
| #click-index-input, #legend-sync-input { | |
| position: absolute !important; | |
| width: 1px !important; | |
| height: 1px !important; | |
| overflow: hidden !important; | |
| opacity: 0 !important; | |
| pointer-events: none !important; | |
| } | |
| /* Footer */ | |
| .ab-footer { | |
| border-top: 1px solid var(--ab-border); | |
| margin-top: 36px; | |
| padding-top: 18px; | |
| text-align: center; | |
| } | |
| .ab-footer__line { | |
| font-family: var(--font-body); | |
| color: var(--ab-ink-500); | |
| font-size: 12px; | |
| letter-spacing: 0.02em; | |
| } | |
| .ab-footer__line a { color: var(--ab-signal-500); } | |
| .ab-footer__mark { | |
| font-family: var(--font-display); | |
| color: var(--ab-gilt-500); | |
| font-size: 14px; | |
| letter-spacing: 0.04em; | |
| margin-bottom: 6px; | |
| } | |
| .ab-footer__mark .heb { font-family: 'Frank Ruhl Libre', serif; } | |
| .ab-footer__mark .ar { font-family: 'Amiri', serif; } | |
| """ | |
| # --------------------------------------------------------------------------- | |
| # Header / How-to / Footer markup | |
| # --------------------------------------------------------------------------- | |
# Page masthead: bilingual brand mark, product title, and a one-line
# attribution for the model and dataset.
# Both attribution links open in a new tab; rel="noopener noreferrer" makes
# sure the opened page gets no window.opener handle back into this app.
HEADER_HTML = """
<header class="ab-header">
<div class="ab-header__brand">
<div class="ab-header__mark">
<span class="heb">א-ב</span> · <span class="ar">أب</span>
</div>
<div>
<h1 class="ab-header__title">GuardLLM <em>—</em> Prompt Security Visualizer</h1>
</div>
</div>
<p class="ab-header__sub">
Editorial inspection of the prompt attack surface. Powered by
<a href="https://huggingface.co/meta-llama/Llama-Prompt-Guard-2-86M" target="_blank" rel="noopener noreferrer">Llama Prompt Guard 2 (86M)</a>
on the <a href="https://huggingface.co/datasets/neuralchemy/Prompt-injection-dataset" target="_blank" rel="noopener noreferrer">neuralchemy</a> corpus.
</p>
</header>
"""
# Three-step usage guide rendered under the masthead. Each step is one
# .ab-howto__step card: a two-digit number, an eyebrow keyword, a title,
# and a short body. Steps: 01 Map, 02 Inspect, 03 Probe.
HOW_TO_HTML = """
<div class="ab-howto">
<div class="ab-howto__step">
<div class="ab-howto__num">01</div>
<div class="ab-eyebrow">Map</div>
<div class="ab-howto__title">Explore the landscape</div>
<div class="ab-howto__body">
Each point is a prompt placed by semantic similarity. Color encodes the attack class.
Hover to preview, scroll to zoom, drag to pan.
</div>
</div>
<div class="ab-howto__step">
<div class="ab-howto__num">02</div>
<div class="ab-eyebrow">Inspect</div>
<div class="ab-howto__title">Click to analyze</div>
<div class="ab-howto__body">
Selecting a point runs the classifier and returns a verdict, a safety score,
and the full class probability breakdown.
</div>
</div>
<div class="ab-howto__step">
<div class="ab-howto__num">03</div>
<div class="ab-eyebrow">Probe</div>
<div class="ab-howto__title">Try your own prompt</div>
<div class="ab-howto__body">
Paste any text into the custom field below to see whether the model would flag
it as injection or jailbreak.
</div>
</div>
</div>
"""
# Page footer: brand mark plus model and dataset credits.
# The credit links open in a new tab (target="_blank"), the same way the
# header's attribution links do, so following one does not navigate the app
# itself away. rel="noopener noreferrer" denies the opened page a
# window.opener handle.
FOOTER_HTML = """
<footer class="ab-footer">
<div class="ab-footer__mark"><span class="heb">א-ב</span> · ALEPH BETH · <span class="ar">أب</span></div>
<div class="ab-footer__line">
GuardLLM — Prompt Security Visualizer.
Model: <a href="https://huggingface.co/meta-llama/Llama-Prompt-Guard-2-86M" target="_blank" rel="noopener noreferrer">Llama Prompt Guard 2 (86M)</a>.
Dataset: <a href="https://huggingface.co/datasets/neuralchemy/Prompt-injection-dataset" target="_blank" rel="noopener noreferrer">neuralchemy / Prompt-injection-dataset</a>.
</div>
</footer>
"""
| # --------------------------------------------------------------------------- | |
| # Gradio theme (parchment / ink) | |
| # --------------------------------------------------------------------------- | |
# Gradio theme for the parchment / ink look.
# The two colour ramps are assembled first, then handed to gr.themes.Base;
# .set() pins individual surface, border, input and button variables to
# palette tokens taken from AB.

# Primary ramp: parchment for the light steps, gilt through the middle,
# ink at the dark end.
_ab_primary_hue = gr.themes.Color(
    c50=AB["parchment_50"],
    c100=AB["parchment_100"],
    c200=AB["parchment_200"],
    c300=AB["parchment_300"],
    c400=AB["gilt_300"],
    c500=AB["gilt_400"],
    c600=AB["gilt_500"],
    c700=AB["gilt_600"],
    c800=AB["ink_800"],
    c900=AB["ink_900"],
    c950=AB["ink_950"],
)

# Neutral ramp: parchment for the lightest three steps, ink from there on.
_ab_neutral_hue = gr.themes.Color(
    c50=AB["parchment_50"],
    c100=AB["parchment_100"],
    c200=AB["parchment_200"],
    c300=AB["ink_200"],
    c400=AB["ink_300"],
    c500=AB["ink_500"],
    c600=AB["ink_600"],
    c700=AB["ink_700"],
    c800=AB["ink_800"],
    c900=AB["ink_900"],
    c950=AB["ink_950"],
)

# Hairline border shared by page, block and input borders:
# the ink_900 RGB triple (#11203A) at 12% alpha.
_ab_hairline = "rgba(17,32,58,0.12)"

ab_theme = gr.themes.Base(
    primary_hue=_ab_primary_hue,
    neutral_hue=_ab_neutral_hue,
    font=[gr.themes.GoogleFont("Geist"), "Inter", "system-ui", "sans-serif"],
    font_mono=[gr.themes.GoogleFont("Geist Mono"), "JetBrains Mono", "monospace"],
).set(
    # Page and surface fills
    body_background_fill=AB["parchment_100"],
    body_text_color=AB["ink_900"],
    background_fill_primary=AB["parchment_50"],
    background_fill_secondary=AB["parchment_100"],
    border_color_primary=_ab_hairline,
    # Blocks
    block_background_fill=AB["parchment_50"],
    block_border_color=_ab_hairline,
    block_label_text_color=AB["ink_700"],
    block_title_text_color=AB["ink_900"],
    # Inputs
    input_background_fill=AB["parchment_50"],
    input_border_color=_ab_hairline,
    input_border_color_focus=AB["gilt_400"],
    # Buttons
    button_primary_background_fill=AB["ink_900"],
    button_primary_background_fill_hover=AB["ink_800"],
    button_primary_text_color=AB["parchment_50"],
    button_secondary_background_fill=AB["parchment_50"],
    button_secondary_background_fill_hover=AB["parchment_200"],
    button_secondary_text_color=AB["ink_900"],
)
| # --------------------------------------------------------------------------- | |
| # Gradio Interface | |
| # --------------------------------------------------------------------------- | |
# Page layout and event wiring.
# Top to bottom: masthead, how-to strip, two bridge textboxes, a two-column
# row (prompt pickers on the left, analysis on the right), then the footer.
with gr.Blocks(
    title="GuardLLM — Prompt Security Visualizer",
    theme=ab_theme,
    css=ALEPH_BETH_CSS,
) as demo:
    gr.HTML(HEADER_HTML)
    gr.HTML(HOW_TO_HTML)

    # Bridge textboxes between the Plotly DOM and Gradio state. They are
    # created with visible=True and are hidden by the stylesheet rule that
    # targets these two elem_ids.
    # NOTE(review): presumably they must stay rendered so the script loaded
    # by demo.load() below can write into them — verify against
    # PLOTLY_CLICK_JS before switching to visible=False.
    click_index = gr.Textbox(value="", visible=True, elem_id="click-index-input")
    legend_sync = gr.Textbox(value="", visible=True, elem_id="legend-sync-input")

    with gr.Row():
        # --------------------------------------------------------------
        # Left column: every way to pick a prompt
        # --------------------------------------------------------------
        with gr.Column(scale=3):
            gr.HTML(
                "<div class='ab-eyebrow'>Map</div>"
                "<h3 class='ab-h3'>t-SNE — Prompt landscape</h3>"
            )
            tsne_plot = gr.Plot(
                value=build_tsne_figure(),
                label="t-SNE space",
                elem_id="tsne-chart",
                show_label=False,
            )
            gr.Markdown(
                "<span class='ab-caption'>Click a point to inspect. "
                "Click a legend entry to isolate that category — click again to restore. "
                "Double-click a legend entry to toggle just that trace.</span>"
            )

            gr.HTML(
                "<div class='ab-eyebrow' style='margin-top:18px;'>Filter</div>"
                "<h3 class='ab-h3'>By category</h3>"
            )
            with gr.Row():
                select_all_btn = gr.Button("Select all", size="sm", scale=1)
                deselect_all_btn = gr.Button("Deselect all", size="sm", scale=1)
            # Every category starts checked.
            category_filter = gr.CheckboxGroup(
                choices=UNIQUE_CATEGORIES,
                value=UNIQUE_CATEGORIES,
                label="Categories",
                show_label=False,
                interactive=True,
            )

            gr.HTML(
                "<div class='ab-eyebrow' style='margin-top:18px;'>Library</div>"
                "<h3 class='ab-h3'>Pick a prompt from the dataset</h3>"
            )
            prompt_dropdown = gr.Dropdown(
                choices=DROPDOWN_CHOICES,
                label="Search the dataset",
                show_label=False,
                filterable=True,
                interactive=True,
            )

            gr.HTML(
                "<div class='ab-eyebrow' style='margin-top:18px;'>Custom</div>"
                "<h3 class='ab-h3'>Analyze your own prompt</h3>"
            )
            manual_input = gr.Textbox(
                label="Prompt",
                show_label=False,
                placeholder="Type or paste a request to evaluate…",
                lines=3,
            )
            analyze_btn = gr.Button("Inspect", variant="primary")

        # --------------------------------------------------------------
        # Right column: the analysis only
        # --------------------------------------------------------------
        with gr.Column(scale=2):
            gr.HTML(
                "<div class='ab-eyebrow'>Analysis</div>"
                "<h3 class='ab-h3'>Verdict & confidence</h3>"
            )
            result_html = gr.HTML(value=empty_analysis_html())
            risk_md = gr.Markdown(value="")
            full_prompt = gr.Textbox(
                label="Full prompt",
                lines=4,
                interactive=False,
                visible=True,
            )
            gr.Markdown("---")
            gr.HTML(build_stats_html())

    # ---- Events: category filtering (all of these redraw the plot) ----
    category_filter.change(
        fn=on_filter_change,
        inputs=[category_filter],
        outputs=[tsne_plot],
    )
    select_all_btn.click(
        fn=select_all_categories,
        inputs=[],
        outputs=[category_filter, tsne_plot],
    )
    deselect_all_btn.click(
        fn=deselect_all_categories,
        inputs=[],
        outputs=[category_filter, tsne_plot],
    )
    legend_sync.change(
        fn=on_legend_sync,
        inputs=[legend_sync],
        outputs=[category_filter, tsne_plot],
    )

    # ---- Events: picking a dataset prompt (plot click or dropdown) ----
    click_index.change(
        fn=on_index_input,
        inputs=[click_index],
        outputs=[result_html, risk_md, full_prompt],
    )
    prompt_dropdown.change(
        fn=on_dropdown_select,
        inputs=[prompt_dropdown],
        outputs=[result_html, risk_md, full_prompt],
    )

    # ---- Events: analysing free text (button click or Enter) ----
    # NOTE(review): unlike the two handlers above, these do not write to
    # full_prompt, so that box keeps whatever it showed before — confirm
    # this is intended.
    for manual_trigger in (analyze_btn.click, manual_input.submit):
        manual_trigger(
            fn=on_manual_analyze,
            inputs=[manual_input],
            outputs=[result_html, risk_md],
        )

    # Run the front-end script on page load; no Python callback is involved.
    demo.load(fn=None, inputs=None, outputs=None, js=PLOTLY_CLICK_JS)

    gr.HTML(FOOTER_HTML)

logger.info("Gradio app built. Ready to launch.")
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()