Spaces:

AlephBeth-AI
/

GuardLLM

Running

Restructure layout: all prompt selection left, analysis only right

ca68436 8 days ago

45.7 kB

	"""
	GuardLLM — Prompt Security Visualizer
	Aleph Beth design system applied. Editorial calm, bilingual FR/EN posture.
	Powered by Llama Prompt Guard 2 (86M) and neuralchemy/Prompt-injection-dataset.
	"""

	import logging
	import os
	import sys
	import json

	import gradio as gr
	import torch
	import numpy as np
	import plotly.graph_objects as go
	from pathlib import Path

	# ---------------------------------------------------------------------------
	# Logging
	# ---------------------------------------------------------------------------
	# Send INFO-and-above records to stdout, each prefixed with a timestamp
	# and its level name.
	logging.basicConfig(
	    handlers=[logging.StreamHandler(sys.stdout)],
	    format="%(asctime)s [%(levelname)s] %(message)s",
	    level=logging.INFO,
	)
	# Single named logger shared by the whole module.
	logger = logging.getLogger("GuardLLM")

	# ---------------------------------------------------------------------------
	# Aleph Beth — palette tokens (mirrored from colors_and_type.css)
	# ---------------------------------------------------------------------------
	# Palette tokens keyed "<family>_<step>"; values are hex colour strings.
	AB = dict(
	    # Ink family
	    ink_950="#0B1626",
	    ink_900="#11203A",
	    ink_800="#1B2F4E",
	    ink_700="#2A4566",
	    ink_600="#44607F",
	    ink_500="#6B829D",
	    ink_400="#95A6BB",
	    ink_300="#BCC8D6",
	    ink_200="#DAE1EA",
	    ink_100="#ECF0F5",
	    ink_50="#F6F8FB",
	    # Parchment family
	    parchment_50="#FCFAF2",
	    parchment_100="#F8F3E6",
	    parchment_200="#ECE5D2",
	    parchment_300="#DDD3B9",
	    parchment_400="#C2B695",
	    # Gilt family
	    gilt_50="#FCEEDA",
	    gilt_100="#F8D9A4",
	    gilt_200="#F2BD72",
	    gilt_300="#EAA046",
	    gilt_400="#DC8B2A",
	    gilt_500="#A66718",
	    gilt_600="#7A4912",
	    # Signal family
	    signal_100="#C9DDEB",
	    signal_200="#9BBFD9",
	    signal_300="#6FA0C2",
	    signal_400="#4A82AA",
	    signal_500="#36678C",
	    signal_600="#244D6B",
	    # Threat family
	    threat_400="#D44A3E",
	    threat_300="#E07065",
	    threat_100="#F8DAD5",
	    # Safe family
	    safe_400="#3F8F6E",
	    safe_300="#66AB8C",
	    safe_100="#D4E8DD",
	)

	# Category colors stay within the brand families — no neon, no inventions.
	# Each dataset category is drawn with one palette token from AB; the
	# pairs below are (category key, AB token name).
	CATEGORY_COLORS = {
	    category: AB[token]
	    for category, token in (
	        ("benign", "safe_400"),
	        ("direct_injection", "threat_400"),
	        ("jailbreak", "gilt_400"),
	        ("system_extraction", "gilt_600"),
	        ("encoding_obfuscation", "signal_500"),
	        ("persona_replacement", "gilt_300"),
	        ("indirect_injection", "threat_300"),
	        ("token_smuggling", "signal_600"),
	        ("many_shot", "signal_400"),
	        ("crescendo", "signal_200"),
	        ("context_overflow", "ink_600"),
	        ("prompt_leaking", "gilt_500"),
	        ("unknown", "ink_400"),
	    )
	}

	# Human-readable display name for every category key.
	CATEGORY_LABELS = dict(
	    benign="Benign",
	    direct_injection="Direct Injection",
	    jailbreak="Jailbreak",
	    system_extraction="System Extraction",
	    encoding_obfuscation="Encoding / Obfuscation",
	    persona_replacement="Persona Replacement",
	    indirect_injection="Indirect Injection",
	    token_smuggling="Token Smuggling",
	    many_shot="Many-Shot",
	    crescendo="Crescendo",
	    context_overflow="Context Overflow",
	    prompt_leaking="Prompt Leaking",
	    unknown="Unknown",
	)
	# Reverse lookup: display name -> category key.
	LABEL_TO_KEY = dict(zip(CATEGORY_LABELS.values(), CATEGORY_LABELS.keys()))

	# ---------------------------------------------------------------------------
	# Lazy-loaded risk classifier (Llama Prompt Guard 2)
	# ---------------------------------------------------------------------------
	MODEL_ID = "meta-llama/Llama-Prompt-Guard-2-86M"
	LABELS = ["Benign", "Malicious"]
	HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
	_classifier = {"tokenizer": None, "model": None, "device": None}


	def get_classifier():
	    """Return ``(tokenizer, model, device)``, loading the model on first call.

	    The loaded objects are stored in the module-level ``_classifier`` dict,
	    so later calls return the same instances without reloading. The model
	    is switched to eval mode and moved to CUDA when available, else CPU.
	    """
	    if _classifier["model"] is not None:
	        return _classifier["tokenizer"], _classifier["model"], _classifier["device"]

	    logger.info("Lazy-loading Llama Prompt Guard 2...")
	    # Imported here so transformers is only loaded when a classification
	    # is actually requested.
	    from transformers import AutoTokenizer, AutoModelForSequenceClassification

	    auth = {"token": HF_TOKEN} if HF_TOKEN else {}
	    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, **auth)
	    model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID, **auth)
	    model.eval()
	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	    model.to(device)

	    # Publish the cache only after every step above succeeded.
	    _classifier.update(tokenizer=tokenizer, model=model, device=device)
	    logger.info("Classifier loaded on %s", device)
	    return _classifier["tokenizer"], _classifier["model"], _classifier["device"]


	# ---------------------------------------------------------------------------
	# Load precomputed t-SNE data
	# ---------------------------------------------------------------------------
	CACHE_DIR = Path(__file__).parent / "cache"
	CACHE_FILE = CACHE_DIR / "embeddings_tsne.npz"
	META_FILE = CACHE_DIR / "metadata.json"

	logger.info("Loading precomputed t-SNE cache from %s", CACHE_DIR)
	# Fail fast at import time: the app cannot draw anything without both files.
	if not CACHE_FILE.exists() or not META_FILE.exists():
	    raise RuntimeError(
	        "Cache files not found in %s. Run precompute.py first." % CACHE_DIR
	    )

	# np.load on an .npz returns an archive object holding an open file handle.
	# Indexing reads the array into memory, so the archive is closed right away.
	with np.load(CACHE_FILE) as _npz:
	    TSNE_COORDS = _npz["tsne_2d"]
	with open(META_FILE, "r", encoding="utf-8") as f:
	    METADATA = json.load(f)
	logger.info("Loaded %d points for visualization", len(METADATA))

	# Column-wise views of the metadata records, index-aligned with TSNE_COORDS
	# rows (build_tsne_figure indexes both with the same positions).
	ALL_TEXTS = [m["text"] for m in METADATA]
	ALL_CATEGORIES = [m["category"] for m in METADATA]
	ALL_SEVERITIES = [m["severity"] for m in METADATA]
	ALL_LABELS_DS = [m["label"] for m in METADATA]
	UNIQUE_CATEGORIES = sorted(set(ALL_CATEGORIES))

	# Dropdown labels: "<index> \| <category> \| <70-char single-line preview>".
	# The backslash is spelled "\\|" so the runtime text is unchanged while the
	# invalid "\|" escape sequence (a SyntaxWarning on Python 3.12+) is gone.
	# NOTE(review): the literal backslash looks like a Markdown-escaping
	# artifact. on_dropdown_select parses the leading index back out of this
	# label, so keep the two in sync if the separator is ever changed.
	DROPDOWN_CHOICES = []
	for i, m in enumerate(METADATA):
	    preview = m["text"][:70].replace("\n", " ")
	    if len(m["text"]) > 70:
	        preview += "..."
	    DROPDOWN_CHOICES.append(f"{i} \\| {m['category']} \\| {preview}")


	# ---------------------------------------------------------------------------
	# Analysis function
	# ---------------------------------------------------------------------------
	def analyze_prompt(text):
	    """Classify ``text`` with the lazily loaded prompt-guard model.

	    Returns a ``(prob_dict, safety)`` pair. ``prob_dict`` maps each entry
	    of ``LABELS`` to its softmax probability, and ``safety`` is the
	    probability at index 0. Empty or whitespace-only input returns
	    ``({}, 0.0)`` without touching the model.
	    """
	    if not (text and text.strip()):
	        return {}, 0.0

	    tokenizer, model, device = get_classifier()
	    # Inputs longer than 512 tokens are truncated by the tokenizer.
	    encoded = tokenizer(
	        text, return_tensors="pt", truncation=True, max_length=512, padding=True
	    )
	    encoded = encoded.to(device)
	    with torch.no_grad():
	        logits = model(**encoded).logits
	        # Row 0 is the only row: a single text was encoded.
	        probs = torch.softmax(logits, dim=-1)[0].cpu().numpy()

	    prob_dict = {name: float(probs[pos]) for pos, name in enumerate(LABELS)}
	    return prob_dict, float(probs[0])


	# ---------------------------------------------------------------------------
	# Plotly figure — parchment surface, ink axes, restrained palette
	# ---------------------------------------------------------------------------
	def build_tsne_figure(selected_categories=None):
	    """Build the t-SNE scatter figure with one trace per category.

	    Args:
	        selected_categories: container of category keys to draw. ``None``
	            draws every category; an empty container draws none.

	    Returns:
	        A ``go.Figure``. Each point's ``customdata`` is its dataset index
	        as a string.
	    """

	    def _clip(raw, limit=80):
	        # Single-line preview for the hover card, with "..." when cut.
	        return raw[:limit].replace("\n", " ") + ("..." if len(raw) > limit else "")

	    def _axis(caption):
	        # Both axes share the same styling and differ only in their title.
	        return dict(
	            title=dict(text=caption, font=dict(color=AB["ink_500"], size=11)),
	            showgrid=True,
	            gridcolor="rgba(17,32,58,0.06)",
	            zeroline=False,
	            color=AB["ink_500"],
	        )

	    fig = go.Figure()
	    for cat in UNIQUE_CATEGORIES:
	        if selected_categories is not None and cat not in selected_categories:
	            continue
	        members = [pos for pos, c in enumerate(ALL_CATEGORIES) if c == cat]
	        if not members:
	            continue

	        label = CATEGORY_LABELS.get(cat, cat)
	        hover_texts = [
	            f"<b>{label}</b><br>"
	            f"Severity — {ALL_SEVERITIES[pos] or 'benign'}<br>"
	            f"Index — {pos}<br>"
	            f"<i>{_clip(ALL_TEXTS[pos])}</i>"
	            for pos in members
	        ]
	        fig.add_trace(go.Scatter(
	            x=TSNE_COORDS[members, 0].tolist(),
	            y=TSNE_COORDS[members, 1].tolist(),
	            mode="markers",
	            name=label,
	            marker=dict(
	                # Smaller markers for categories with more than 500 points.
	                size=7 if len(members) <= 500 else 5,
	                color=CATEGORY_COLORS.get(cat, CATEGORY_COLORS["unknown"]),
	                opacity=0.78,
	                line=dict(width=0.5, color="rgba(17,32,58,0.20)"),
	            ),
	            text=hover_texts,
	            hoverinfo="text",
	            customdata=[str(pos) for pos in members],
	        ))

	    title_markup = (
	        "<span style='font-family: Instrument Serif, serif; font-size:18px;'>"
	        "t-SNE — Prompt Security Landscape</span>"
	    )
	    fig.update_layout(
	        template="plotly_white",
	        paper_bgcolor=AB["parchment_100"],
	        plot_bgcolor=AB["parchment_50"],
	        font=dict(family="Geist, Inter, system-ui, sans-serif", color=AB["ink_700"]),
	        title=dict(
	            text=title_markup,
	            font=dict(color=AB["ink_900"]),
	            x=0.5,
	            xanchor="center",
	        ),
	        legend=dict(
	            title=dict(text="Category", font=dict(color=AB["ink_700"], size=11)),
	            bgcolor="rgba(252,250,242,0.88)",
	            bordercolor="rgba(17,32,58,0.12)",
	            borderwidth=1,
	            font=dict(color=AB["ink_800"], size=10),
	            itemsizing="constant",
	            itemclick="toggleothers",
	            itemdoubleclick="toggle",
	        ),
	        xaxis=_axis("t-SNE 1"),
	        yaxis=_axis("t-SNE 2"),
	        margin=dict(l=44, r=44, t=56, b=44),
	        height=620,
	        dragmode="pan",
	        hoverlabel=dict(
	            bgcolor=AB["parchment_50"],
	            bordercolor="rgba(17,32,58,0.12)",
	            font=dict(family="Geist, sans-serif", color=AB["ink_900"], size=12),
	        ),
	    )
	    return fig


	# ---------------------------------------------------------------------------
	# Callbacks
	# ---------------------------------------------------------------------------
	def on_filter_change(categories):
	    """Redraw the chart for the given category selection.

	    An empty or missing selection is forwarded as ``None``, which
	    ``build_tsne_figure`` treats as "draw every category".
	    """
	    return build_tsne_figure(categories or None)


	def select_all_categories():
	    """Return a component update selecting every category, plus the full chart."""
	    everything = UNIQUE_CATEGORIES
	    return gr.update(value=everything), build_tsne_figure(everything)


	def deselect_all_categories():
	    """Return a component update clearing the selection, plus an empty chart."""
	    nothing = []
	    checkbox_update = gr.update(value=nothing)
	    # An empty list (unlike None) makes build_tsne_figure draw no traces.
	    return checkbox_update, build_tsne_figure([])


	def on_legend_sync(payload):
	    """Mirror a Plotly legend click into the category filter and the chart.

	    ``payload`` is a JSON string of the form ``{"visible": [<trace name>, ...]}``.
	    Trace names are mapped back to category keys through ``LABEL_TO_KEY``,
	    and names that are not known categories are dropped.

	    Returns ``(filter update, figure)``. A blank payload or any failure
	    while handling it returns two no-op ``gr.update()`` objects.
	    """
	    if not (payload and payload.strip()):
	        return gr.update(), gr.update()
	    try:
	        names = json.loads(payload).get("visible", [])
	        candidates = (LABEL_TO_KEY.get(name, name) for name in names)
	        visible_keys = [key for key in candidates if key in UNIQUE_CATEGORIES]
	        # An empty list clears the filter and produces an empty chart.
	        return gr.update(value=visible_keys), build_tsne_figure(visible_keys)
	    except Exception as e:
	        logger.error("legend sync error: %s", e)
	        return gr.update(), gr.update()


	def _dataset_meta_block(category, severity, ground_truth):
	    """Return the "Dataset metadata" snippet appended to a risk summary.

	    The snippet begins with a blank line, then an eyebrow span and three
	    bullet lines. ``category`` is shown through ``CATEGORY_LABELS`` when
	    it is a known key, otherwise verbatim.
	    """
	    display_name = CATEGORY_LABELS.get(category, category)
	    rows = (
	        "<span class='ab-eyebrow'>Dataset metadata</span>",
	        f"- Category — {display_name}",
	        f"- Severity — {severity}",
	        f"- Ground truth — {ground_truth}",
	    )
	    return "\n\n" + "\n".join(rows) + "\n"


	def on_dropdown_select(choice):
	    """Analyze the dataset prompt picked in the dropdown.

	    ``choice`` is a dropdown label whose first pipe-separated field is the
	    dataset index. Only that leading integer is used; analysis and
	    rendering are delegated to ``on_index_input``, which also rejects
	    out-of-range indices.

	    Returns ``(result_html, risk_text, prompt_text)``.
	    """
	    if not choice:
	        return empty_analysis_html(), "Select a prompt to begin.", ""

	    # Take everything before the first pipe and drop surrounding spaces
	    # plus an optional trailing backslash, so both "12 | ..." and
	    # "12 \| ..." labels parse.
	    head = choice.split("|", 1)[0]
	    index_text = head.rstrip().rstrip("\\").strip()
	    try:
	        idx = int(index_text)
	    except ValueError as e:
	        logger.error("Error: %s", e)
	        return empty_analysis_html(), f"Error — {e}", ""

	    # Same pipeline as a chart click. This also adds the bounds check,
	    # so a negative index can no longer wrap around to the end of the list.
	    return on_index_input(str(idx))


	def on_index_input(idx_str):
	    """Analyze the dataset prompt whose index arrives as a string.

	    Returns ``(result_html, risk_text, prompt_text)``. Blank input, an
	    out-of-range index, or any error during analysis returns the idle
	    card, a short message, and an empty prompt string.
	    """
	    if not (idx_str and idx_str.strip()):
	        return empty_analysis_html(), "Click a point on the chart.", ""
	    try:
	        idx = int(idx_str.strip())
	        if not 0 <= idx < len(ALL_TEXTS):
	            return empty_analysis_html(), f"Invalid index — {idx}", ""

	        text = ALL_TEXTS[idx]
	        category = ALL_CATEGORIES[idx]
	        severity = ALL_SEVERITIES[idx] or "N/A"
	        ground_truth = "Malicious" if ALL_LABELS_DS[idx] == 1 else "Benign"

	        prob_dict, _ = analyze_prompt(text)
	        # The predicted class is the one with the highest probability.
	        pred_label = max(prob_dict, key=prob_dict.get)
	        confidence = prob_dict[pred_label]

	        result_html = build_result_html(pred_label, confidence, prob_dict, text)
	        risk_text = build_risk_assessment(pred_label, confidence, prob_dict)
	        risk_text += _dataset_meta_block(category, severity, ground_truth)
	        return result_html, risk_text, text
	    except Exception as e:
	        logger.error("Error: %s", e)
	        return empty_analysis_html(), f"Error — {e}", ""


	def on_manual_analyze(text):
	    """Analyze free-form text.

	    Returns ``(result_html, risk_text)``. Blank input returns the idle
	    card and an empty string. Errors raised during analysis are not
	    caught here.
	    """
	    if not (text and text.strip()):
	        return empty_analysis_html(), ""

	    prob_dict, _ = analyze_prompt(text)
	    # The predicted class is the one with the highest probability.
	    pred_label = max(prob_dict, key=prob_dict.get)
	    confidence = prob_dict[pred_label]
	    return (
	        build_result_html(pred_label, confidence, prob_dict, text),
	        build_risk_assessment(pred_label, confidence, prob_dict),
	    )


	# ---------------------------------------------------------------------------
	# UI builders — editorial, parchment surface, ink type, no emoji
	# ---------------------------------------------------------------------------
	def empty_analysis_html():
	    """Return the static "Idle" card markup used when nothing is analyzed."""
	    # Plain (non-f) string: the markup has no placeholders.
	    idle_card = """
	<div class="ab-card ab-card--quiet">
	<div class="ab-eyebrow">Idle</div>
	<p class="ab-prose">
	Click a point on the chart, pick a prompt from the list,
	or paste your own below. The classifier runs on demand.
	</p>
	</div>
	"""
	    return idle_card


	def build_result_html(label, confidence, probs, text):
	    """Render the verdict card for one analyzed prompt.

	    Args:
	        label: predicted class name; ``"Benign"`` selects the safe accent
	            colour, anything else the threat colour.
	        confidence: probability of the predicted class, in [0, 1].
	        probs: mapping holding a probability for every entry of ``LABELS``
	            (``probs["Benign"]`` drives the safety score).
	        text: the analyzed prompt; its first 180 characters are shown,
	            HTML-escaped, as a quote.

	    Returns:
	        An HTML string.
	    """
	    # Local import keeps this block self-contained.
	    import html

	    is_safe = label == "Benign"
	    accent = AB["safe_400"] if is_safe else AB["threat_400"]
	    marker = "●"  # geometric primitive instead of emoji
	    pct = confidence * 100
	    safety_score = probs["Benign"] * 100
	    # Score colour bands: >= 70 safe, >= 40 gilt (caution), below that threat.
	    safety_color = (
	        AB["safe_400"] if safety_score >= 70
	        else AB["gilt_400"] if safety_score >= 40
	        else AB["threat_400"]
	    )

	    bar_chunks = []
	    for lbl in LABELS:
	        p = probs[lbl] * 100
	        c = AB["safe_400"] if lbl == "Benign" else AB["threat_400"]
	        bar_chunks.append(f"""
	<div class="ab-bar">
	<div class="ab-bar__row">
	<span class="ab-bar__label">{lbl}</span>
	<span class="ab-bar__value">{p:.1f}%</span>
	</div>
	<div class="ab-bar__track">
	<div class="ab-bar__fill" style="width:{p}%; background:{c};"></div>
	</div>
	</div>
	""")
	    bars_html = "".join(bar_chunks)

	    # The prompt is untrusted text placed into markup. Truncate first so an
	    # entity is never cut in half, then escape &, < and > to stop injection.
	    preview = html.escape(text[:180], quote=False)
	    if len(text) > 180:
	        preview += "…"

	    return f"""
	<div class="ab-card">
	<div class="ab-result__head">
	<span class="ab-result__marker" style="color:{accent};">{marker}</span>
	<div>
	<div class="ab-eyebrow">Verdict</div>
	<div class="ab-result__label" style="color:{accent};">{label}</div>
	<div class="ab-caption">Confidence — {pct:.1f}%</div>
	</div>
	</div>

	<div class="ab-divider"></div>

	<div class="ab-eyebrow">Safety score</div>
	<div class="ab-score">
	<div class="ab-score__value" style="color:{safety_color};">{safety_score:.0f}<span>/100</span></div>
	<div class="ab-score__track">
	<div class="ab-score__fill" style="width:{safety_score}%;"></div>
	</div>
	</div>

	<div class="ab-eyebrow" style="margin-top:18px;">Class probabilities</div>
	<div class="ab-bars">{bars_html}</div>

	<div class="ab-quote">
	<div class="ab-eyebrow">Analyzed prompt</div>
	<blockquote>“{preview}”</blockquote>
	</div>
	</div>
	"""


	def build_risk_assessment(label, confidence, probs):
	    """Summarize a classification as a short risk report.

	    The risk level comes from the predicted ``label`` and whether
	    ``confidence`` is strictly above 0.85: Benign gives Low or Moderate,
	    any other label gives Critical or High.

	    Args:
	        label: predicted class name.
	        confidence: probability of the predicted class, in [0, 1].
	        probs: mapping with ``"Benign"`` and ``"Malicious"`` probabilities.

	    Returns:
	        A text block: an eyebrow span with the level, a one-sentence
	        description, and three bullet lines.
	    """
	    safety_score = probs["Benign"] * 100
	    malicious_score = probs["Malicious"] * 100
	    confident = confidence > 0.85

	    if label == "Benign":
	        level, desc = (
	            ("Low", "The request appears safe. No injection or jailbreak patterns were detected.")
	            if confident
	            else ("Moderate", "Likely benign, with moderate confidence. The wording may be ambiguous.")
	        )
	    else:
	        level, desc = (
	            ("Critical", "Malicious request detected with high confidence. Likely injection or jailbreak.")
	            if confident
	            else ("High", "Malicious request detected. Possible injection or jailbreak — review recommended.")
	        )

	    header = f"<span class='ab-eyebrow'>Risk level — {level}</span>\n\n{desc}\n\n"
	    bullets = (
	        f"- Safety score — {safety_score:.0f}/100\n"
	        f"- Predicted class — {label} ({confidence*100:.1f}%)\n"
	        f"- P(Benign) — {safety_score:.1f}%  ·  P(Malicious) — {malicious_score:.1f}%\n"
	    )
	    return header + bullets


	def build_stats_html():
	    """Render the dataset "Composition" card.

	    Shows the total number of records, the benign count (label 0) and the
	    remaining count as malicious, followed by one row per category sorted
	    by descending frequency with its share of the total.
	    """
	    total = len(METADATA)
	    n_benign = sum(1 for entry in METADATA if entry["label"] == 0)
	    n_malicious = total - n_benign

	    tally = {}
	    for entry in METADATA:
	        key = entry["category"]
	        tally[key] = tally.get(key, 0) + 1

	    # Most frequent first; ties keep their first-seen order (stable sort).
	    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
	    rows = []
	    for cat, count in ranked:
	        color = CATEGORY_COLORS.get(cat, CATEGORY_COLORS["unknown"])
	        label = CATEGORY_LABELS.get(cat, cat)
	        pct = count / total * 100
	        rows.append(
	            f'<div class="ab-stats__row">'
	            f'<span class="ab-stats__dot" style="background:{color};"></span>'
	            f'<span class="ab-stats__name">{label}</span>'
	            f'<span class="ab-stats__count">{count:,} <em>({pct:.1f}%)</em></span>'
	            f'</div>'
	        )
	    cats_html = "".join(rows)

	    return f"""
	<div class="ab-card">
	<div class="ab-eyebrow">Dataset</div>
	<h3 class="ab-h3">Composition</h3>
	<div class="ab-kpi-row">
	<div class="ab-kpi">
	<div class="ab-kpi__label">Total</div>
	<div class="ab-kpi__value">{total:,}</div>
	</div>
	<div class="ab-kpi">
	<div class="ab-kpi__label" style="color:{AB['safe_400']};">Benign</div>
	<div class="ab-kpi__value" style="color:{AB['safe_400']};">{n_benign:,}</div>
	</div>
	<div class="ab-kpi">
	<div class="ab-kpi__label" style="color:{AB['threat_400']};">Malicious</div>
	<div class="ab-kpi__value" style="color:{AB['threat_400']};">{n_malicious:,}</div>
	</div>
	</div>
	<div class="ab-stats">{cats_html}</div>
	</div>
	"""


	# ---------------------------------------------------------------------------
	# JavaScript bridge: Plotly clicks → Gradio hidden input
	# ---------------------------------------------------------------------------
	# Browser-side script. It listens to Plotly click and legend events on the
	# element matching '#tsne-chart .js-plotly-plot' and writes the result into
	# the hidden '#click-index-input' / '#legend-sync-input' fields.
	# The logical-OR operators must be plain `||`: a backslash before `|` is a
	# JavaScript syntax error and an invalid escape sequence in this Python string.
	PLOTLY_CLICK_JS = """
	() => {
	function pushToHidden(selector, value) {
	const el = document.querySelector(selector + ' textarea')
	|| document.querySelector(selector + ' input');
	if (!el) return;
	const proto = el.tagName === 'TEXTAREA'
	? window.HTMLTextAreaElement.prototype
	: window.HTMLInputElement.prototype;
	const setter = Object.getOwnPropertyDescriptor(proto, 'value').set;
	setter.call(el, String(value));
	el.dispatchEvent(new Event('input', { bubbles: true }));
	setTimeout(() => el.dispatchEvent(new Event('change', { bubbles: true })), 40);
	}

	function attachHandlers(plotEl) {
	if (!plotEl || plotEl._abHandlersAttached) return;
	plotEl._abHandlersAttached = true;

	// Point click → push index to #click-index-input
	plotEl.on('plotly_click', function (data) {
	if (data && data.points && data.points.length > 0) {
	const idx = data.points[0].customdata;
	if (idx !== undefined && idx !== null) {
	pushToHidden('#click-index-input', idx);
	}
	}
	});

	// Legend click → after toggleothers settles, read visible trace names
	// and push them to #legend-sync-input as JSON {visible: [...]}.
	plotEl.on('plotly_legendclick', function (ed) {
	setTimeout(() => {
	const visible = (plotEl.data || [])
	.filter(t => t.visible === undefined || t.visible === true)
	.map(t => t.name);
	pushToHidden('#legend-sync-input', JSON.stringify({visible: visible}));
	}, 60);
	return true; // allow Plotly to process its default toggleothers
	});

	plotEl.on('plotly_legenddoubleclick', function (ed) {
	setTimeout(() => {
	const visible = (plotEl.data || [])
	.filter(t => t.visible === undefined || t.visible === true)
	.map(t => t.name);
	pushToHidden('#legend-sync-input', JSON.stringify({visible: visible}));
	}, 60);
	return true;
	});
	}

	function setup() {
	const plotEl = document.querySelector('#tsne-chart .js-plotly-plot');
	if (!plotEl) { setTimeout(setup, 500); return; }
	attachHandlers(plotEl);
	const root = document.querySelector('#tsne-chart') || document.body;
	const observer = new MutationObserver(() => {
	const newPlot = document.querySelector('#tsne-chart .js-plotly-plot');
	if (newPlot) attachHandlers(newPlot);
	});
	observer.observe(root, { childList: true, subtree: true });
	}
	setTimeout(setup, 1000);
	}
	"""


	# ---------------------------------------------------------------------------
	# Aleph Beth — global CSS (design tokens, ab-* component classes, Gradio overrides)
	# ---------------------------------------------------------------------------
	ALEPH_BETH_CSS = """
	@import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@0;1&family=Geist:wght@300;400;500;600;700&family=Geist+Mono:wght@400;500;600&family=Frank+Ruhl+Libre:wght@400;500&family=Amiri:wght@400;700&display=swap');

	:root, .gradio-container {
	--ab-ink-950:#0B1626; --ab-ink-900:#11203A; --ab-ink-800:#1B2F4E;
	--ab-ink-700:#2A4566; --ab-ink-600:#44607F; --ab-ink-500:#6B829D;
	--ab-ink-400:#95A6BB; --ab-ink-300:#BCC8D6; --ab-ink-200:#DAE1EA;
	--ab-ink-100:#ECF0F5; --ab-ink-50:#F6F8FB;
	--ab-parchment-50:#FCFAF2; --ab-parchment-100:#F8F3E6;
	--ab-parchment-200:#ECE5D2; --ab-parchment-300:#DDD3B9;
	--ab-gilt-300:#EAA046; --ab-gilt-400:#DC8B2A; --ab-gilt-500:#A66718; --ab-gilt-600:#7A4912;
	--ab-signal-300:#6FA0C2; --ab-signal-400:#4A82AA; --ab-signal-500:#36678C;
	--ab-threat-400:#D44A3E; --ab-safe-400:#3F8F6E;
	--ab-border: rgba(17,32,58,0.12);
	--ab-border-subtle: rgba(17,32,58,0.06);
	--ab-shadow-sm: 0 2px 6px rgba(17,32,58,0.07), 0 1px 2px rgba(17,32,58,0.04);
	--ab-shadow-md: 0 8px 20px rgba(17,32,58,0.08), 0 2px 4px rgba(17,32,58,0.05);
	--ab-ease: cubic-bezier(0.16, 1, 0.3, 1);
	--font-display: 'Instrument Serif', 'Cormorant Garamond', serif;
	--font-body: 'Geist', 'Inter', system-ui, sans-serif;
	--font-mono: 'Geist Mono', 'JetBrains Mono', ui-monospace, monospace;
	}

	/* ---------- Base canvas ---------- */
	.gradio-container, body, html {
	background: var(--ab-parchment-100) !important;
	color: var(--ab-ink-900) !important;
	font-family: var(--font-body) !important;
	font-feature-settings: 'ss01', 'cv01';
	}
	.gradio-container { max-width: 1440px !important; margin: 0 auto !important; padding: 24px 32px !important; }

	/* Remove Gradio gradient backgrounds */
	.gradio-container ::before, .gradio-container ::after { background-image: none !important; }

	/* ---------- Header / brand ---------- */
	.ab-header {
	padding: 18px 4px 22px;
	border-bottom: 1px solid var(--ab-border);
	margin-bottom: 24px;
	display: flex; align-items: baseline; justify-content: space-between; gap: 24px;
	flex-wrap: wrap;
	}
	.ab-header__brand {
	display: flex; align-items: baseline; gap: 14px;
	}
	.ab-header__mark {
	font-family: var(--font-display);
	font-size: 32px; line-height: 1;
	color: var(--ab-gilt-500);
	letter-spacing: -0.01em;
	}
	.ab-header__mark .heb { font-family: 'Frank Ruhl Libre', serif; }
	.ab-header__mark .ar { font-family: 'Amiri', serif; }
	.ab-header__title {
	font-family: var(--font-display);
	font-size: 38px; line-height: 1.05;
	color: var(--ab-ink-900);
	letter-spacing: -0.01em;
	margin: 0;
	}
	.ab-header__title em { font-style: italic; color: var(--ab-gilt-600); }
	.ab-header__sub {
	font-family: var(--font-body);
	color: var(--ab-ink-700);
	font-size: 14px; line-height: 1.5;
	max-width: 460px;
	}
	.ab-header__sub a { color: var(--ab-signal-500); text-decoration: underline; text-underline-offset: 3px; }

	/* ---------- Eyebrow / labels / type ---------- */
	.ab-eyebrow {
	display: inline-block;
	font-family: var(--font-body);
	font-size: 11px; font-weight: 500;
	text-transform: uppercase;
	letter-spacing: 0.16em;
	color: var(--ab-gilt-600);
	margin-bottom: 6px;
	}
	.ab-h3 {
	font-family: var(--font-display);
	font-size: 22px; line-height: 1.2;
	color: var(--ab-ink-900);
	margin: 0 0 12px 0;
	letter-spacing: -0.005em;
	}
	.ab-prose {
	font-family: var(--font-body);
	font-size: 14px; line-height: 1.55;
	color: var(--ab-ink-700);
	}
	.ab-caption {
	font-family: var(--font-body);
	font-size: 12px;
	color: var(--ab-ink-500);
	letter-spacing: 0.02em;
	}
	.ab-divider {
	height: 1px; background: var(--ab-border);
	margin: 16px 0;
	}

	/* ---------- Cards ---------- */
	.ab-card {
	background: var(--ab-parchment-50);
	border: 1px solid var(--ab-border);
	border-radius: 12px;
	padding: 20px 22px;
	box-shadow: var(--ab-shadow-sm);
	font-family: var(--font-body);
	}
	.ab-card--quiet {
	background: transparent;
	border-style: dashed;
	box-shadow: none;
	}

	/* ---------- How-to (3-up) ---------- */
	.ab-howto {
	display: grid;
	grid-template-columns: repeat(3, 1fr);
	gap: 12px;
	margin: 8px 0 20px;
	}
	@media (max-width: 900px) { .ab-howto { grid-template-columns: 1fr; } }
	.ab-howto__step {
	background: var(--ab-parchment-50);
	border: 1px solid var(--ab-border);
	border-radius: 12px;
	padding: 16px 18px;
	transition: transform var(--ab-ease) 220ms, box-shadow var(--ab-ease) 220ms;
	}
	.ab-howto__step:hover { transform: translateY(-1px); box-shadow: var(--ab-shadow-md); }
	.ab-howto__num {
	font-family: var(--font-display);
	font-size: 28px;
	color: var(--ab-gilt-500);
	line-height: 1;
	}
	.ab-howto__title {
	font-family: var(--font-body);
	font-size: 14px; font-weight: 600;
	color: var(--ab-ink-900);
	margin: 8px 0 6px;
	}
	.ab-howto__body {
	font-family: var(--font-body);
	font-size: 13px; line-height: 1.5;
	color: var(--ab-ink-700);
	}

	/* ---------- Result card ---------- */
	.ab-result__head {
	display: flex; align-items: center; gap: 14px;
	}
	.ab-result__marker {
	font-size: 28px; line-height: 1;
	}
	.ab-result__label {
	font-family: var(--font-display);
	font-size: 28px;
	line-height: 1.1;
	letter-spacing: -0.01em;
	margin-top: 2px;
	}
	.ab-score {
	display: flex; align-items: center; gap: 14px;
	margin: 6px 0 4px;
	}
	.ab-score__value {
	font-family: var(--font-display);
	font-size: 44px; line-height: 1;
	letter-spacing: -0.02em;
	}
	.ab-score__value span { font-size: 16px; color: var(--ab-ink-500); margin-left: 2px; }
	.ab-score__track {
	flex: 1; height: 8px;
	background: var(--ab-parchment-200);
	border-radius: 999px; overflow: hidden;
	}
	.ab-score__fill {
	height: 100%;
	background: linear-gradient(90deg, var(--ab-threat-400), var(--ab-gilt-400) 50%, var(--ab-safe-400));
	border-radius: 999px;
	transition: width 380ms var(--ab-ease);
	}
	.ab-bars { display: flex; flex-direction: column; gap: 10px; margin-top: 4px; }
	.ab-bar__row {
	display: flex; justify-content: space-between;
	font-size: 13px; margin-bottom: 4px;
	}
	.ab-bar__label { color: var(--ab-ink-800); font-weight: 500; }
	.ab-bar__value { color: var(--ab-ink-700); font-family: var(--font-mono); font-size: 12px; }
	.ab-bar__track {
	height: 8px; background: var(--ab-parchment-200);
	border-radius: 999px; overflow: hidden;
	}
	.ab-bar__fill { height: 100%; border-radius: 999px; transition: width 380ms var(--ab-ease); }
	.ab-quote {
	margin-top: 18px;
	padding: 14px 16px;
	background: var(--ab-parchment-100);
	border-left: 2px solid var(--ab-gilt-400);
	border-radius: 4px;
	}
	.ab-quote blockquote {
	font-family: var(--font-display);
	font-style: italic;
	font-size: 16px;
	color: var(--ab-ink-800);
	margin: 6px 0 0; padding: 0;
	line-height: 1.45;
	}

	/* ---------- Stats ---------- */
	.ab-kpi-row {
	display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px;
	margin: 4px 0 16px;
	}
	.ab-kpi {
	background: var(--ab-parchment-100);
	border: 1px solid var(--ab-border-subtle);
	border-radius: 8px;
	padding: 10px 12px;
	text-align: center;
	}
	.ab-kpi__label {
	font-family: var(--font-body);
	font-size: 11px; text-transform: uppercase; letter-spacing: 0.12em;
	color: var(--ab-ink-500);
	margin-bottom: 4px;
	}
	.ab-kpi__value {
	font-family: var(--font-display);
	font-size: 26px; line-height: 1;
	color: var(--ab-ink-900);
	letter-spacing: -0.01em;
	}
	.ab-stats { display: flex; flex-direction: column; }
	.ab-stats__row {
	display: flex; align-items: center; gap: 10px;
	padding: 6px 0;
	border-bottom: 1px solid var(--ab-border-subtle);
	font-size: 13px;
	}
	.ab-stats__row:last-child { border-bottom: 0; }
	.ab-stats__dot { width: 8px; height: 8px; border-radius: 999px; flex-shrink: 0; }
	.ab-stats__name { color: var(--ab-ink-800); flex: 1; }
	.ab-stats__count { color: var(--ab-ink-600); font-family: var(--font-mono); font-size: 12px; }
	.ab-stats__count em { color: var(--ab-ink-500); font-style: normal; }

	/* ---------- Gradio component overrides ---------- */
	.gradio-container .block, .gradio-container .form, .gradio-container .panel {
	background: transparent !important;
	border: none !important;
	}
	.gradio-container .gr-box, .gradio-container .gr-panel,
	.gradio-container .gr-form, .gradio-container [data-testid="block"] {
	background: transparent !important;
	border: none !important;
	box-shadow: none !important;
	}

	/* Plot wrapper — paper card */
	#tsne-chart {
	background: var(--ab-parchment-50) !important;
	border: 1px solid var(--ab-border) !important;
	border-radius: 12px !important;
	padding: 8px !important;
	box-shadow: var(--ab-shadow-sm) !important;
	}

	/* Buttons */
	.gradio-container button {
	font-family: var(--font-body) !important;
	font-weight: 500 !important;
	letter-spacing: 0 !important;
	border-radius: 8px !important;
	transition: transform 80ms var(--ab-ease), background-color 220ms var(--ab-ease) !important;
	}
	.gradio-container button:active { transform: scale(0.98) !important; }
	.gradio-container button.primary, .gradio-container button[variant="primary"] {
	background: var(--ab-ink-900) !important;
	color: var(--ab-parchment-50) !important;
	border: 1px solid var(--ab-ink-900) !important;
	}
	.gradio-container button.primary:hover {
	background: var(--ab-ink-800) !important;
	}
	.gradio-container button.secondary {
	background: var(--ab-parchment-50) !important;
	color: var(--ab-ink-900) !important;
	border: 1px solid var(--ab-border) !important;
	}
	.gradio-container button.secondary:hover {
	background: var(--ab-parchment-200) !important;
	}

	/* Text inputs / textareas */
	.gradio-container input[type="text"],
	.gradio-container textarea,
	.gradio-container .gr-input,
	.gradio-container .gr-textbox textarea {
	background: var(--ab-parchment-50) !important;
	color: var(--ab-ink-900) !important;
	border: 1px solid var(--ab-border) !important;
	border-radius: 8px !important;
	font-family: var(--font-body) !important;
	font-size: 14px !important;
	box-shadow: inset 0 1px 2px rgba(17,32,58,0.04);
	}
	.gradio-container input[type="text"]:focus,
	.gradio-container textarea:focus,
	.gradio-container .gr-textbox textarea:focus {
	outline: none !important;
	border-color: var(--ab-gilt-400) !important;
	box-shadow: 0 0 0 3px rgba(220,139,42,0.18) !important;
	}

	/* Labels */
	.gradio-container label, .gradio-container .label-wrap {
	color: var(--ab-ink-700) !important;
	font-family: var(--font-body) !important;
	font-size: 13px !important;
	font-weight: 500 !important;
	letter-spacing: 0.01em !important;
	}

	/* Dropdowns */
	.gradio-container .gr-dropdown, .gradio-container [data-testid="dropdown"] select,
	.gradio-container .wrap.svelte-1cl284s {
	background: var(--ab-parchment-50) !important;
	border: 1px solid var(--ab-border) !important;
	border-radius: 8px !important;
	color: var(--ab-ink-900) !important;
	}

	/* Checkbox group filter */
	.gradio-container .gr-check-radio,
	.gradio-container fieldset[data-testid="checkbox-group"] {
	background: var(--ab-parchment-50) !important;
	border: 1px solid var(--ab-border) !important;
	border-radius: 12px !important;
	padding: 12px 14px !important;
	}
	.gradio-container fieldset[data-testid="checkbox-group"] label {
	background: var(--ab-parchment-100) !important;
	border: 1px solid var(--ab-border-subtle) !important;
	border-radius: 999px !important;
	padding: 4px 10px !important;
	margin: 3px !important;
	font-size: 12px !important;
	}
	.gradio-container fieldset[data-testid="checkbox-group"] label:hover {
	background: var(--ab-parchment-200) !important;
	}
	.gradio-container input[type="checkbox"]:checked + * {
	color: var(--ab-ink-900) !important;
	}
	.gradio-container input[type="checkbox"] {
	accent-color: var(--ab-gilt-400) !important;
	}

	/* Markdown */
	.gradio-container .markdown, .gradio-container .prose {
	color: var(--ab-ink-800) !important;
	font-family: var(--font-body) !important;
	}
	.gradio-container .markdown h1, .gradio-container .markdown h2,
	.gradio-container .prose h1, .gradio-container .prose h2 {
	font-family: var(--font-display) !important;
	color: var(--ab-ink-900) !important;
	font-weight: 400 !important;
	letter-spacing: -0.01em !important;
	}
	.gradio-container .markdown h3, .gradio-container .prose h3 {
	font-family: var(--font-body) !important;
	font-weight: 600 !important;
	color: var(--ab-ink-900) !important;
	font-size: 16px !important;
	margin-bottom: 8px !important;
	}
	.gradio-container .markdown strong { color: var(--ab-ink-900) !important; font-weight: 600 !important; }
	.gradio-container .markdown a { color: var(--ab-signal-500) !important; }
	.gradio-container .markdown hr {
	border: none !important;
	border-top: 1px solid var(--ab-border) !important;
	margin: 18px 0 !important;
	}

	/* Hidden bridges from Plotly DOM → Gradio state */
	#click-index-input, #legend-sync-input {
	position: absolute !important;
	width: 1px !important;
	height: 1px !important;
	overflow: hidden !important;
	opacity: 0 !important;
	pointer-events: none !important;
	}

	/* Footer */
	.ab-footer {
	border-top: 1px solid var(--ab-border);
	margin-top: 36px;
	padding-top: 18px;
	text-align: center;
	}
	.ab-footer__line {
	font-family: var(--font-body);
	color: var(--ab-ink-500);
	font-size: 12px;
	letter-spacing: 0.02em;
	}
	.ab-footer__line a { color: var(--ab-signal-500); }
	.ab-footer__mark {
	font-family: var(--font-display);
	color: var(--ab-gilt-500);
	font-size: 14px;
	letter-spacing: 0.04em;
	margin-bottom: 6px;
	}
	.ab-footer__mark .heb { font-family: 'Frank Ruhl Libre', serif; }
	.ab-footer__mark .ar { font-family: 'Amiri', serif; }
	"""


	# ---------------------------------------------------------------------------
	# Header / How-to / Footer markup
	# ---------------------------------------------------------------------------
	# Page header markup: bilingual brand mark, product title and a one-line
	# description linking to the model and the dataset. Rendered as raw HTML via
	# gr.HTML(HEADER_HTML) at the top of the Blocks layout.
	# The ab-header* classes are presumably styled in ALEPH_BETH_CSS — confirm.
	# Both links open in a new tab; rel="noopener noreferrer" keeps the opened
	# page from getting a window.opener handle back to the app.
	HEADER_HTML = """
	<header class="ab-header">
	<div class="ab-header__brand">
	<div class="ab-header__mark">
	<span class="heb">א-ב</span> · <span class="ar">أب</span>
	</div>
	<div>
	<h1 class="ab-header__title">GuardLLM <em>—</em> Prompt Security Visualizer</h1>
	</div>
	</div>
	<p class="ab-header__sub">
	Editorial inspection of the prompt attack surface. Powered by
	<a href="https://huggingface.co/meta-llama/Llama-Prompt-Guard-2-86M" target="_blank" rel="noopener noreferrer">Llama Prompt Guard 2 (86M)</a>
	on the <a href="https://huggingface.co/datasets/neuralchemy/Prompt-injection-dataset" target="_blank" rel="noopener noreferrer">neuralchemy</a> corpus.
	</p>
	</header>
	"""

	# Three-step "how to" panel (01 Map, 02 Inspect, 03 Probe). Each step is an
	# ab-howto__step div holding a number, an eyebrow label, a title and a short
	# body. Rendered as raw HTML via gr.HTML(HOW_TO_HTML) right after the header.
	HOW_TO_HTML = """
	<div class="ab-howto">
	<div class="ab-howto__step">
	<div class="ab-howto__num">01</div>
	<div class="ab-eyebrow">Map</div>
	<div class="ab-howto__title">Explore the landscape</div>
	<div class="ab-howto__body">
	Each point is a prompt placed by semantic similarity. Color encodes the attack class.
	Hover to preview, scroll to zoom, drag to pan.
	</div>
	</div>
	<div class="ab-howto__step">
	<div class="ab-howto__num">02</div>
	<div class="ab-eyebrow">Inspect</div>
	<div class="ab-howto__title">Click to analyze</div>
	<div class="ab-howto__body">
	Selecting a point runs the classifier and returns a verdict, a safety score,
	and the full class probability breakdown.
	</div>
	</div>
	<div class="ab-howto__step">
	<div class="ab-howto__num">03</div>
	<div class="ab-eyebrow">Probe</div>
	<div class="ab-howto__title">Try your own prompt</div>
	<div class="ab-howto__body">
	Paste any text into the custom field below to see whether the model would flag
	it as injection or jailbreak.
	</div>
	</div>
	</div>
	"""

	# Page footer markup: brand mark plus one credit line linking to the model
	# and the dataset. Rendered as raw HTML via gr.HTML(FOOTER_HTML) at the end
	# of the Blocks layout.
	# The links open in a new tab (target="_blank") so that following them does
	# not navigate the frame hosting the app away from it, e.g. when the app is
	# embedded in an iframe. rel="noopener noreferrer" keeps the opened page from
	# getting a window.opener handle back to the app.
	FOOTER_HTML = """
	<footer class="ab-footer">
	<div class="ab-footer__mark"><span class="heb">א-ב</span> · ALEPH BETH · <span class="ar">أب</span></div>
	<div class="ab-footer__line">
	GuardLLM — Prompt Security Visualizer.
	Model: <a href="https://huggingface.co/meta-llama/Llama-Prompt-Guard-2-86M" target="_blank" rel="noopener noreferrer">Llama Prompt Guard 2 (86M)</a>.
	Dataset: <a href="https://huggingface.co/datasets/neuralchemy/Prompt-injection-dataset" target="_blank" rel="noopener noreferrer">neuralchemy / Prompt-injection-dataset</a>.
	</div>
	</footer>
	"""


	# ---------------------------------------------------------------------------
	# Gradio theme (parchment / ink)
	# ---------------------------------------------------------------------------
	# Hairline tint shared by every themed border below.
	_ab_hairline = "rgba(17,32,58,0.12)"

	# Primary ramp: parchment on the light end, gilt in the middle, ink at the
	# dark end.
	_ab_primary_hue = gr.themes.Color(
	    c50=AB["parchment_50"],
	    c100=AB["parchment_100"],
	    c200=AB["parchment_200"],
	    c300=AB["parchment_300"],
	    c400=AB["gilt_300"],
	    c500=AB["gilt_400"],
	    c600=AB["gilt_500"],
	    c700=AB["gilt_600"],
	    c800=AB["ink_800"],
	    c900=AB["ink_900"],
	    c950=AB["ink_950"],
	)

	# Neutral ramp: parchment on the light end, ink everywhere else.
	_ab_neutral_hue = gr.themes.Color(
	    c50=AB["parchment_50"],
	    c100=AB["parchment_100"],
	    c200=AB["parchment_200"],
	    c300=AB["ink_200"],
	    c400=AB["ink_300"],
	    c500=AB["ink_500"],
	    c600=AB["ink_600"],
	    c700=AB["ink_700"],
	    c800=AB["ink_800"],
	    c900=AB["ink_900"],
	    c950=AB["ink_950"],
	)

	# Base theme carrying the two ramps and the font stacks.
	_ab_base_theme = gr.themes.Base(
	    primary_hue=_ab_primary_hue,
	    neutral_hue=_ab_neutral_hue,
	    font=[gr.themes.GoogleFont("Geist"), "Inter", "system-ui", "sans-serif"],
	    font_mono=[gr.themes.GoogleFont("Geist Mono"), "JetBrains Mono", "monospace"],
	)

	# Final theme: explicit overrides for page, blocks, inputs and buttons.
	ab_theme = _ab_base_theme.set(
	    # Page and generic surfaces
	    body_background_fill=AB["parchment_100"],
	    body_text_color=AB["ink_900"],
	    background_fill_primary=AB["parchment_50"],
	    background_fill_secondary=AB["parchment_100"],
	    border_color_primary=_ab_hairline,
	    # Blocks
	    block_background_fill=AB["parchment_50"],
	    block_border_color=_ab_hairline,
	    block_label_text_color=AB["ink_700"],
	    block_title_text_color=AB["ink_900"],
	    # Inputs
	    input_background_fill=AB["parchment_50"],
	    input_border_color=_ab_hairline,
	    input_border_color_focus=AB["gilt_400"],
	    # Primary buttons
	    button_primary_background_fill=AB["ink_900"],
	    button_primary_background_fill_hover=AB["ink_800"],
	    button_primary_text_color=AB["parchment_50"],
	    # Secondary buttons
	    button_secondary_background_fill=AB["parchment_50"],
	    button_secondary_background_fill_hover=AB["parchment_200"],
	    button_secondary_text_color=AB["ink_900"],
	)


	# ---------------------------------------------------------------------------
	# Gradio Interface
	# ---------------------------------------------------------------------------
	# Build the whole UI inside one Blocks context bound to `demo`: header and
	# how-to banners, two bridge textboxes, a Row with the prompt-selection
	# widgets (column scale 3) and the analysis widgets (column scale 2), the
	# HTML returned by build_stats_html(), the event wiring and the footer.
	with gr.Blocks(
	title="GuardLLM — Prompt Security Visualizer",
	theme=ab_theme,
	css=ALEPH_BETH_CSS,
	) as demo:

	gr.HTML(HEADER_HTML)
	gr.HTML(HOW_TO_HTML)

	# Hidden bridges from Plotly DOM → Gradio state
	# Both textboxes are created with visible=True; the CSS rule on
	# #click-index-input / #legend-sync-input is what hides them on screen.
	# Presumably they must stay in the DOM so PLOTLY_CLICK_JS can write into
	# them and fire their .change events — confirm against that script.
	click_index = gr.Textbox(value="", visible=True, elem_id="click-index-input")
	legend_sync = gr.Textbox(value="", visible=True, elem_id="legend-sync-input")

	with gr.Row():
	# ============================================================
	# LEFT — every way to pick a prompt
	# ============================================================
	with gr.Column(scale=3):
	gr.HTML("<div class='ab-eyebrow'>Map</div>"
	"<h3 class='ab-h3'>t-SNE — Prompt landscape</h3>")
	# Plot is seeded with build_tsne_figure() called without arguments.
	tsne_plot = gr.Plot(
	value=build_tsne_figure(),
	label="t-SNE space",
	elem_id="tsne-chart",
	show_label=False,
	)
	gr.Markdown(
	"<span class='ab-caption'>Click a point to inspect. "
	"Click a legend entry to isolate that category — click again to restore. "
	"Double-click a legend entry to toggle just that trace.</span>"
	)

	gr.HTML("<div class='ab-eyebrow' style='margin-top:18px;'>Filter</div>"
	"<h3 class='ab-h3'>By category</h3>")
	with gr.Row():
	select_all_btn = gr.Button("Select all", size="sm", scale=1)
	deselect_all_btn = gr.Button("Deselect all", size="sm", scale=1)
	# Every category starts checked (value == choices).
	category_filter = gr.CheckboxGroup(
	choices=UNIQUE_CATEGORIES,
	value=UNIQUE_CATEGORIES,
	label="Categories",
	show_label=False,
	interactive=True,
	)

	gr.HTML("<div class='ab-eyebrow' style='margin-top:18px;'>Library</div>"
	"<h3 class='ab-h3'>Pick a prompt from the dataset</h3>")
	# filterable=True lets the user type to narrow DROPDOWN_CHOICES.
	prompt_dropdown = gr.Dropdown(
	choices=DROPDOWN_CHOICES,
	label="Search the dataset",
	show_label=False,
	filterable=True,
	interactive=True,
	)

	gr.HTML("<div class='ab-eyebrow' style='margin-top:18px;'>Custom</div>"
	"<h3 class='ab-h3'>Analyze your own prompt</h3>")
	manual_input = gr.Textbox(
	label="Prompt",
	show_label=False,
	placeholder="Type or paste a request to evaluate…",
	lines=3,
	)
	analyze_btn = gr.Button("Inspect", variant="primary")

	# ============================================================
	# RIGHT — the analysis only
	# ============================================================
	with gr.Column(scale=2):
	gr.HTML("<div class='ab-eyebrow'>Analysis</div>"
	"<h3 class='ab-h3'>Verdict & confidence</h3>")
	# Initial content is the placeholder returned by empty_analysis_html().
	result_html = gr.HTML(value=empty_analysis_html())
	risk_md = gr.Markdown(value="")
	# Read-only box; filled by the map-click and dropdown handlers below.
	full_prompt = gr.Textbox(
	label="Full prompt",
	lines=4,
	interactive=False,
	visible=True,
	)

	# Horizontal rule ahead of the HTML produced by build_stats_html().
	gr.Markdown("---")

	gr.HTML(build_stats_html())

	# ---- Events ----
	# Category selection: the checkbox group, both bulk buttons and the
	# legend bridge all end up refreshing tsne_plot; the latter three also
	# write the checkbox group back.
	category_filter.change(fn=on_filter_change, inputs=[category_filter], outputs=[tsne_plot])
	select_all_btn.click(fn=select_all_categories, inputs=[], outputs=[category_filter, tsne_plot])
	deselect_all_btn.click(fn=deselect_all_categories, inputs=[], outputs=[category_filter, tsne_plot])
	legend_sync.change(fn=on_legend_sync, inputs=[legend_sync],
	outputs=[category_filter, tsne_plot])
	# Prompt chosen from the map (via the click bridge) or from the dropdown:
	# both handlers fill the verdict HTML, the risk markdown and full_prompt.
	click_index.change(fn=on_index_input, inputs=[click_index],
	outputs=[result_html, risk_md, full_prompt])
	prompt_dropdown.change(fn=on_dropdown_select, inputs=[prompt_dropdown],
	outputs=[result_html, risk_md, full_prompt])
	# Custom prompt: button click and Enter in the textbox share one handler.
	# NOTE(review): full_prompt is not among these outputs, so after a custom
	# analysis it still shows the last map/dropdown selection — confirm this
	# is intended.
	analyze_btn.click(fn=on_manual_analyze, inputs=[manual_input],
	outputs=[result_html, risk_md])
	manual_input.submit(fn=on_manual_analyze, inputs=[manual_input],
	outputs=[result_html, risk_md])
	# Load event with no Python callback (fn=None): only PLOTLY_CLICK_JS is
	# attached, as the event's js.
	demo.load(fn=None, inputs=None, outputs=None, js=PLOTLY_CLICK_JS)

	gr.HTML(FOOTER_HTML)


	# Logged once the UI definition above has been executed.
	logger.info("Gradio app built. Ready to launch.")

	# Start the Gradio server only when this file is executed as a script;
	# launch() is called with its default arguments.
	if __name__ == "__main__":
	demo.launch()