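# GDPR Legal Assistant: a Hugging Face Space (running on ZeroGPU) that answers
# questions about GDPR/EDPB documents with retrieval-augmented generation.
# FAISS similarity search retrieves pre-embedded document chunks, and a 4-bit
# quantized Zephyr-7B model generates a streamed answer from them.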
import spaces
import pickle
import numpy as np
import faiss
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextIteratorStreamer
from sentence_transformers import SentenceTransformer
import gradio as gr
from threading import Thread
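# Load the prebuilt vector store: the FAISS index, the raw text chunks, and the
# per-chunk metadata (source file, directory, page) created at indexing time.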
index = faiss.read_index("vector_db/index.faiss")
with open("vector_db/chunks.pkl", "rb") as f:
chunks = pickle.load(f)
with open("vector_db/metadata.pkl", "rb") as f:
metadata_dict = pickle.load(f)
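# Query embedder; this must be the same model that was used to build the index,
# otherwise the nearest-neighbor search runs in a mismatched vector space.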
ST = SentenceTransformer("BAAI/bge-large-en-v1.5")
github_base_url = "https://github.com/arsiba/EDPB-AI/blob/main/"
model_id = "HuggingFaceH4/zephyr-7b-beta"
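# 4-bit NF4 quantization with double quantization keeps the 7B model small
# enough for the Space's GPU slice while computing in bfloat16.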
bnb = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16
)
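# Load the tokenizer; it ships without a dedicated pad token, so reuse EOS.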
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token_id = tokenizer.eos_token_id
model = AutoModelForCausalLM.from_pretrained(
model_id,
quantization_config=bnb,
device_map={"": 0},
torch_dtype=torch.bfloat16,
trust_remote_code=True
)
SYS = (
    "You are a legal AI assistant specialized in GDPR/EDPB. "
    "If you cannot find an answer in the context, it's okay to speculate, but make it clear when you do. "
    "Answer this question:"
)
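# Embed the query and run a k-nearest-neighbor search against the FAISS index.
# @spaces.GPU() requests a GPU slice for the call on ZeroGPU Spaces.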
@spaces.GPU()
def retrieve(q, k=3):
emb = ST.encode(q)
D, I = index.search(np.array([emb], dtype="float32"), k)
docs, file_sources = [], []
for i in I[0]:
chunk = chunks[i]
meta = metadata_dict[i]
docs.append({"title": meta, "pages": chunk})
file_sources.append(meta)
return docs, file_sources
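# Assemble the instruction prompt: system instructions, the user question, and
# the retrieved chunks as inline context labeled with their source and page.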
def make_prompt(q, docs):
    context = "\n\n".join(
        f"Title: {d['title']['source']} (page {d['title']['page']})\nContent: {d['pages']}"
        for d in docs
    )
    prompt = "detailed thinking off\n"
prompt += f"Instruct: {SYS} {q} based on the following documents:\n{context}\nOutput:"
return prompt
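# Turn the retrieved metadata into Markdown links pointing at the source files
# in the project's GitHub repository.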
def build_markdown_links(file_input):
lines = []
for idx, item in enumerate(file_input, start=1):
        url = f"{github_base_url}{item['directory']}/{item['source']}"  # base URL already ends with "/"
line = f"**Source {idx}:** [{item['source']}]({url}) on page {item['page']}"
lines.append(line)
return "\n\n".join(lines)
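# Render the retrieved chunks themselves as Markdown, labeled with their
# source file and page, so users can check the answer against its context.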
def build_markdown_chunks(docs):
lines = []
for idx, d in enumerate(docs, start=1):
title = d['title']['source']
page = d['title']['page']
text = d['pages']
lines.append(f"**Chunk {idx}:** {title} on page {page}\n\n{text}")
return "\n\n".join(lines)
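# End-to-end pipeline: retrieve context, build the prompt, generate an answer
# on the GPU, and return Markdown blocks for the Gradio outputs below.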
@spaces.GPU()
def qa_fn(faiss_search, question, top_k, temperature, max_tokens):
docs, file_sources = retrieve(faiss_search, top_k)
file_links = build_markdown_links(file_sources)
markdown_chunks = build_markdown_chunks(docs)
prompt = make_prompt(question, docs)[:8000]
inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
inputs = {k: v.to(model.device) for k, v in inputs.items()}
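    # Generate in a background thread and stream tokens through the iterator so
    # the function can consume output incrementally instead of blocking.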
streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    Thread(target=model.generate, kwargs={
        **inputs,
        "streamer": streamer,
        "max_new_tokens": max_tokens,
        "do_sample": True,  # sampling must be enabled for temperature/top_p to take effect
        "temperature": temperature,
        "top_p": 0.9,
        "eos_token_id": tokenizer.eos_token_id
    }).start()
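    # Drain the streamer; everything up to "Output:" is the echoed prompt, so
    # strip it once the marker has been seen.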
output = ""
for tok in streamer:
output += tok
if "Output:" in output:
output = output.split("Output:", 1)[1].strip()
return "\n# Generated Answer\n", output,"\n# Used Documents\n", file_links, "\n# Used Context\n", markdown_chunks
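# Output components: the Markdown headings are filled by qa_fn alongside the
# answer, the source links, and the context chunks.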
heading_answer = gr.Markdown(label="Answer Heading")
outputs_answer = gr.Textbox(label="Answer")
heading_links = gr.Markdown(label="Links Heading")
heading_chunks = gr.Markdown(label="Chunks Heading")
outputs_link = gr.Markdown(label="Source Link")
outputs_chunks = gr.Markdown(label="Used Chunks")
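# Wire the UI: two text inputs, advanced sliders in an accordion, and the six
# output components defined above.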
demo = gr.Interface(
fn=qa_fn,
inputs=[
        gr.Textbox(lines=4, label="What documents are you looking for?", placeholder="Please change this example to get proper results:\nDocuments covering the EDPB’s stance on automated decision-making, particularly profiling, under the GDPR. Guidelines on how organizations should inform data subjects about automated decisions and the rights of individuals to object to such decisions."),
        gr.Textbox(lines=1, label="What is your question?", placeholder="Please change this example to get proper results:\nWhat does the EDPB recommend regarding automated decision-making and profiling under the GDPR, and what rights do individuals have in relation to such decisions?"),
],
additional_inputs=[
gr.Slider(1, 10, value=7, step=1, label="Top-K Documents"),
gr.Slider(0.1, 1.0, value=0.6, step=0.05, label="Temperature"),
gr.Slider(64, 1024, value=512, step=64, label="Max Answer Length")
],
additional_inputs_accordion="Advanced Options",
outputs=[
heading_answer,
outputs_answer,
heading_links,
outputs_link,
heading_chunks,
outputs_chunks
],
title="GDPR Legal Assistant",
description="Ask any question about GDPR or EDPB documents.",
allow_flagging="never",
fill_width=True,
)
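# share=True creates a public link when run locally; on Spaces it is ignored.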
if __name__ == "__main__":
demo.launch(share=True) |