Spaces:
Runtime error
Runtime error
feat: add pdf links
Browse files
app.py
CHANGED
@@ -15,6 +15,7 @@ with open("vector_db/metadata.pkl", "rb") as f:
|
|
15 |
metadata_dict = pickle.load(f)
|
16 |
|
17 |
ST = SentenceTransformer("BAAI/bge-large-en-v1.5")
|
|
|
18 |
|
19 |
model_id = "nvidia/Llama-3.1-Nemotron-Nano-8B-v1"
|
20 |
bnb = BitsAndBytesConfig(
|
@@ -59,9 +60,18 @@ def make_prompt(q, docs, reasoning_mode):
|
|
59 |
prompt += f"Instruct: {SYS} {q} based on the following documents:\n{context}\nOutput:"
|
60 |
return prompt
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
@spaces.GPU()
|
63 |
def qa_fn(question, reasoning_mode, top_k, temperature, max_tokens):
|
64 |
docs, file_sources = retrieve(question, top_k)
|
|
|
65 |
prompt = make_prompt(question, docs, reasoning_mode)[:8000]
|
66 |
inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
|
67 |
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
@@ -80,10 +90,12 @@ def qa_fn(question, reasoning_mode, top_k, temperature, max_tokens):
|
|
80 |
output += tok
|
81 |
if "</think>" in output:
|
82 |
output = output.split("</think>", 1)[1].strip()
|
83 |
-
return output, file_sources
|
84 |
|
85 |
outputs_answer = gr.Textbox(label="Answer")
|
86 |
outputs_sources = gr.JSON(label="Sources (Used Files)")
|
|
|
|
|
87 |
|
88 |
demo = gr.Interface(
|
89 |
fn=qa_fn,
|
@@ -94,7 +106,7 @@ demo = gr.Interface(
|
|
94 |
gr.Slider(0.1, 1.0, value=0.6, step=0.05, label="Temperature"),
|
95 |
gr.Slider(64, 1024, value=512, step=64, label="Max Answer Length")
|
96 |
],
|
97 |
-
outputs=[outputs_answer, outputs_sources],
|
98 |
title="GDPR Legal Assistant",
|
99 |
description="Ask any question about GDPR or EDPB documents. The response includes used files and chunks.",
|
100 |
allow_flagging="never"
|
|
|
15 |
metadata_dict = pickle.load(f)
|
16 |
|
17 |
ST = SentenceTransformer("BAAI/bge-large-en-v1.5")
|
18 |
+
github_base_url = "https://github.com/arsiba/EDPB-AI/blob/main/"
|
19 |
|
20 |
model_id = "nvidia/Llama-3.1-Nemotron-Nano-8B-v1"
|
21 |
bnb = BitsAndBytesConfig(
|
|
|
60 |
prompt += f"Instruct: {SYS} {q} based on the following documents:\n{context}\nOutput:"
|
61 |
return prompt
|
62 |
|
63 |
+
def build_markdown_links(file_input, base_url=None):
    """Render retrieved source records as Markdown links, one per paragraph.

    Args:
        file_input: Iterable of mappings, each providing the keys
            ``"directory"``, ``"source"`` and ``"page"``.
        base_url: Optional URL prefix for the links. When omitted, the
            module-level ``github_base_url`` is used.

    Returns:
        A string with one ``**Source N:** [name](url) on page P`` entry per
        record, separated by blank lines; an empty string when there are no
        records.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import quote

    if base_url is None:
        base_url = github_base_url
    # The configured base already ends with "/"; strip it so joining below
    # does not produce ".../main//<directory>/...".
    root = base_url.rstrip("/")

    lines = []
    for idx, item in enumerate(file_input, start=1):
        # Percent-encode each part: spaces or parentheses in a file name
        # would otherwise terminate the Markdown link destination early.
        segments = (
            quote(str(item["directory"]).strip("/"), safe="/"),
            quote(str(item["source"]).lstrip("/"), safe="/"),
        )
        # Drop empty parts (e.g. a blank directory) to avoid empty path
        # segments in the URL.
        path = "/".join(segment for segment in segments if segment)
        url = f"{root}/{path}"
        lines.append(
            f"**Source {idx}:** [{item['source']}]({url}) on page {item['page']}"
        )
    return "\n\n".join(lines)
|
70 |
+
|
71 |
@spaces.GPU()
|
72 |
def qa_fn(question, reasoning_mode, top_k, temperature, max_tokens):
|
73 |
docs, file_sources = retrieve(question, top_k)
|
74 |
+
file_links = build_markdown_links(file_sources)
|
75 |
prompt = make_prompt(question, docs, reasoning_mode)[:8000]
|
76 |
inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
|
77 |
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
|
|
90 |
output += tok
|
91 |
if "</think>" in output:
|
92 |
output = output.split("</think>", 1)[1].strip()
|
93 |
+
return output, file_sources, file_links
|
94 |
|
95 |
outputs_answer = gr.Textbox(label="Answer")
|
96 |
outputs_sources = gr.JSON(label="Sources (Used Files)")
|
97 |
+
outputs_link = gr.Markdown(label="Source Link")
|
98 |
+
|
99 |
|
100 |
demo = gr.Interface(
|
101 |
fn=qa_fn,
|
|
|
106 |
gr.Slider(0.1, 1.0, value=0.6, step=0.05, label="Temperature"),
|
107 |
gr.Slider(64, 1024, value=512, step=64, label="Max Answer Length")
|
108 |
],
|
109 |
+
outputs=[outputs_answer, outputs_sources, outputs_link],
|
110 |
title="GDPR Legal Assistant",
|
111 |
description="Ask any question about GDPR or EDPB documents. The response includes used files and chunks.",
|
112 |
allow_flagging="never"
|