Spaces:
Runtime error
Runtime error
feat: add separate context box
Browse files
app.py
CHANGED
@@ -36,7 +36,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
36 |
|
37 |
SYS = (
|
38 |
"You are a legal AI assistant specialized in GDPR/EDPB. "
|
39 |
-
"If you cannot find an answer in the context,
|
40 |
"Answer this Question:"
|
41 |
)
|
42 |
|
@@ -52,11 +52,9 @@ def retrieve(q, k=3):
|
|
52 |
return docs, file_sources
|
53 |
|
54 |
|
55 |
-
def make_prompt(q, docs
|
56 |
context = "\n\n".join(f"Title: {d['title']}\nPages: {d['pages']}" for d in docs)
|
57 |
-
prompt = f"detailed thinking
|
58 |
-
if reasoning_mode == "off":
|
59 |
-
prompt += "eager_mode on\n"
|
60 |
prompt += f"Instruct: {SYS} {q} based on the following documents:\n{context}\nOutput:"
|
61 |
return prompt
|
62 |
|
@@ -69,10 +67,11 @@ def build_markdown_links(file_input):
|
|
69 |
return "\n\n".join(lines)
|
70 |
|
71 |
@spaces.GPU()
|
72 |
-
def qa_fn(question,
|
73 |
docs, file_sources = retrieve(question, top_k)
|
74 |
file_links = build_markdown_links(file_sources)
|
75 |
-
|
|
|
76 |
inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
|
77 |
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
78 |
streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
|
@@ -88,27 +87,26 @@ def qa_fn(question, reasoning_mode, top_k, temperature, max_tokens):
|
|
88 |
output = ""
|
89 |
for tok in streamer:
|
90 |
output += tok
|
91 |
-
if "
|
92 |
-
output = output.split("
|
93 |
-
return output,
|
94 |
|
95 |
outputs_answer = gr.Textbox(label="Answer")
|
96 |
-
outputs_sources = gr.JSON(label="Sources (Used Files)")
|
97 |
outputs_link = gr.Markdown(label="Source Link")
|
|
|
98 |
|
99 |
|
100 |
demo = gr.Interface(
|
101 |
fn=qa_fn,
|
102 |
inputs=[
|
103 |
gr.Textbox(lines=2, label="Your Question"),
|
104 |
-
gr.Radio(["on", "off"], value="off", label="Reasoning Mode"),
|
105 |
gr.Slider(1, 7, value=4, step=1, label="Top-K Documents"),
|
106 |
gr.Slider(0.1, 1.0, value=0.6, step=0.05, label="Temperature"),
|
107 |
gr.Slider(64, 1024, value=512, step=64, label="Max Answer Length")
|
108 |
],
|
109 |
-
outputs=[outputs_answer,
|
110 |
title="GDPR Legal Assistant",
|
111 |
-
description="Ask any question about GDPR or EDPB documents.
|
112 |
allow_flagging="never"
|
113 |
)
|
114 |
|
|
|
36 |
|
37 |
# System preamble prepended to every prompt (see make_prompt).
# Adjacent string literals are concatenated by the parser into one string.
SYS = (
    "You are a legal AI assistant specialized in GDPR/EDPB. "
    "If you cannot find an answer in the context, it's okay to speculate. But if so, make it clear. Also it is important to recite the context in your Answer if fitting. "
    "Answer this Question:"
)
|
42 |
|
|
|
52 |
return docs, file_sources
|
53 |
|
54 |
|
55 |
+
def make_prompt(q, docs):
    """Build the instruct-style prompt for the model.

    Args:
        q: The user's question.
        docs: Iterable of dicts with at least 'title' and 'pages' keys
            describing the retrieved documents.

    Returns:
        Prompt string ending in "Output:" so the generated answer can be
        split off downstream.
    """
    # NOTE(review): the context contains only each doc's title and pages,
    # not the chunk text itself — confirm that is intentional.
    context = "\n\n".join(f"Title: {d['title']}\nPages: {d['pages']}" for d in docs)
    # Model-specific control line; fix: was a pointless f-string with no
    # placeholders — a plain literal is equivalent.
    prompt = "detailed thinking off\n"
    prompt += f"Instruct: {SYS} {q} based on the following documents:\n{context}\nOutput:"
    return prompt
|
60 |
|
|
|
67 |
return "\n\n".join(lines)
|
68 |
|
69 |
@spaces.GPU()
|
70 |
+
def qa_fn(question, top_k, temperature, max_tokens):
|
71 |
docs, file_sources = retrieve(question, top_k)
|
72 |
file_links = build_markdown_links(file_sources)
|
73 |
+
outputs_chunks = docs
|
74 |
+
prompt = make_prompt(question, docs)[:8000]
|
75 |
inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
|
76 |
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
77 |
streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
|
|
|
87 |
output = ""
|
88 |
for tok in streamer:
|
89 |
output += tok
|
90 |
+
if "Output:" in output:
|
91 |
+
output = output.split("Output:", 1)[1].strip()
|
92 |
+
return output, file_links, outputs_chunks
|
93 |
|
94 |
# --- Gradio UI -------------------------------------------------------------
# Output widgets; qa_fn returns one value per widget, in this order.
outputs_answer = gr.Textbox(label="Answer")
outputs_link = gr.Markdown(label="Source Link")
outputs_chunks = gr.Textbox(label="Used Chunks")


# Input widgets: question text plus retrieval/generation controls.
ui_inputs = [
    gr.Textbox(lines=2, label="Your Question"),
    gr.Slider(1, 7, value=4, step=1, label="Top-K Documents"),
    gr.Slider(0.1, 1.0, value=0.6, step=0.05, label="Temperature"),
    gr.Slider(64, 1024, value=512, step=64, label="Max Answer Length"),
]

demo = gr.Interface(
    fn=qa_fn,
    inputs=ui_inputs,
    outputs=[outputs_answer, outputs_link, outputs_chunks],
    title="GDPR Legal Assistant",
    description="Ask any question about GDPR or EDPB documents.",
    # NOTE(review): allow_flagging is deprecated in Gradio 4.x in favor of
    # flagging_mode — confirm the pinned gradio version before changing.
    allow_flagging="never"
)
|
112 |
|