arsiba commited on
Commit
21c3afc
·
1 Parent(s): 2bfc379

feat: add separate context box

Browse files
Files changed (1) hide show
  1. app.py +12 -14
app.py CHANGED
@@ -36,7 +36,7 @@ model = AutoModelForCausalLM.from_pretrained(
36
 
37
  SYS = (
38
  "You are a legal AI assistant specialized in GDPR/EDPB. "
39
- "If you cannot find an answer in the context, reply 'I do not know.' "
40
  "Answer this Question:"
41
  )
42
 
@@ -52,11 +52,9 @@ def retrieve(q, k=3):
52
  return docs, file_sources
53
 
54
 
55
- def make_prompt(q, docs, reasoning_mode):
56
  context = "\n\n".join(f"Title: {d['title']}\nPages: {d['pages']}" for d in docs)
57
- prompt = f"detailed thinking {reasoning_mode}\n"
58
- if reasoning_mode == "off":
59
- prompt += "eager_mode on\n"
60
  prompt += f"Instruct: {SYS} {q} based on the following documents:\n{context}\nOutput:"
61
  return prompt
62
 
@@ -69,10 +67,11 @@ def build_markdown_links(file_input):
69
  return "\n\n".join(lines)
70
 
71
  @spaces.GPU()
72
- def qa_fn(question, reasoning_mode, top_k, temperature, max_tokens):
73
  docs, file_sources = retrieve(question, top_k)
74
  file_links = build_markdown_links(file_sources)
75
- prompt = make_prompt(question, docs, reasoning_mode)[:8000]
 
76
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
77
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
78
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
@@ -88,27 +87,26 @@ def qa_fn(question, reasoning_mode, top_k, temperature, max_tokens):
88
  output = ""
89
  for tok in streamer:
90
  output += tok
91
- if "</think>" in output:
92
- output = output.split("</think>", 1)[1].strip()
93
- return output, file_sources, file_links
94
 
95
  outputs_answer = gr.Textbox(label="Answer")
96
- outputs_sources = gr.JSON(label="Sources (Used Files)")
97
  outputs_link = gr.Markdown(label="Source Link")
 
98
 
99
 
100
  demo = gr.Interface(
101
  fn=qa_fn,
102
  inputs=[
103
  gr.Textbox(lines=2, label="Your Question"),
104
- gr.Radio(["on", "off"], value="off", label="Reasoning Mode"),
105
  gr.Slider(1, 7, value=4, step=1, label="Top-K Documents"),
106
  gr.Slider(0.1, 1.0, value=0.6, step=0.05, label="Temperature"),
107
  gr.Slider(64, 1024, value=512, step=64, label="Max Answer Length")
108
  ],
109
- outputs=[outputs_answer, outputs_sources, outputs_link],
110
  title="GDPR Legal Assistant",
111
- description="Ask any question about GDPR or EDPB documents. The response includes used files and chunks.",
112
  allow_flagging="never"
113
  )
114
 
 
36
 
37
  SYS = (
38
  "You are a legal AI assistant specialized in GDPR/EDPB. "
39
+ "If you cannot find an answer in the context, it's okay to speculate. But if so, make it clear. Also it is important to recite the context in your Answer if fitting. "
40
  "Answer this Question:"
41
  )
42
 
 
52
  return docs, file_sources
53
 
54
 
55
+ def make_prompt(q, docs):
56
  context = "\n\n".join(f"Title: {d['title']}\nPages: {d['pages']}" for d in docs)
57
+ prompt = f"detailed thinking off\n"
 
 
58
  prompt += f"Instruct: {SYS} {q} based on the following documents:\n{context}\nOutput:"
59
  return prompt
60
 
 
67
  return "\n\n".join(lines)
68
 
69
  @spaces.GPU()
70
+ def qa_fn(question, top_k, temperature, max_tokens):
71
  docs, file_sources = retrieve(question, top_k)
72
  file_links = build_markdown_links(file_sources)
73
+ outputs_chunks = docs
74
+ prompt = make_prompt(question, docs)[:8000]
75
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
76
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
77
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
 
87
  output = ""
88
  for tok in streamer:
89
  output += tok
90
+ if "Output:" in output:
91
+ output = output.split("Output:", 1)[1].strip()
92
+ return output, file_links, outputs_chunks
93
 
94
  outputs_answer = gr.Textbox(label="Answer")
 
95
  outputs_link = gr.Markdown(label="Source Link")
96
+ outputs_chunks = gr.Textbox(label="Used Chunks")
97
 
98
 
99
  demo = gr.Interface(
100
  fn=qa_fn,
101
  inputs=[
102
  gr.Textbox(lines=2, label="Your Question"),
 
103
  gr.Slider(1, 7, value=4, step=1, label="Top-K Documents"),
104
  gr.Slider(0.1, 1.0, value=0.6, step=0.05, label="Temperature"),
105
  gr.Slider(64, 1024, value=512, step=64, label="Max Answer Length")
106
  ],
107
+ outputs=[outputs_answer, outputs_link, outputs_chunks],
108
  title="GDPR Legal Assistant",
109
+ description="Ask any question about GDPR or EDPB documents.",
110
  allow_flagging="never"
111
  )
112