arsiba commited on
Commit
21c3afc
·
1 Parent(s): 2bfc379

feat: add separate context box

Browse files
Files changed (1) hide show
  1. app.py +12 -14
app.py CHANGED
@@ -36,7 +36,7 @@ model = AutoModelForCausalLM.from_pretrained(
36
 
37
  SYS = (
38
  "You are a legal AI assistant specialized in GDPR/EDPB. "
39
- "If you cannot find an answer in the context, reply 'I do not know.' "
40
  "Answer this Question:"
41
  )
42
 
@@ -52,11 +52,9 @@ def retrieve(q, k=3):
52
  return docs, file_sources
53
 
54
 
55
- def make_prompt(q, docs, reasoning_mode):
56
  context = "\n\n".join(f"Title: {d['title']}\nPages: {d['pages']}" for d in docs)
57
- prompt = f"detailed thinking {reasoning_mode}\n"
58
- if reasoning_mode == "off":
59
- prompt += "eager_mode on\n"
60
  prompt += f"Instruct: {SYS} {q} based on the following documents:\n{context}\nOutput:"
61
  return prompt
62
 
@@ -69,10 +67,11 @@ def build_markdown_links(file_input):
69
  return "\n\n".join(lines)
70
 
71
  @spaces.GPU()
72
- def qa_fn(question, reasoning_mode, top_k, temperature, max_tokens):
73
  docs, file_sources = retrieve(question, top_k)
74
  file_links = build_markdown_links(file_sources)
75
- prompt = make_prompt(question, docs, reasoning_mode)[:8000]
 
76
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
77
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
78
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
@@ -88,27 +87,26 @@ def qa_fn(question, reasoning_mode, top_k, temperature, max_tokens):
88
  output = ""
89
  for tok in streamer:
90
  output += tok
91
- if "</think>" in output:
92
- output = output.split("</think>", 1)[1].strip()
93
- return output, file_sources, file_links
94
 
95
  outputs_answer = gr.Textbox(label="Answer")
96
- outputs_sources = gr.JSON(label="Sources (Used Files)")
97
  outputs_link = gr.Markdown(label="Source Link")
 
98
 
99
 
100
  demo = gr.Interface(
101
  fn=qa_fn,
102
  inputs=[
103
  gr.Textbox(lines=2, label="Your Question"),
104
- gr.Radio(["on", "off"], value="off", label="Reasoning Mode"),
105
  gr.Slider(1, 7, value=4, step=1, label="Top-K Documents"),
106
  gr.Slider(0.1, 1.0, value=0.6, step=0.05, label="Temperature"),
107
  gr.Slider(64, 1024, value=512, step=64, label="Max Answer Length")
108
  ],
109
- outputs=[outputs_answer, outputs_sources, outputs_link],
110
  title="GDPR Legal Assistant",
111
- description="Ask any question about GDPR or EDPB documents. The response includes used files and chunks.",
112
  allow_flagging="never"
113
  )
114
 
 
36
 
37
  SYS = (
38
  "You are a legal AI assistant specialized in GDPR/EDPB. "
39
+ "If you cannot find an answer in the context, it's okay to speculate. But if so, make it clear. Also it is important to recite the context in your Answer if fitting. "
40
  "Answer this Question:"
41
  )
42
 
 
52
  return docs, file_sources
53
 
54
 
55
+ def make_prompt(q, docs):
56
  context = "\n\n".join(f"Title: {d['title']}\nPages: {d['pages']}" for d in docs)
57
+ prompt = f"detailed thinking off\n"
 
 
58
  prompt += f"Instruct: {SYS} {q} based on the following documents:\n{context}\nOutput:"
59
  return prompt
60
 
 
67
  return "\n\n".join(lines)
68
 
69
  @spaces.GPU()
70
+ def qa_fn(question, top_k, temperature, max_tokens):
71
  docs, file_sources = retrieve(question, top_k)
72
  file_links = build_markdown_links(file_sources)
73
+ outputs_chunks = docs
74
+ prompt = make_prompt(question, docs)[:8000]
75
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
76
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
77
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
 
87
  output = ""
88
  for tok in streamer:
89
  output += tok
90
+ if "Output:" in output:
91
+ output = output.split("Output:", 1)[1].strip()
92
+ return output, file_links, outputs_chunks
93
 
94
  outputs_answer = gr.Textbox(label="Answer")
 
95
  outputs_link = gr.Markdown(label="Source Link")
96
+ outputs_chunks = gr.Textbox(label="Used Chunks")
97
 
98
 
99
  demo = gr.Interface(
100
  fn=qa_fn,
101
  inputs=[
102
  gr.Textbox(lines=2, label="Your Question"),
 
103
  gr.Slider(1, 7, value=4, step=1, label="Top-K Documents"),
104
  gr.Slider(0.1, 1.0, value=0.6, step=0.05, label="Temperature"),
105
  gr.Slider(64, 1024, value=512, step=64, label="Max Answer Length")
106
  ],
107
+ outputs=[outputs_answer, outputs_link, outputs_chunks],
108
  title="GDPR Legal Assistant",
109
+ description="Ask any question about GDPR or EDPB documents.",
110
  allow_flagging="never"
111
  )
112