Copain22 committed on
Commit
51f7fc7
·
verified ·
1 Parent(s): 723c370

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -41,18 +41,21 @@ def retrieve_context_faiss(query, top_k=3):
41
  distances, indices = index.search(query_vec, top_k)
42
  return "\n".join([menu_chunks[i] for i in indices[0]])
43
 
 
44
  # === Generate LLM Response ===
45
- @GPU
46
  def generate_response(message, history, system_message, max_tokens, temperature, top_p):
47
  context = retrieve_context_faiss(message)
 
48
  messages = [{"role": "system", "content": system_message}]
49
- for user, bot in history:
50
- messages.append({"role": "user", "content": user})
51
- messages.append({"role": "assistant", "content": bot})
 
52
  messages.append({"role": "user", "content": f"{message}\n\nRelevant info:\n{context}"})
53
-
54
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
55
- inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
56
 
57
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
58
  generate_kwargs = dict(
@@ -71,7 +74,8 @@ def generate_response(message, history, system_message, max_tokens, temperature,
71
  for token in streamer:
72
  output += token
73
  yield output
74
-
 
75
  # === UI ===
76
  demo = gr.ChatInterface(
77
  fn=generate_response,
 
41
  distances, indices = index.search(query_vec, top_k)
42
  return "\n".join([menu_chunks[i] for i in indices[0]])
43
 
44
+
45
  # === Generate LLM Response ===
46
+ @spaces.GPU # Only if you're using ZeroGPU
47
  def generate_response(message, history, system_message, max_tokens, temperature, top_p):
48
  context = retrieve_context_faiss(message)
49
+
50
  messages = [{"role": "system", "content": system_message}]
51
+ for user_msg, bot_msg in history:
52
+ messages.append({"role": "user", "content": user_msg})
53
+ messages.append({"role": "assistant", "content": bot_msg})
54
+
55
  messages.append({"role": "user", "content": f"{message}\n\nRelevant info:\n{context}"})
56
+
57
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
58
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
59
 
60
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
61
  generate_kwargs = dict(
 
74
  for token in streamer:
75
  output += token
76
  yield output
77
+
78
+ print("Inputs received:", message, history, system_message, max_tokens, temperature, top_p)
79
  # === UI ===
80
  demo = gr.ChatInterface(
81
  fn=generate_response,