Copain22 committed on
Commit
526f9f1
·
verified ·
1 Parent(s): 589ad8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -51
app.py CHANGED
@@ -1,18 +1,15 @@
1
- import torch
2
- import gradio as gr
3
- import os
4
  from pathlib import Path
5
  from huggingface_hub import login
6
- from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
 
 
 
7
  from llama_index.core.memory import ChatMemoryBuffer
8
  from llama_index.llms.huggingface import HuggingFaceLLM
9
  from llama_index.embeddings.langchain import LangchainEmbedding
10
  from langchain.embeddings.huggingface import HuggingFaceEmbeddings
11
- from llama_index.core import PromptTemplate
12
-
13
-
14
- pdf_files = [str(p) for p in Path(".").glob("*.pdf")]
15
- docs = SimpleDirectoryReader(input_files=pdf_files).load_data()
16
 
17
  SYSTEM_PROMPT = """
18
  You are a friendly café assistant for Café Eleven. Your job is to:
@@ -21,7 +18,6 @@ You are a friendly café assistant for Café Eleven. Your job is to:
21
  3. Ask for pickup time
22
  4. Suggest add-ons/extras from our menu
23
  5. Confirm the complete order
24
-
25
  Menu items are embedded in the document. Always:
26
  - Be polite and professional
27
  - Confirm order details clearly
@@ -29,66 +25,76 @@ Menu items are embedded in the document. Always:
29
  - Never make up items not in our menu
30
  """
31
 
32
- wrapper_prompt = PromptTemplate(
33
  """<s>[INST] <<SYS>>
34
  {system_prompt}
35
  Current conversation:
36
  {chat_history}
37
  <</SYS>>
38
-
39
  {query_str} [/INST]"""
40
  )
41
 
42
- login(token=os.environ["HF_TOKEN"])
43
-
44
- llm = HuggingFaceLLM(
45
- tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
46
- model_name="meta-llama/Llama-2-7b-chat-hf",
47
- context_window=3900,
48
- max_new_tokens=256,
49
- generate_kwargs={"temperature": 0.2, "do_sample": True},
50
- device_map="auto",
51
- model_kwargs={
52
- "torch_dtype": torch.float16,
53
- "load_in_4bit": True,
54
- "use_auth_token": os.environ["HF_TOKEN"]
55
- },
56
- system_prompt=SYSTEM_PROMPT,
57
- query_wrapper_prompt=wrapper_prompt,
58
- )
59
 
60
- embed_model = LangchainEmbedding(
61
- HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
62
- )
63
-
64
- Settings.llm = llm
 
 
65
  Settings.embed_model = embed_model
66
- Settings.chunk_size = 512
67
 
68
- # ---------- 3. Build the chat engine ----------
69
- memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
70
  index = VectorStoreIndex.from_documents(docs)
71
- chat_engine = index.as_chat_engine(
72
- chat_mode="condense_plus_context",
73
- memory=memory,
74
- system_prompt=SYSTEM_PROMPT,
75
- )
76
 
77
- # ---------- 4. Gradio UI ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  with gr.Blocks(title="Café Eleven Chat") as demo:
79
  gr.Markdown("## ☕ Café Eleven Ordering Assistant \nType *quit* to end the chat.")
80
  chatbot = gr.Chatbot()
81
- user_txt = gr.Textbox(show_label=False, placeholder="Hi, I’d like a latte…")
82
- clear = gr.Button("Clear")
83
 
84
- def respond(message, chat_history):
85
  if message.lower().strip() in {"quit", "exit", "done"}:
86
- return "Thank you for your order! We'll see you soon.", chat_history
87
- response = chat_engine.chat(message).response
88
- chat_history.append((message, response))
89
- return "", chat_history
 
 
90
 
91
- user_txt.submit(respond, [user_txt, chatbot], [user_txt, chatbot])
92
  clear.click(lambda: None, None, chatbot, queue=False)
93
 
94
  if __name__ == "__main__":
 
1
+ # ---------- 0. Imports & constants ----------
2
+ import os, torch, gradio as gr
 
3
  from pathlib import Path
4
  from huggingface_hub import login
5
+
6
+ from llama_index.core import (
7
+ VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
8
+ )
9
  from llama_index.core.memory import ChatMemoryBuffer
10
  from llama_index.llms.huggingface import HuggingFaceLLM
11
  from llama_index.embeddings.langchain import LangchainEmbedding
12
  from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 
 
 
 
 
13
 
14
  SYSTEM_PROMPT = """
15
  You are a friendly café assistant for Café Eleven. Your job is to:
 
18
  3. Ask for pickup time
19
  4. Suggest add-ons/extras from our menu
20
  5. Confirm the complete order
 
21
  Menu items are embedded in the document. Always:
22
  - Be polite and professional
23
  - Confirm order details clearly
 
25
  - Never make up items not in our menu
26
  """
27
 
28
+ WRAPPER_PROMPT = PromptTemplate(
29
  """<s>[INST] <<SYS>>
30
  {system_prompt}
31
  Current conversation:
32
  {chat_history}
33
  <</SYS>>
 
34
  {query_str} [/INST]"""
35
  )
36
 
37
+ login(token=os.environ["HF_TOKEN"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
+ # ---------- 1. Pre-load documents & build the vector index (CPU-safe) ----------
40
+ docs = SimpleDirectoryReader(
41
+ input_files=[str(p) for p in Path(".").glob("*.pdf")]
42
+ ).load_data()
43
+ embed_model = LangchainEmbedding(
44
+ HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
45
+ )
46
  Settings.embed_model = embed_model
47
+ Settings.chunk_size = 512
48
 
 
 
49
  index = VectorStoreIndex.from_documents(docs)
 
 
 
 
 
50
 
51
+ # ---------- 2. Lazy, singleton chat-engine ----------
52
+ _state = {"chat_engine": None} # filled on first request
53
+
54
+ def get_chat_engine():
55
+ if _state["chat_engine"] is None:
56
+ llm = HuggingFaceLLM(
57
+ tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
58
+ model_name ="meta-llama/Llama-2-7b-chat-hf",
59
+ context_window=3900,
60
+ max_new_tokens=256,
61
+ generate_kwargs={"temperature":0.2, "do_sample":True},
62
+ device_map ="auto", # CUDA now visible
63
+ model_kwargs ={
64
+ "torch_dtype": torch.float16,
65
+ "load_in_4bit": True,
66
+ "use_auth_token": os.environ["HF_TOKEN"]
67
+ },
68
+ system_prompt = SYSTEM_PROMPT,
69
+ query_wrapper_prompt= WRAPPER_PROMPT,
70
+ )
71
+ Settings.llm = llm
72
+
73
+ memory = ChatMemoryBuffer.from_defaults(token_limit=2000)
74
+ _state["chat_engine"] = index.as_chat_engine(
75
+ chat_mode="condense_plus_context",
76
+ memory =memory,
77
+ system_prompt=SYSTEM_PROMPT,
78
+ )
79
+ return _state["chat_engine"]
80
+
81
+ # ---------- 3. Gradio UI ----------
82
  with gr.Blocks(title="Café Eleven Chat") as demo:
83
  gr.Markdown("## ☕ Café Eleven Ordering Assistant \nType *quit* to end the chat.")
84
  chatbot = gr.Chatbot()
85
+ user_in = gr.Textbox(show_label=False, placeholder="Hi, I’d like a latte…")
86
+ clear = gr.Button("Clear")
87
 
88
+ def respond(message, history):
89
  if message.lower().strip() in {"quit", "exit", "done"}:
90
+ return "Thank you for your order! We'll see you soon.", history
91
+
92
+ engine = get_chat_engine() # GPU & model ready
93
+ reply = engine.chat(message).response
94
+ history.append((message, reply))
95
+ return "", history
96
 
97
+ user_in.submit(respond, [user_in, chatbot], [user_in, chatbot])
98
  clear.click(lambda: None, None, chatbot, queue=False)
99
 
100
  if __name__ == "__main__":