OrifjonKenjayev commited on
Commit
29861e1
·
verified ·
1 Parent(s): 8196568

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -86
app.py CHANGED
@@ -12,18 +12,39 @@ from typing import List, Tuple
12
  import re
13
  import json
14
  from datetime import datetime
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
 
 
 
17
 
18
  class LearningChatBot:
19
  def __init__(self):
20
- # Initialize embeddings
21
- self.embeddings = TogetherEmbeddings(
22
- model="togethercomputer/m2-bert-80M-32k-retrieval",
23
- together_api_key=TOGETHER_API_KEY
24
- )
25
 
26
- # Initialize text splitter for processing new information
 
 
 
 
 
 
 
 
 
 
27
  self.text_splitter = RecursiveCharacterTextSplitter(
28
  chunk_size=1000,
29
  chunk_overlap=200,
@@ -33,14 +54,18 @@ class LearningChatBot:
33
  # Load or create the FAISS index
34
  self.load_or_create_vectorstore()
35
 
36
- # Initialize the model
37
- self.model = Together(
38
- model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
39
- temperature=0.7,
40
- max_tokens=150,
41
- top_k=30,
42
- together_api_key=TOGETHER_API_KEY
43
- )
 
 
 
 
44
 
45
  # Initialize memory
46
  self.memory = ConversationBufferMemory(
@@ -64,47 +89,98 @@ Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
64
  # Load learned data
65
  self.learned_data = self.load_learned_data()
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def load_or_create_vectorstore(self):
68
  """Load existing vectorstore or create a new one"""
69
  try:
70
- self.vectorstore = FAISS.load_local(
71
- ".",
72
- embeddings=self.embeddings,
73
- allow_dangerous_deserialization=True
74
- )
75
- except:
76
- # If no existing vectorstore, create an empty one
77
- self.vectorstore = FAISS.from_texts(
78
- ["Initial empty index"],
79
- self.embeddings
80
- )
81
- self.retriever = self.vectorstore.as_retriever()
 
 
 
 
 
 
 
 
 
82
 
83
  def setup_chain(self):
84
  """Set up the processing chain"""
85
- self.chain = (
86
- {
87
- "context": self.retriever,
88
- "chat_history": lambda x: self.get_chat_history(),
89
- "question": RunnablePassthrough()
90
- }
91
- | self.prompt
92
- | self.model
93
- | StrOutputParser()
94
- )
 
 
 
 
95
 
96
  def load_learned_data(self) -> dict:
97
  """Load previously learned data from file"""
98
  try:
99
- with open('learned_data.json', 'r', encoding='utf-8') as f:
100
  return json.load(f)
101
  except FileNotFoundError:
 
 
 
 
 
 
 
 
 
 
102
  return {}
103
 
104
  def save_learned_data(self):
105
  """Save learned data to file"""
106
- with open('learned_data.json', 'w', encoding='utf-8') as f:
107
- json.dump(self.learned_data, f, ensure_ascii=False, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  def learn_new_information(self, information: str, source: str = "user_input") -> bool:
110
  """Process and store new information"""
@@ -129,28 +205,37 @@ Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
129
  self.save_learned_data()
130
 
131
  # Save the updated vectorstore
132
- self.vectorstore.save_local(".")
133
 
 
134
  return True
135
  except Exception as e:
136
- print(f"Error learning new information: {str(e)}")
137
  return False
138
 
139
  def get_chat_history(self) -> str:
140
  """Format chat history for the prompt"""
141
- messages = self.memory.load_memory_variables({})["chat_history"]
142
- return "\n".join([f"{m.type}: {m.content}" for m in messages])
 
 
 
 
143
 
144
  def process_response(self, response: str) -> str:
145
  """Clean up the response"""
146
- unwanted_tags = ["[INST]", "[/INST]", "<s>", "</s>"]
147
- for tag in unwanted_tags:
148
- response = response.replace(tag, "")
 
 
 
 
149
 
150
- response = re.sub(r"```.*?```", "", response, flags=re.DOTALL)
151
- response = re.sub(r"print\(.*?\)", "", response)
152
-
153
- return response.strip()
154
 
155
  def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
156
  """Process a single chat message"""
@@ -159,6 +244,9 @@ Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
159
  if message.lower().startswith("o'rgan:") or message.lower().startswith("learn:"):
160
  # Extract the learning content
161
  learning_content = message[message.find(':')+1:].strip()
 
 
 
162
  if self.learn_new_information(learning_content):
163
  return "Yangi ma'lumot muvaffaqiyatli o'rganildi va saqlandi."
164
  else:
@@ -174,53 +262,66 @@ Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
174
  self.memory.chat_memory.add_ai_message(clean_response)
175
  return clean_response
176
  except Exception as e:
177
- return f"Xatolik yuz berdi: {str(e)}"
 
178
 
179
  def reset_chat(self) -> List[Tuple[str, str]]:
180
  """Reset the chat history"""
181
- self.memory.clear()
182
- return []
 
 
 
 
183
 
184
  def create_demo() -> gr.Interface:
185
- chatbot = LearningChatBot()
186
-
187
- with gr.Blocks() as demo:
188
- gr.Markdown("""# O'rganuvchi RAG Chatbot
189
- Beeline Uzbekistan ma'lumotlari va yangi o'rganilgan ma'lumotlar asosida javob beruvchi bot
190
-
191
- Yangi ma'lumot o'rgatish uchun xabarni "o'rgan:" yoki "learn:" bilan boshlang.""")
192
-
193
- chatbot_interface = gr.Chatbot(
194
- height=600,
195
- show_copy_button=True,
196
- )
197
 
198
- with gr.Row():
199
- msg = gr.Textbox(
200
- show_label=False,
201
- placeholder="Xabaringizni shu yerda yozing",
202
- container=False
 
 
 
 
203
  )
204
- submit = gr.Button("Xabarni yuborish", variant="primary")
205
-
206
- clear = gr.Button("Yangi suhbat")
207
-
208
- def respond(message, chat_history):
209
- message = message.strip()
210
- if not message:
 
 
 
 
 
 
 
 
 
 
 
211
  return "", chat_history
212
 
213
- bot_message = chatbot.chat(message, chat_history)
214
- chat_history.append((message, bot_message))
215
- return "", chat_history
216
 
217
- submit.click(respond, [msg, chatbot_interface], [msg, chatbot_interface])
218
- msg.submit(respond, [msg, chatbot_interface], [msg, chatbot_interface])
219
- clear.click(lambda: chatbot.reset_chat(), None, chatbot_interface)
220
-
221
- return demo
222
 
223
  demo = create_demo()
224
 
225
  if __name__ == "__main__":
226
- demo.launch()
 
 
 
 
 
12
  import re
13
  import json
14
  from datetime import datetime
15
+ import logging
16
+ import sys
17
+
18
+ # Set up logging
19
+ logging.basicConfig(
20
+ level=logging.INFO,
21
+ format='%(asctime)s - %(levelname)s - %(message)s',
22
+ handlers=[
23
+ logging.FileHandler('chatbot.log'),
24
+ logging.StreamHandler(sys.stdout)
25
+ ]
26
+ )
27
 
28
# API key is read from the environment; None when unset.
TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')

# Everything the bot persists (learned facts + FAISS index) lives under ./data.
DATA_DIR = "data"
LEARNED_DATA_FILE = os.path.join(DATA_DIR, "learned_data.json")
VECTOR_STORE_DIR = os.path.join(DATA_DIR, "vector_store")
32
 
33
  class LearningChatBot:
34
  def __init__(self):
35
+ self.setup_directories()
 
 
 
 
36
 
37
+ try:
38
+ # Initialize embeddings
39
+ self.embeddings = TogetherEmbeddings(
40
+ model="togethercomputer/m2-bert-80M-32k-retrieval",
41
+ together_api_key=TOGETHER_API_KEY
42
+ )
43
+ except Exception as e:
44
+ logging.error(f"Failed to initialize embeddings: {str(e)}")
45
+ raise
46
+
47
+ # Initialize text splitter
48
  self.text_splitter = RecursiveCharacterTextSplitter(
49
  chunk_size=1000,
50
  chunk_overlap=200,
 
54
  # Load or create the FAISS index
55
  self.load_or_create_vectorstore()
56
 
57
+ try:
58
+ # Initialize the model
59
+ self.model = Together(
60
+ model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
61
+ temperature=0.7,
62
+ max_tokens=150,
63
+ top_k=30,
64
+ together_api_key=TOGETHER_API_KEY
65
+ )
66
+ except Exception as e:
67
+ logging.error(f"Failed to initialize Together model: {str(e)}")
68
+ raise
69
 
70
  # Initialize memory
71
  self.memory = ConversationBufferMemory(
 
89
  # Load learned data
90
  self.learned_data = self.load_learned_data()
91
 
92
+ def setup_directories(self):
93
+ """Create necessary directories if they don't exist"""
94
+ try:
95
+ # Create data directory if it doesn't exist
96
+ os.makedirs(DATA_DIR, exist_ok=True)
97
+ os.makedirs(VECTOR_STORE_DIR, exist_ok=True)
98
+
99
+ # Create learned_data.json if it doesn't exist
100
+ if not os.path.exists(LEARNED_DATA_FILE):
101
+ with open(LEARNED_DATA_FILE, 'w', encoding='utf-8') as f:
102
+ json.dump({}, f, ensure_ascii=False, indent=2)
103
+ logging.info(f"Created new learned_data.json file at {LEARNED_DATA_FILE}")
104
+ except Exception as e:
105
+ logging.error(f"Failed to setup directories: {str(e)}")
106
+ raise
107
+
108
  def load_or_create_vectorstore(self):
109
  """Load existing vectorstore or create a new one"""
110
  try:
111
+ if os.path.exists(os.path.join(VECTOR_STORE_DIR, "index.faiss")):
112
+ self.vectorstore = FAISS.load_local(
113
+ VECTOR_STORE_DIR,
114
+ embeddings=self.embeddings,
115
+ allow_dangerous_deserialization=True
116
+ )
117
+ logging.info("Loaded existing vectorstore")
118
+ else:
119
+ # If no existing vectorstore, create an empty one
120
+ self.vectorstore = FAISS.from_texts(
121
+ ["Initial empty index"],
122
+ self.embeddings
123
+ )
124
+ # Save the initial vectorstore
125
+ self.vectorstore.save_local(VECTOR_STORE_DIR)
126
+ logging.info("Created new vectorstore")
127
+
128
+ self.retriever = self.vectorstore.as_retriever()
129
+ except Exception as e:
130
+ logging.error(f"Failed to load or create vectorstore: {str(e)}")
131
+ raise
132
 
133
  def setup_chain(self):
134
  """Set up the processing chain"""
135
+ try:
136
+ self.chain = (
137
+ {
138
+ "context": self.retriever,
139
+ "chat_history": lambda x: self.get_chat_history(),
140
+ "question": RunnablePassthrough()
141
+ }
142
+ | self.prompt
143
+ | self.model
144
+ | StrOutputParser()
145
+ )
146
+ except Exception as e:
147
+ logging.error(f"Failed to setup chain: {str(e)}")
148
+ raise
149
 
150
  def load_learned_data(self) -> dict:
151
  """Load previously learned data from file"""
152
  try:
153
+ with open(LEARNED_DATA_FILE, 'r', encoding='utf-8') as f:
154
  return json.load(f)
155
  except FileNotFoundError:
156
+ logging.warning(f"learned_data.json not found at {LEARNED_DATA_FILE}")
157
+ return {}
158
+ except json.JSONDecodeError:
159
+ logging.error("Error decoding learned_data.json. Creating backup and starting fresh.")
160
+ # Create backup of corrupted file
161
+ backup_file = f"{LEARNED_DATA_FILE}.backup-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
162
+ os.rename(LEARNED_DATA_FILE, backup_file)
163
+ return {}
164
+ except Exception as e:
165
+ logging.error(f"Unexpected error loading learned data: {str(e)}")
166
  return {}
167
 
168
  def save_learned_data(self):
169
  """Save learned data to file"""
170
+ try:
171
+ # Create temporary file
172
+ temp_file = f"{LEARNED_DATA_FILE}.temp"
173
+ with open(temp_file, 'w', encoding='utf-8') as f:
174
+ json.dump(self.learned_data, f, ensure_ascii=False, indent=2)
175
+
176
+ # Rename temporary file to actual file
177
+ os.replace(temp_file, LEARNED_DATA_FILE)
178
+ logging.info("Successfully saved learned data")
179
+ except Exception as e:
180
+ logging.error(f"Failed to save learned data: {str(e)}")
181
+ if os.path.exists(temp_file):
182
+ os.remove(temp_file)
183
+ raise
184
 
185
  def learn_new_information(self, information: str, source: str = "user_input") -> bool:
186
  """Process and store new information"""
 
205
  self.save_learned_data()
206
 
207
  # Save the updated vectorstore
208
+ self.vectorstore.save_local(VECTOR_STORE_DIR)
209
 
210
+ logging.info(f"Successfully learned new information from {source}")
211
  return True
212
  except Exception as e:
213
+ logging.error(f"Error learning new information: {str(e)}")
214
  return False
215
 
216
  def get_chat_history(self) -> str:
217
  """Format chat history for the prompt"""
218
+ try:
219
+ messages = self.memory.load_memory_variables({})["chat_history"]
220
+ return "\n".join([f"{m.type}: {m.content}" for m in messages])
221
+ except Exception as e:
222
+ logging.error(f"Error getting chat history: {str(e)}")
223
+ return ""
224
 
225
  def process_response(self, response: str) -> str:
226
  """Clean up the response"""
227
+ try:
228
+ unwanted_tags = ["[INST]", "[/INST]", "<s>", "</s>"]
229
+ for tag in unwanted_tags:
230
+ response = response.replace(tag, "")
231
+
232
+ response = re.sub(r"```.*?```", "", response, flags=re.DOTALL)
233
+ response = re.sub(r"print\(.*?\)", "", response)
234
 
235
+ return response.strip()
236
+ except Exception as e:
237
+ logging.error(f"Error processing response: {str(e)}")
238
+ return response
239
 
240
  def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
241
  """Process a single chat message"""
 
244
  if message.lower().startswith("o'rgan:") or message.lower().startswith("learn:"):
245
  # Extract the learning content
246
  learning_content = message[message.find(':')+1:].strip()
247
+ if not learning_content:
248
+ return "O'rganish uchun ma'lumot kiritilmadi."
249
+
250
  if self.learn_new_information(learning_content):
251
  return "Yangi ma'lumot muvaffaqiyatli o'rganildi va saqlandi."
252
  else:
 
262
  self.memory.chat_memory.add_ai_message(clean_response)
263
  return clean_response
264
  except Exception as e:
265
+ logging.error(f"Error in chat: {str(e)}")
266
+ return f"Xatolik yuz berdi. Iltimos qaytadan urinib ko'ring."
267
 
268
  def reset_chat(self) -> List[Tuple[str, str]]:
269
  """Reset the chat history"""
270
+ try:
271
+ self.memory.clear()
272
+ return []
273
+ except Exception as e:
274
+ logging.error(f"Error resetting chat: {str(e)}")
275
+ return []
276
 
277
def create_demo() -> gr.Interface:
    """Assemble the Gradio Blocks UI around a LearningChatBot instance.

    Any construction failure is logged and re-raised so the hosting
    process fails loudly instead of serving a broken page.
    """
    try:
        bot = LearningChatBot()

        with gr.Blocks() as demo:
            gr.Markdown("""# O'rganuvchi RAG Chatbot
Beeline Uzbekistan ma'lumotlari va yangi o'rganilgan ma'lumotlar asosida javob beruvchi bot

Yangi ma'lumot o'rgatish uchun xabarni "o'rgan:" yoki "learn:" bilan boshlang.""")

            chat_window = gr.Chatbot(
                height=600,
                show_copy_button=True,
            )

            with gr.Row():
                user_box = gr.Textbox(
                    show_label=False,
                    placeholder="Xabaringizni shu yerda yozing",
                    container=False
                )
                send_btn = gr.Button("Xabarni yuborish", variant="primary")

            reset_btn = gr.Button("Yangi suhbat")

            def respond(message, chat_history):
                # Ignore empty submissions; otherwise route through the bot
                # and append the (user, bot) turn to the visible history.
                message = message.strip()
                if not message:
                    return "", chat_history

                reply = bot.chat(message, chat_history)
                chat_history.append((message, reply))
                return "", chat_history

            send_btn.click(respond, [user_box, chat_window], [user_box, chat_window])
            user_box.submit(respond, [user_box, chat_window], [user_box, chat_window])
            reset_btn.click(lambda: bot.reset_chat(), None, chat_window)

        return demo
    except Exception as e:
        logging.error(f"Error creating demo: {str(e)}")
        raise
 
319
 
320
# Build the interface at import time (Gradio/Spaces picks up `demo`).
demo = create_demo()

if __name__ == "__main__":
    # Launch only when run as a script; log startup failures before re-raising.
    try:
        demo.launch()
    except Exception as e:
        logging.error(f"Failed to launch demo: {str(e)}")
        raise