Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,12 +7,15 @@ from langchain.prompts import ChatPromptTemplate
|
|
7 |
from langchain.schema.runnable import RunnablePassthrough
|
8 |
from langchain.schema.output_parser import StrOutputParser
|
9 |
from langchain.memory import ConversationBufferMemory
|
|
|
10 |
from typing import List, Tuple
|
11 |
import re
|
12 |
-
|
|
|
|
|
13 |
TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
|
14 |
|
15 |
-
class
|
16 |
def __init__(self):
|
17 |
# Initialize embeddings
|
18 |
self.embeddings = TogetherEmbeddings(
|
@@ -20,13 +23,15 @@ class ChatBot:
|
|
20 |
together_api_key=TOGETHER_API_KEY
|
21 |
)
|
22 |
|
23 |
-
#
|
24 |
-
self.
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
)
|
29 |
-
|
|
|
|
|
30 |
|
31 |
# Initialize the model
|
32 |
self.model = Together(
|
@@ -51,10 +56,32 @@ Suhbat Tarixi: {chat_history}
|
|
51 |
Savol: {question}
|
52 |
Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
|
53 |
|
54 |
-
|
55 |
self.prompt = ChatPromptTemplate.from_template(self.template)
|
56 |
|
57 |
# Create the chain
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
self.chain = (
|
59 |
{
|
60 |
"context": self.retriever,
|
@@ -65,13 +92,54 @@ Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
|
|
65 |
| self.model
|
66 |
| StrOutputParser()
|
67 |
)
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
def get_chat_history(self) -> str:
|
70 |
"""Format chat history for the prompt"""
|
71 |
messages = self.memory.load_memory_variables({})["chat_history"]
|
72 |
return "\n".join([f"{m.type}: {m.content}" for m in messages])
|
73 |
-
|
74 |
-
import re
|
75 |
|
76 |
def process_response(self, response: str) -> str:
|
77 |
"""Clean up the response"""
|
@@ -79,21 +147,27 @@ Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
|
|
79 |
for tag in unwanted_tags:
|
80 |
response = response.replace(tag, "")
|
81 |
|
82 |
-
# Python kod snippetlarini olib tashlash
|
83 |
response = re.sub(r"```.*?```", "", response, flags=re.DOTALL)
|
84 |
response = re.sub(r"print\(.*?\)", "", response)
|
85 |
|
86 |
return response.strip()
|
87 |
|
88 |
-
|
89 |
def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
|
90 |
"""Process a single chat message"""
|
91 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
self.memory.chat_memory.add_user_message(message)
|
93 |
response = self.chain.invoke(message)
|
94 |
clean_response = self.process_response(response)
|
95 |
|
96 |
-
# Agar javob to'liq bo'lmasa yoki noto'g'ri bo'lsa, qayta urinib ko'rish
|
97 |
if not clean_response or len(clean_response.split()) < 3:
|
98 |
clean_response = "Kechirasiz, savolingizni tushunolmadim. Iltimos, batafsilroq savol bering."
|
99 |
|
@@ -107,13 +181,14 @@ Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
|
|
107 |
self.memory.clear()
|
108 |
return []
|
109 |
|
110 |
-
# Create the Gradio interface
|
111 |
def create_demo() -> gr.Interface:
|
112 |
-
chatbot =
|
113 |
|
114 |
with gr.Blocks() as demo:
|
115 |
-
gr.Markdown("""# RAG Chatbot
|
116 |
-
Beeline
|
|
|
|
|
117 |
|
118 |
chatbot_interface = gr.Chatbot(
|
119 |
height=600,
|
@@ -131,7 +206,6 @@ def create_demo() -> gr.Interface:
|
|
131 |
clear = gr.Button("Yangi suhbat")
|
132 |
|
133 |
def respond(message, chat_history):
|
134 |
-
# Foydalanuvchi xabarini tozalash
|
135 |
message = message.strip()
|
136 |
if not message:
|
137 |
return "", chat_history
|
|
|
7 |
from langchain.schema.runnable import RunnablePassthrough
|
8 |
from langchain.schema.output_parser import StrOutputParser
|
9 |
from langchain.memory import ConversationBufferMemory
|
10 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
11 |
from typing import List, Tuple
|
12 |
import re
|
13 |
+
import json
|
14 |
+
from datetime import datetime
|
15 |
+
|
16 |
TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
|
17 |
|
18 |
+
class LearningChatBot:
|
19 |
def __init__(self):
|
20 |
# Initialize embeddings
|
21 |
self.embeddings = TogetherEmbeddings(
|
|
|
23 |
together_api_key=TOGETHER_API_KEY
|
24 |
)
|
25 |
|
26 |
+
# Initialize text splitter for processing new information
|
27 |
+
self.text_splitter = RecursiveCharacterTextSplitter(
|
28 |
+
chunk_size=1000,
|
29 |
+
chunk_overlap=200,
|
30 |
+
length_function=len,
|
31 |
)
|
32 |
+
|
33 |
+
# Load or create the FAISS index
|
34 |
+
self.load_or_create_vectorstore()
|
35 |
|
36 |
# Initialize the model
|
37 |
self.model = Together(
|
|
|
56 |
Savol: {question}
|
57 |
Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
|
58 |
|
|
|
59 |
self.prompt = ChatPromptTemplate.from_template(self.template)
|
60 |
|
61 |
# Create the chain
|
62 |
+
self.setup_chain()
|
63 |
+
|
64 |
+
# Load learned data
|
65 |
+
self.learned_data = self.load_learned_data()
|
66 |
+
|
67 |
+
def load_or_create_vectorstore(self):
    """Load the FAISS vector store persisted in the current directory,
    or create a fresh placeholder index when none can be loaded.

    Side effects:
        Sets ``self.vectorstore`` and ``self.retriever``.
    """
    try:
        # allow_dangerous_deserialization is required by newer langchain
        # versions to unpickle a locally saved FAISS index; acceptable here
        # because the index is produced by this application itself.
        self.vectorstore = FAISS.load_local(
            ".",
            embeddings=self.embeddings,
            allow_dangerous_deserialization=True,
        )
    except Exception:
        # BUG FIX: the original used a bare `except:`, which also swallows
        # SystemExit/KeyboardInterrupt and masks genuine failures. Narrowed
        # to Exception so interpreter-level signals still propagate.
        # If no existing (or loadable) vectorstore, start from an empty one.
        self.vectorstore = FAISS.from_texts(
            ["Initial empty index"],
            self.embeddings,
        )
    self.retriever = self.vectorstore.as_retriever()
|
82 |
+
|
83 |
+
def setup_chain(self):
|
84 |
+
"""Set up the processing chain"""
|
85 |
self.chain = (
|
86 |
{
|
87 |
"context": self.retriever,
|
|
|
92 |
| self.model
|
93 |
| StrOutputParser()
|
94 |
)
|
95 |
+
|
96 |
+
def load_learned_data(self) -> dict:
    """Load previously learned data from ``learned_data.json``.

    Returns:
        dict: mapping of source label -> list of learned entries
        (presumably ``{"timestamp": ..., "content": ...}`` records, as
        written by ``learn_new_information``); an empty dict when the
        file is missing or unparseable.
    """
    try:
        with open('learned_data.json', 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        # First run: nothing has been learned yet.
        return {}
    except json.JSONDecodeError as e:
        # BUG FIX: a corrupted/truncated file previously raised out of
        # __init__ and crashed the whole bot. Degrade gracefully and
        # start over with an empty knowledge log instead.
        print(f"Warning: could not parse learned_data.json: {e}")
        return {}
|
103 |
+
|
104 |
+
def save_learned_data(self):
    """Persist ``self.learned_data`` to ``learned_data.json``.

    BUG FIX: writes to a temporary file and atomically replaces the
    target with ``os.replace``. The original wrote in place, so a crash
    mid-``json.dump`` could leave a truncated file that would then break
    the next start-up's ``load_learned_data``.
    """
    tmp_path = 'learned_data.json.tmp'
    with open(tmp_path, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps the Uzbek text human-readable on disk.
        json.dump(self.learned_data, f, ensure_ascii=False, indent=2)
    # Atomic on POSIX and Windows alike; readers never see a partial file.
    os.replace(tmp_path, 'learned_data.json')
|
108 |
+
|
109 |
+
def learn_new_information(self, information: str, source: str = "user_input") -> bool:
    """Chunk *information*, index it in the vector store, and persist it.

    Args:
        information: free-form text to learn.
        source: label under which the entry is logged (default "user_input").

    Returns:
        True on success; False when the input is empty or any step fails.
    """
    # BUG FIX: empty/whitespace-only input previously still appended a
    # useless log entry and rewrote both the JSON file and the FAISS
    # index on disk. Reject it up front.
    if not information or not information.strip():
        return False
    try:
        # Split the text into chunks sized for the embedding model.
        chunks = self.text_splitter.split_text(information)

        # Index the chunks so the retriever can surface them later.
        self.vectorstore.add_texts(chunks)

        # Log the raw text with a timestamp, grouped by source.
        timestamp = datetime.now().isoformat()
        self.learned_data.setdefault(source, []).append({
            "timestamp": timestamp,
            "content": information,
        })

        # Persist both the JSON log and the updated FAISS index.
        self.save_learned_data()
        self.vectorstore.save_local(".")

        return True
    except Exception as e:
        # Best-effort: report and signal failure rather than crash chat().
        print(f"Error learning new information: {str(e)}")
        return False
|
138 |
+
|
139 |
def get_chat_history(self) -> str:
    """Render the buffered conversation as newline-separated "type: content" lines."""
    history = self.memory.load_memory_variables({})["chat_history"]
    rendered = []
    for msg in history:
        rendered.append(f"{msg.type}: {msg.content}")
    return "\n".join(rendered)
|
|
|
|
|
143 |
|
144 |
def process_response(self, response: str) -> str:
|
145 |
"""Clean up the response"""
|
|
|
147 |
for tag in unwanted_tags:
|
148 |
response = response.replace(tag, "")
|
149 |
|
|
|
150 |
response = re.sub(r"```.*?```", "", response, flags=re.DOTALL)
|
151 |
response = re.sub(r"print\(.*?\)", "", response)
|
152 |
|
153 |
return response.strip()
|
154 |
|
|
|
155 |
def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
|
156 |
"""Process a single chat message"""
|
157 |
try:
|
158 |
+
# Check if this is a learning request
|
159 |
+
if message.lower().startswith("o'rgan:") or message.lower().startswith("learn:"):
|
160 |
+
# Extract the learning content
|
161 |
+
learning_content = message[message.find(':')+1:].strip()
|
162 |
+
if self.learn_new_information(learning_content):
|
163 |
+
return "Yangi ma'lumot muvaffaqiyatli o'rganildi va saqlandi."
|
164 |
+
else:
|
165 |
+
return "Ma'lumotni o'rganishda xatolik yuz berdi."
|
166 |
+
|
167 |
self.memory.chat_memory.add_user_message(message)
|
168 |
response = self.chain.invoke(message)
|
169 |
clean_response = self.process_response(response)
|
170 |
|
|
|
171 |
if not clean_response or len(clean_response.split()) < 3:
|
172 |
clean_response = "Kechirasiz, savolingizni tushunolmadim. Iltimos, batafsilroq savol bering."
|
173 |
|
|
|
181 |
self.memory.clear()
|
182 |
return []
|
183 |
|
|
|
184 |
def create_demo() -> gr.Interface:
|
185 |
+
chatbot = LearningChatBot()
|
186 |
|
187 |
with gr.Blocks() as demo:
|
188 |
+
gr.Markdown("""# O'rganuvchi RAG Chatbot
|
189 |
+
Beeline Uzbekistan ma'lumotlari va yangi o'rganilgan ma'lumotlar asosida javob beruvchi bot
|
190 |
+
|
191 |
+
Yangi ma'lumot o'rgatish uchun xabarni "o'rgan:" yoki "learn:" bilan boshlang.""")
|
192 |
|
193 |
chatbot_interface = gr.Chatbot(
|
194 |
height=600,
|
|
|
206 |
clear = gr.Button("Yangi suhbat")
|
207 |
|
208 |
def respond(message, chat_history):
|
|
|
209 |
message = message.strip()
|
210 |
if not message:
|
211 |
return "", chat_history
|