Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,12 +7,15 @@ from langchain.prompts import ChatPromptTemplate
|
|
7 |
from langchain.schema.runnable import RunnablePassthrough
|
8 |
from langchain.schema.output_parser import StrOutputParser
|
9 |
from langchain.memory import ConversationBufferMemory
|
|
|
10 |
from typing import List, Tuple
|
11 |
import re
|
12 |
-
|
|
|
|
|
13 |
TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
|
14 |
|
15 |
-
class
|
16 |
def __init__(self):
|
17 |
# Initialize embeddings
|
18 |
self.embeddings = TogetherEmbeddings(
|
@@ -20,13 +23,15 @@ class ChatBot:
|
|
20 |
together_api_key=TOGETHER_API_KEY
|
21 |
)
|
22 |
|
23 |
-
#
|
24 |
-
self.
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
)
|
29 |
-
|
|
|
|
|
30 |
|
31 |
# Initialize the model
|
32 |
self.model = Together(
|
@@ -51,10 +56,32 @@ Suhbat Tarixi: {chat_history}
|
|
51 |
Savol: {question}
|
52 |
Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
|
53 |
|
54 |
-
|
55 |
self.prompt = ChatPromptTemplate.from_template(self.template)
|
56 |
|
57 |
# Create the chain
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
self.chain = (
|
59 |
{
|
60 |
"context": self.retriever,
|
@@ -65,13 +92,54 @@ Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
|
|
65 |
| self.model
|
66 |
| StrOutputParser()
|
67 |
)
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
def get_chat_history(self) -> str:
|
70 |
"""Format chat history for the prompt"""
|
71 |
messages = self.memory.load_memory_variables({})["chat_history"]
|
72 |
return "\n".join([f"{m.type}: {m.content}" for m in messages])
|
73 |
-
|
74 |
-
import re
|
75 |
|
76 |
def process_response(self, response: str) -> str:
|
77 |
"""Clean up the response"""
|
@@ -79,21 +147,27 @@ Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
|
|
79 |
for tag in unwanted_tags:
|
80 |
response = response.replace(tag, "")
|
81 |
|
82 |
-
# Python kod snippetlarini olib tashlash
|
83 |
response = re.sub(r"```.*?```", "", response, flags=re.DOTALL)
|
84 |
response = re.sub(r"print\(.*?\)", "", response)
|
85 |
|
86 |
return response.strip()
|
87 |
|
88 |
-
|
89 |
def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
|
90 |
"""Process a single chat message"""
|
91 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
self.memory.chat_memory.add_user_message(message)
|
93 |
response = self.chain.invoke(message)
|
94 |
clean_response = self.process_response(response)
|
95 |
|
96 |
-
# Agar javob to'liq bo'lmasa yoki noto'g'ri bo'lsa, qayta urinib ko'rish
|
97 |
if not clean_response or len(clean_response.split()) < 3:
|
98 |
clean_response = "Kechirasiz, savolingizni tushunolmadim. Iltimos, batafsilroq savol bering."
|
99 |
|
@@ -107,13 +181,14 @@ Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
|
|
107 |
self.memory.clear()
|
108 |
return []
|
109 |
|
110 |
-
# Create the Gradio interface
|
111 |
def create_demo() -> gr.Interface:
|
112 |
-
chatbot =
|
113 |
|
114 |
with gr.Blocks() as demo:
|
115 |
-
gr.Markdown("""# RAG Chatbot
|
116 |
-
Beeline
|
|
|
|
|
117 |
|
118 |
chatbot_interface = gr.Chatbot(
|
119 |
height=600,
|
@@ -131,7 +206,6 @@ def create_demo() -> gr.Interface:
|
|
131 |
clear = gr.Button("Yangi suhbat")
|
132 |
|
133 |
def respond(message, chat_history):
|
134 |
-
# Foydalanuvchi xabarini tozalash
|
135 |
message = message.strip()
|
136 |
if not message:
|
137 |
return "", chat_history
|
|
|
7 |
from langchain.schema.runnable import RunnablePassthrough
|
8 |
from langchain.schema.output_parser import StrOutputParser
|
9 |
from langchain.memory import ConversationBufferMemory
|
10 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
11 |
from typing import List, Tuple
|
12 |
import re
|
13 |
+
import json
|
14 |
+
from datetime import datetime
|
15 |
+
|
16 |
TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
|
17 |
|
18 |
+
class LearningChatBot:
|
19 |
def __init__(self):
|
20 |
# Initialize embeddings
|
21 |
self.embeddings = TogetherEmbeddings(
|
|
|
23 |
together_api_key=TOGETHER_API_KEY
|
24 |
)
|
25 |
|
26 |
+
# Initialize text splitter for processing new information
|
27 |
+
self.text_splitter = RecursiveCharacterTextSplitter(
|
28 |
+
chunk_size=1000,
|
29 |
+
chunk_overlap=200,
|
30 |
+
length_function=len,
|
31 |
)
|
32 |
+
|
33 |
+
# Load or create the FAISS index
|
34 |
+
self.load_or_create_vectorstore()
|
35 |
|
36 |
# Initialize the model
|
37 |
self.model = Together(
|
|
|
56 |
Savol: {question}
|
57 |
Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
|
58 |
|
|
|
59 |
self.prompt = ChatPromptTemplate.from_template(self.template)
|
60 |
|
61 |
# Create the chain
|
62 |
+
self.setup_chain()
|
63 |
+
|
64 |
+
# Load learned data
|
65 |
+
self.learned_data = self.load_learned_data()
|
66 |
+
|
67 |
+
def load_or_create_vectorstore(self):
    """Load the FAISS vector store persisted in the current directory,
    or create a fresh placeholder index when none can be loaded.

    Side effects:
        Sets ``self.vectorstore`` and ``self.retriever``.
    """
    try:
        # allow_dangerous_deserialization is required by newer langchain
        # versions to unpickle a locally saved FAISS index; acceptable here
        # because the index is produced by this application itself.
        self.vectorstore = FAISS.load_local(
            ".",
            embeddings=self.embeddings,
            allow_dangerous_deserialization=True,
        )
    except Exception:
        # BUG FIX: the original used a bare `except:`, which also swallows
        # SystemExit/KeyboardInterrupt and masks genuine failures. Narrowed
        # to Exception so interpreter-level signals still propagate.
        # If no existing (or loadable) vectorstore, start from an empty one.
        self.vectorstore = FAISS.from_texts(
            ["Initial empty index"],
            self.embeddings,
        )
    self.retriever = self.vectorstore.as_retriever()
|
82 |
+
|
83 |
+
def setup_chain(self):
|
84 |
+
"""Set up the processing chain"""
|
85 |
self.chain = (
|
86 |
{
|
87 |
"context": self.retriever,
|
|
|
92 |
| self.model
|
93 |
| StrOutputParser()
|
94 |
)
|
95 |
+
|
96 |
+
def load_learned_data(self) -> dict:
    """Load previously learned data from ``learned_data.json``.

    Returns:
        dict: mapping of source label -> list of learned entries
        (presumably ``{"timestamp": ..., "content": ...}`` records, as
        written by ``learn_new_information``); an empty dict when the
        file is missing or unparseable.
    """
    try:
        with open('learned_data.json', 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        # First run: nothing has been learned yet.
        return {}
    except json.JSONDecodeError as e:
        # BUG FIX: a corrupted/truncated file previously raised out of
        # __init__ and crashed the whole bot. Degrade gracefully and
        # start over with an empty knowledge log instead.
        print(f"Warning: could not parse learned_data.json: {e}")
        return {}
|
103 |
+
|
104 |
+
def save_learned_data(self):
    """Persist ``self.learned_data`` to ``learned_data.json``.

    BUG FIX: writes to a temporary file and atomically replaces the
    target with ``os.replace``. The original wrote in place, so a crash
    mid-``json.dump`` could leave a truncated file that would then break
    the next start-up's ``load_learned_data``.
    """
    tmp_path = 'learned_data.json.tmp'
    with open(tmp_path, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps the Uzbek text human-readable on disk.
        json.dump(self.learned_data, f, ensure_ascii=False, indent=2)
    # Atomic on POSIX and Windows alike; readers never see a partial file.
    os.replace(tmp_path, 'learned_data.json')
|
108 |
+
|
109 |
+
def learn_new_information(self, information: str, source: str = "user_input") -> bool:
    """Chunk *information*, index it in the vector store, and persist it.

    Args:
        information: free-form text to learn.
        source: label under which the entry is logged (default "user_input").

    Returns:
        True on success; False when the input is empty or any step fails.
    """
    # BUG FIX: empty/whitespace-only input previously still appended a
    # useless log entry and rewrote both the JSON file and the FAISS
    # index on disk. Reject it up front.
    if not information or not information.strip():
        return False
    try:
        # Split the text into chunks sized for the embedding model.
        chunks = self.text_splitter.split_text(information)

        # Index the chunks so the retriever can surface them later.
        self.vectorstore.add_texts(chunks)

        # Log the raw text with a timestamp, grouped by source.
        timestamp = datetime.now().isoformat()
        self.learned_data.setdefault(source, []).append({
            "timestamp": timestamp,
            "content": information,
        })

        # Persist both the JSON log and the updated FAISS index.
        self.save_learned_data()
        self.vectorstore.save_local(".")

        return True
    except Exception as e:
        # Best-effort: report and signal failure rather than crash chat().
        print(f"Error learning new information: {str(e)}")
        return False
|
138 |
+
|
139 |
def get_chat_history(self) -> str:
    """Render the buffered conversation as newline-separated "type: content" lines."""
    history = self.memory.load_memory_variables({})["chat_history"]
    rendered = []
    for msg in history:
        rendered.append(f"{msg.type}: {msg.content}")
    return "\n".join(rendered)
|
|
|
|
|
143 |
|
144 |
def process_response(self, response: str) -> str:
|
145 |
"""Clean up the response"""
|
|
|
147 |
for tag in unwanted_tags:
|
148 |
response = response.replace(tag, "")
|
149 |
|
|
|
150 |
response = re.sub(r"```.*?```", "", response, flags=re.DOTALL)
|
151 |
response = re.sub(r"print\(.*?\)", "", response)
|
152 |
|
153 |
return response.strip()
|
154 |
|
|
|
155 |
def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
|
156 |
"""Process a single chat message"""
|
157 |
try:
|
158 |
+
# Check if this is a learning request
|
159 |
+
if message.lower().startswith("o'rgan:") or message.lower().startswith("learn:"):
|
160 |
+
# Extract the learning content
|
161 |
+
learning_content = message[message.find(':')+1:].strip()
|
162 |
+
if self.learn_new_information(learning_content):
|
163 |
+
return "Yangi ma'lumot muvaffaqiyatli o'rganildi va saqlandi."
|
164 |
+
else:
|
165 |
+
return "Ma'lumotni o'rganishda xatolik yuz berdi."
|
166 |
+
|
167 |
self.memory.chat_memory.add_user_message(message)
|
168 |
response = self.chain.invoke(message)
|
169 |
clean_response = self.process_response(response)
|
170 |
|
|
|
171 |
if not clean_response or len(clean_response.split()) < 3:
|
172 |
clean_response = "Kechirasiz, savolingizni tushunolmadim. Iltimos, batafsilroq savol bering."
|
173 |
|
|
|
181 |
self.memory.clear()
|
182 |
return []
|
183 |
|
|
|
184 |
def create_demo() -> gr.Interface:
|
185 |
+
chatbot = LearningChatBot()
|
186 |
|
187 |
with gr.Blocks() as demo:
|
188 |
+
gr.Markdown("""# O'rganuvchi RAG Chatbot
|
189 |
+
Beeline Uzbekistan ma'lumotlari va yangi o'rganilgan ma'lumotlar asosida javob beruvchi bot
|
190 |
+
|
191 |
+
Yangi ma'lumot o'rgatish uchun xabarni "o'rgan:" yoki "learn:" bilan boshlang.""")
|
192 |
|
193 |
chatbot_interface = gr.Chatbot(
|
194 |
height=600,
|
|
|
206 |
clear = gr.Button("Yangi suhbat")
|
207 |
|
208 |
def respond(message, chat_history):
|
|
|
209 |
message = message.strip()
|
210 |
if not message:
|
211 |
return "", chat_history
|