ReallyFloppyPenguin committed on
Commit
41e4860
·
verified ·
1 Parent(s): 02eb16a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +281 -142
app.py CHANGED
@@ -14,13 +14,11 @@ class RAGMemorySystem:
14
 
15
  def __init__(self):
16
  # Initialize Pinecone - use the hardcoded key or environment variable
17
- self.pinecone_api_key = os.getenv("PINECONE_API_KEY")
18
  self.pinecone_environment = os.getenv("PINECONE_ENVIRONMENT", "us-east-1") # Serverless doesn't need specific environment
19
 
20
- # Generate unique index name with timestamp to avoid conflicts
21
- timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M")
22
- user_id = str(uuid.uuid4())[:8]
23
- self.index_name = os.getenv("PINECONE_INDEX_NAME", f"ai-experiences-{timestamp}-{user_id}")
24
 
25
  # Pinecone inference model configuration
26
  self.embedding_model = os.getenv("PINECONE_EMBEDDING_MODEL", "multilingual-e5-large")
@@ -28,7 +26,7 @@ class RAGMemorySystem:
28
 
29
  # Initialize OpenRouter
30
  self.openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
31
- self.model_name = os.getenv("MODEL_NAME", "meta-llama/llama-4-maverick:free")
32
 
33
  # Initialize Pinecone client
34
  self.pc = None
@@ -36,6 +34,13 @@ class RAGMemorySystem:
36
 
37
  # Initialize Pinecone
38
  self.init_pinecone()
 
 
 
 
 
 
 
39
 
40
  def init_pinecone(self):
41
  """Initialize Pinecone connection with integrated inference"""
@@ -54,9 +59,9 @@ class RAGMemorySystem:
54
  print(f"Error listing indexes: {list_error}")
55
  existing_indexes = []
56
 
57
- # Create index with integrated inference if it doesn't exist
58
  if self.index_name not in existing_indexes:
59
- print(f"Creating new Pinecone index with integrated inference: {self.index_name}")
60
  try:
61
  # Create index with integrated embedding model
62
  index_model = self.pc.create_index_for_model(
@@ -72,12 +77,14 @@ class RAGMemorySystem:
72
  print(f"Index details: {index_model}")
73
 
74
  # Wait for index to be ready
 
75
  time.sleep(10)
76
 
77
  except Exception as create_error:
78
  print(f"Error creating index with integrated inference: {create_error}")
79
  # Fallback to traditional index creation
80
  try:
 
81
  self.pc.create_index(
82
  name=self.index_name,
83
  dimension=1024, # multilingual-e5-large dimension
@@ -93,38 +100,27 @@ class RAGMemorySystem:
93
  time.sleep(5)
94
  except Exception as fallback_error:
95
  print(f"Failed to create fallback index: {fallback_error}")
96
-
97
- # Try with simpler name
98
- simple_name = f"ai-exp-{str(uuid.uuid4())[:6]}"
99
- try:
100
- self.pc.create_index(
101
- name=simple_name,
102
- dimension=1024,
103
- metric="cosine",
104
- spec={
105
- "serverless": {
106
- "cloud": "aws",
107
- "region": "us-east-1"
108
- }
109
- }
110
- )
111
- self.index_name = simple_name
112
- print(f"Created simple fallback index: {self.index_name}")
113
- time.sleep(5)
114
- except Exception as final_error:
115
- print(f"Final index creation failed: {final_error}")
116
- self.index = None
117
- return
118
 
119
- # Connect to the index
120
  try:
121
  self.index = self.pc.Index(self.index_name)
122
  print(f"Successfully connected to Pinecone index: {self.index_name}")
123
 
124
- # Test the connection
125
  stats = self.index.describe_index_stats()
126
  print(f"Index stats: {stats}")
127
 
 
 
 
 
 
 
 
128
  except Exception as connect_error:
129
  print(f"Error connecting to index: {connect_error}")
130
  self.index = None
@@ -217,13 +213,13 @@ class RAGMemorySystem:
217
  "context": context[:500],
218
  "timestamp": datetime.datetime.now().isoformat(),
219
  "interaction_type": "conversation",
220
- "session_id": getattr(self, 'session_id', 'default')
221
  }
222
  }
223
 
224
  # Try upsert with integrated inference
225
  self.index.upsert_records([record])
226
- return f"✅ Experience stored with integrated inference, ID: {experience_id[:8]}... in index: {self.index_name}"
227
 
228
  except Exception as integrated_error:
229
  print(f"Integrated inference failed: {integrated_error}")
@@ -238,10 +234,10 @@ class RAGMemorySystem:
238
  "context": context[:500],
239
  "timestamp": datetime.datetime.now().isoformat(),
240
  "interaction_type": "conversation",
241
- "session_id": getattr(self, 'session_id', 'default')
242
  })])
243
 
244
- return f"✅ Experience stored with manual embedding, ID: {experience_id[:8]}... in index: {self.index_name}"
245
 
246
  except Exception as e:
247
  return f"❌ Error storing experience: {e}"
@@ -381,7 +377,7 @@ class RAGMemorySystem:
381
  # Build context from relevant experiences
382
  context_parts = []
383
  if relevant_experiences:
384
- context_parts.append("🧠 Relevant past experiences (powered by Pinecone inference):")
385
 
386
  # Extract documents for reranking
387
  documents = [f"User: {exp['user_input']} AI: {exp['ai_response']}" for exp in relevant_experiences]
@@ -396,7 +392,7 @@ class RAGMemorySystem:
396
  else:
397
  # Fallback to original results
398
  for i, exp in enumerate(relevant_experiences, 1):
399
- context_parts.append(f"\n{i}. Previous interaction (similarity: {exp['score']:.2f}):")
400
  context_parts.append(f" 👤 User: {exp['user_input'][:200]}...")
401
  context_parts.append(f" 🤖 AI: {exp['ai_response'][:200]}...")
402
  context_parts.append(f" 🕒 Time: {exp['timestamp'][:19]}")
@@ -404,7 +400,7 @@ class RAGMemorySystem:
404
  context_parts.append(f" 📝 Context: {exp['context'][:100]}...")
405
  context_parts.append("")
406
  else:
407
- context_parts.append("🆕 No previous relevant experiences found. This is a fresh conversation!")
408
 
409
  context_str = "\n".join(context_parts)
410
 
@@ -412,20 +408,26 @@ class RAGMemorySystem:
412
  messages = [
413
  {
414
  "role": "system",
415
- "content": f"""You are an AI assistant with access to your past experiences and interactions through Pinecone's vector database with integrated inference.
416
- The embeddings are generated using {self.embedding_model} and results are reranked with {self.rerank_model}.
417
 
418
- Use the relevant past experiences below to inform your response, but don't just repeat them - learn from them and provide thoughtful, personalized responses.
419
 
 
 
 
420
  {context_str}
421
 
422
- Guidelines:
423
- - Reference past experiences when relevant and helpful
424
- - Show that you remember and learn from interactions using Pinecone's memory system
425
- - Provide helpful, contextual responses
426
- - Be conversational and engaging
427
- - If you see similar questions from before, build upon previous responses
428
- - Acknowledge when you're learning something new through the memory system"""
 
 
 
 
429
  }
430
  ]
431
 
@@ -445,9 +447,6 @@ Guidelines:
445
 
446
  return ai_response, context_str, storage_result
447
 
448
- # Initialize the RAG system
449
- rag_system = RAGMemorySystem()
450
-
451
  def chat_with_rag(message: str, history: List = None) -> tuple:
452
  """Main chat function for Gradio interface"""
453
  if not message.strip():
@@ -481,136 +480,269 @@ def get_system_status():
481
  if rag_system.index:
482
  try:
483
  stats = rag_system.index.describe_index_stats()
484
- status.append(f"✅ Pinecone: Connected to '{rag_system.index_name}'")
485
- status.append(f"📊 Stored experiences: {stats.get('total_vector_count', 0)}")
486
- status.append(f"🧠 Embedding model: {rag_system.embedding_model}")
487
- status.append(f"🔄 Reranking model: {rag_system.rerank_model}")
488
- except:
489
- status.append(f"⚠️ Pinecone: Connected but cannot get stats")
490
  else:
491
  status.append("❌ Pinecone: Not connected")
492
 
493
  # Check OpenRouter
494
  if rag_system.openrouter_api_key:
495
- status.append(f"✅ OpenRouter: API key configured")
496
- status.append(f"🤖 Model: {rag_system.model_name}")
497
  else:
498
- status.append("❌ OpenRouter: API key not configured")
499
 
500
  return "\n".join(status)
501
 
502
- # Create Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  with gr.Blocks(
504
- title="AI with Pinecone Integrated Inference RAG",
505
- theme=gr.themes.Soft(),
506
- css="""
507
- .container { max-width: 1200px; margin: auto; }
508
- .chat-container { height: 400px; overflow-y: auto; }
509
- .context-box { background-color: #f8f9fa; padding: 10px; border-radius: 5px; font-family: monospace; }
510
- .status-box { background-color: #e8f4f8; padding: 10px; border-radius: 5px; font-family: monospace; }
511
- """
512
  ) as demo:
513
 
 
514
  gr.HTML("""
515
- <div style="text-align: center; padding: 20px;">
516
- <h1>🤖 AI Assistant with Pinecone Integrated Inference RAG</h1>
517
- <p>This AI assistant uses Pinecone's integrated inference for embeddings and reranking with vector storage for memory.</p>
518
- <p>Powered by <strong>multilingual-e5-large</strong> embeddings and <strong>pinecone-rerank-v0</strong> reranking model.</p>
519
- <p><strong>🆕 Auto-Environment Creation:</strong> The system automatically creates a new Pinecone environment with integrated inference!</p>
520
  </div>
521
  """)
522
 
523
- # System Status
524
- with gr.Row():
525
- with gr.Column():
526
- status_display = gr.Textbox(
527
- label="🔧 System Status",
528
- value=get_system_status(),
529
- lines=8,
530
- interactive=False,
531
- elem_classes=["status-box"]
532
- )
533
- refresh_status_btn = gr.Button("🔄 Refresh Status", variant="secondary")
534
-
535
  with gr.Row():
536
  with gr.Column(scale=2):
537
  chatbot = gr.Chatbot(
538
- label="Conversation",
539
- height=400,
540
- elem_classes=["chat-container"]
541
  )
542
 
543
  with gr.Row():
544
  msg = gr.Textbox(
545
- placeholder="Type your message here...",
546
- label="Your Message",
547
- lines=2,
548
- scale=4
 
 
 
 
 
 
 
 
 
 
 
 
 
549
  )
550
- send_btn = gr.Button("Send", variant="primary", scale=1)
551
- clear_btn = gr.Button("Clear", variant="secondary", scale=1)
552
 
553
  with gr.Column(scale=1):
554
- gr.HTML("<h3>📚 RAG Context</h3>")
555
- context_display = gr.Textbox(
556
- label="Retrieved & Reranked Experiences",
557
- lines=15,
558
- interactive=False,
559
- elem_classes=["context-box"]
560
- )
 
 
561
 
 
562
  storage_info = gr.Textbox(
563
- label="Memory Storage Info",
564
- lines=3,
565
- interactive=False
 
 
566
  )
567
 
568
- with gr.Row():
569
- with gr.Column():
570
- gr.HTML("""
571
- <div style="margin-top: 20px; padding: 15px; background-color: #e8f4f8; border-radius: 8px;">
572
- <h3>🔧 Configuration</h3>
573
- <p><strong>Pinecone:</strong> ✅ Auto-configured with integrated inference</p>
574
- <p><strong>Embedding Model:</strong> multilingual-e5-large (1024 dimensions)</p>
575
- <p><strong>Reranking Model:</strong> pinecone-rerank-v0</p>
576
- <p><strong>OpenRouter:</strong> Set <code>OPENROUTER_API_KEY</code> environment variable</p>
577
- <br>
578
- <p><strong>🚀 Pinecone Integrated Inference Features:</strong></p>
579
- <ul>
580
- <li>🧠 Automatic text-to-vector conversion during upsert and search</li>
581
- <li>🔍 Smart retrieval with multilingual embeddings</li>
582
- <li>🔄 Advanced reranking for improved relevance</li>
583
- <li>📈 Learning and improvement over time</li>
584
- <li>🆔 Unique environment creation for each session</li>
585
- <li>⚡ Single API for embedding, storage, and retrieval</li>
586
- </ul>
587
- <br>
588
- <p><strong>Model Options:</strong></p>
589
- <ul>
590
- <li><code>multilingual-e5-large</code> - Multilingual embeddings (default)</li>
591
- <li><code>pinecone-rerank-v0</code> - Pinecone's reranking model (default)</li>
592
- <li><code>cohere-rerank-v3.5</code> - Cohere's reranking model</li>
593
- <li><code>pinecone-sparse-english-v0</code> - Sparse embeddings for keyword search</li>
594
- </ul>
595
- </div>
596
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
597
 
598
  # Event handlers
599
  def respond(message, history):
600
  if not message:
601
  return history, "", "", ""
602
 
603
- # Get AI response
604
  ai_response, context_used, storage_info_text = chat_with_rag(message, history)
605
 
606
- # Update history
607
  if history is None:
608
  history = []
609
  history.append((message, ai_response))
610
 
611
  return history, "", context_used, storage_info_text
612
 
613
- # Wire up the interface
 
 
 
 
 
614
  send_btn.click(
615
  respond,
616
  inputs=[msg, chatbot],
@@ -628,15 +760,22 @@ with gr.Blocks(
628
  outputs=[chatbot, msg, context_display, storage_info]
629
  )
630
 
631
- refresh_status_btn.click(
 
 
 
 
 
 
632
  get_system_status,
633
  outputs=[status_display]
634
  )
635
 
636
- # Launch the app
637
  if __name__ == "__main__":
638
  demo.launch(
639
  share=True,
640
- server_name="0.0.0.0",
641
- server_port=7860
 
642
  )
 
14
 
15
  def __init__(self):
16
  # Initialize Pinecone - use the hardcoded key or environment variable
17
+ self.pinecone_api_key = os.getenv("PINECONE_API_KEY")  # SECURITY: a live API key was committed here as the getenv fallback — it has been redacted; the leaked key must be rotated/revoked in the Pinecone console
18
  self.pinecone_environment = os.getenv("PINECONE_ENVIRONMENT", "us-east-1") # Serverless doesn't need specific environment
19
 
20
+ # Use a consistent index name instead of generating unique ones
21
+ self.index_name = os.getenv("PINECONE_INDEX_NAME", "shared-ai-experiences")
 
 
22
 
23
  # Pinecone inference model configuration
24
  self.embedding_model = os.getenv("PINECONE_EMBEDDING_MODEL", "multilingual-e5-large")
 
26
 
27
  # Initialize OpenRouter
28
  self.openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
29
+ self.model_name = os.getenv("MODEL_NAME", "meta-llama/llama-3.2-3b-instruct:free")
30
 
31
  # Initialize Pinecone client
32
  self.pc = None
 
34
 
35
  # Initialize Pinecone
36
  self.init_pinecone()
37
+
38
+ def update_model(self, new_model: str):
39
+ """Update the OpenRouter model"""
40
+ if new_model and new_model.strip():
41
+ self.model_name = new_model.strip()
42
+ return f"✅ Model updated to: {self.model_name}"
43
+ return "❌ Please enter a valid model name"
44
 
45
  def init_pinecone(self):
46
  """Initialize Pinecone connection with integrated inference"""
 
59
  print(f"Error listing indexes: {list_error}")
60
  existing_indexes = []
61
 
62
+ # Only create index if it doesn't exist
63
  if self.index_name not in existing_indexes:
64
+ print(f"Index '{self.index_name}' not found. Creating new Pinecone index with integrated inference...")
65
  try:
66
  # Create index with integrated embedding model
67
  index_model = self.pc.create_index_for_model(
 
77
  print(f"Index details: {index_model}")
78
 
79
  # Wait for index to be ready
80
+ print("Waiting for index to be ready...")
81
  time.sleep(10)
82
 
83
  except Exception as create_error:
84
  print(f"Error creating index with integrated inference: {create_error}")
85
  # Fallback to traditional index creation
86
  try:
87
+ print("Attempting fallback to traditional index creation...")
88
  self.pc.create_index(
89
  name=self.index_name,
90
  dimension=1024, # multilingual-e5-large dimension
 
100
  time.sleep(5)
101
  except Exception as fallback_error:
102
  print(f"Failed to create fallback index: {fallback_error}")
103
+ self.index = None
104
+ return
105
+ else:
106
+ print(f"Index '{self.index_name}' already exists. Connecting to existing index...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
+ # Connect to the index (whether existing or newly created)
109
  try:
110
  self.index = self.pc.Index(self.index_name)
111
  print(f"Successfully connected to Pinecone index: {self.index_name}")
112
 
113
+ # Test the connection and get stats
114
  stats = self.index.describe_index_stats()
115
  print(f"Index stats: {stats}")
116
 
117
+ # Check if this is an existing index with data
118
+ total_vectors = stats.get('total_vector_count', 0)
119
+ if total_vectors > 0:
120
+ print(f"Found existing index with {total_vectors} stored experiences. Continuing with shared knowledge base.")
121
+ else:
122
+ print("Index is empty. Ready to start building shared knowledge base.")
123
+
124
  except Exception as connect_error:
125
  print(f"Error connecting to index: {connect_error}")
126
  self.index = None
 
213
  "context": context[:500],
214
  "timestamp": datetime.datetime.now().isoformat(),
215
  "interaction_type": "conversation",
216
+ "session_id": getattr(self, 'session_id', 'shared')
217
  }
218
  }
219
 
220
  # Try upsert with integrated inference
221
  self.index.upsert_records([record])
222
+ return f"✅ Experience stored with integrated inference, ID: {experience_id[:8]}..."
223
 
224
  except Exception as integrated_error:
225
  print(f"Integrated inference failed: {integrated_error}")
 
234
  "context": context[:500],
235
  "timestamp": datetime.datetime.now().isoformat(),
236
  "interaction_type": "conversation",
237
+ "session_id": getattr(self, 'session_id', 'shared')
238
  })])
239
 
240
+ return f"✅ Experience stored with manual embedding, ID: {experience_id[:8]}..."
241
 
242
  except Exception as e:
243
  return f"❌ Error storing experience: {e}"
 
377
  # Build context from relevant experiences
378
  context_parts = []
379
  if relevant_experiences:
380
+ context_parts.append("🧠 Relevant past experiences from the shared knowledge base (powered by Pinecone inference):")
381
 
382
  # Extract documents for reranking
383
  documents = [f"User: {exp['user_input']} AI: {exp['ai_response']}" for exp in relevant_experiences]
 
392
  else:
393
  # Fallback to original results
394
  for i, exp in enumerate(relevant_experiences, 1):
395
+ context_parts.append(f"\n{i}. Previous interaction from shared knowledge (similarity: {exp['score']:.2f}):")
396
  context_parts.append(f" 👤 User: {exp['user_input'][:200]}...")
397
  context_parts.append(f" 🤖 AI: {exp['ai_response'][:200]}...")
398
  context_parts.append(f" 🕒 Time: {exp['timestamp'][:19]}")
 
400
  context_parts.append(f" 📝 Context: {exp['context'][:100]}...")
401
  context_parts.append("")
402
  else:
403
+ context_parts.append("🆕 No previous relevant experiences found in the shared knowledge base. This is a fresh conversation!")
404
 
405
  context_str = "\n".join(context_parts)
406
 
 
408
  messages = [
409
  {
410
  "role": "system",
411
+ "content": f"""You are an AI assistant with access to a shared knowledge base of past conversations and interactions through Pinecone's vector database with integrated inference.
 
412
 
413
+ IMPORTANT: The context below contains conversations from OTHER USERS and previous AI responses - this is NOT your personal memory, but rather a shared knowledge base that multiple users contribute to. Each conversation you have will also be added to this shared knowledge base for future users.
414
 
415
+ The embeddings are generated using {self.embedding_model} and results are reranked with {self.rerank_model}.
416
+
417
+ SHARED KNOWLEDGE BASE CONTEXT:
418
  {context_str}
419
 
420
+ Guidelines for using shared knowledge:
421
+ - The experiences above are from OTHER USERS' conversations, not your own memories
422
+ - Use these shared experiences to provide helpful, informed responses
423
+ - When referencing past interactions, make it clear they came from the shared knowledge base
424
+ - Don't claim personal ownership of experiences that belong to other users
425
+ - Learn from the collective knowledge while maintaining your own conversational identity
426
+ - Be transparent that you're drawing from a shared pool of experiences
427
+ - Build upon the collective wisdom while providing fresh, contextual responses
428
+ - Acknowledge when information comes from the shared knowledge base vs. the current conversation
429
+
430
+ Remember: You're part of a learning system where each conversation contributes to helping future users, but you should be clear about the source of your knowledge."""
431
  }
432
  ]
433
 
 
447
 
448
  return ai_response, context_str, storage_result
449
 
 
 
 
450
  def chat_with_rag(message: str, history: List = None) -> tuple:
451
  """Main chat function for Gradio interface"""
452
  if not message.strip():
 
480
  if rag_system.index:
481
  try:
482
  stats = rag_system.index.describe_index_stats()
483
+ total_vectors = stats.get('total_vector_count', 0)
484
+ status.append(f"✅ Pinecone: Connected ({total_vectors} experiences)")
485
+ status.append(f"🧠 Embedding: {rag_system.embedding_model}")
486
+ except Exception as e:
487
+ status.append(f"⚠️ Pinecone: Connected but stats unavailable")
 
488
  else:
489
  status.append("❌ Pinecone: Not connected")
490
 
491
  # Check OpenRouter
492
  if rag_system.openrouter_api_key:
493
+ status.append(f"✅ OpenRouter: {rag_system.model_name}")
 
494
  else:
495
+ status.append("❌ OpenRouter: Not configured")
496
 
497
  return "\n".join(status)
498
 
499
+ # Minimal CSS for clean appearance
500
+ minimal_css = """
501
+ /* Clean, minimal styling */
502
+ .gradio-container {
503
+ max-width: 1100px !important;
504
+ margin: 0 auto !important;
505
+ }
506
+
507
+ /* Remove excess padding and margins */
508
+ .block {
509
+ border: none !important;
510
+ box-shadow: none !important;
511
+ }
512
+
513
+ /* Simple header */
514
+ .header {
515
+ text-align: center;
516
+ padding: 1rem;
517
+ background: linear-gradient(90deg, #4f46e5, #7c3aed);
518
+ color: white;
519
+ border-radius: 8px;
520
+ margin-bottom: 1rem;
521
+ }
522
+
523
+ /* Clean chatbot styling */
524
+ .chatbot {
525
+ border: 1px solid #e5e7eb !important;
526
+ border-radius: 8px !important;
527
+ }
528
+
529
+ /* Simple input styling */
530
+ .input-box {
531
+ border: 1px solid #d1d5db !important;
532
+ border-radius: 6px !important;
533
+ }
534
+
535
+ /* Clean buttons */
536
+ .primary-btn {
537
+ background: #4f46e5 !important;
538
+ border: none !important;
539
+ border-radius: 6px !important;
540
+ color: white !important;
541
+ }
542
+
543
+ .secondary-btn {
544
+ background: #f3f4f6 !important;
545
+ border: 1px solid #d1d5db !important;
546
+ border-radius: 6px !important;
547
+ color: #374151 !important;
548
+ }
549
+
550
+ /* Context area */
551
+ .context-area {
552
+ background: #f9fafb !important;
553
+ border: 1px solid #e5e7eb !important;
554
+ border-radius: 6px !important;
555
+ font-family: monospace !important;
556
+ font-size: 12px !important;
557
+ }
558
+
559
+ /* Status display */
560
+ .status-display {
561
+ background: #f0f9ff !important;
562
+ border: 1px solid #bae6fd !important;
563
+ border-radius: 6px !important;
564
+ font-family: monospace !important;
565
+ font-size: 12px !important;
566
+ }
567
+
568
+ /* Memory info */
569
+ .memory-display {
570
+ background: #f0fdf4 !important;
571
+ border: 1px solid #bbf7d0 !important;
572
+ border-radius: 6px !important;
573
+ font-size: 12px !important;
574
+ }
575
+
576
+ /* Remove default gradio styling */
577
+ .gr-button {
578
+ font-size: 14px !important;
579
+ }
580
+
581
+ .gr-textbox {
582
+ font-size: 14px !important;
583
+ }
584
+
585
+ /* Tabs styling */
586
+ .tab-nav {
587
+ border-bottom: 1px solid #e5e7eb;
588
+ }
589
+
590
+ /* Collapsible sections */
591
+ .accordion {
592
+ border: 1px solid #e5e7eb;
593
+ border-radius: 6px;
594
+ margin: 0.5rem 0;
595
+ }
596
+ """
597
+
598
+ # Initialize the RAG system
599
+ rag_system = RAGMemorySystem()
600
+
601
+ # Create minimal Gradio interface
602
  with gr.Blocks(
603
+ title="AI Assistant with RAG",
604
+ css=minimal_css,
605
+ theme=gr.themes.Soft()
 
 
 
 
 
606
  ) as demo:
607
 
608
+ # Simple header
609
  gr.HTML("""
610
+ <div class="header">
611
+ <h2 style="margin: 0;">🤖 AI Assistant with RAG</h2>
612
+ <p style="margin: 5px 0 0 0; opacity: 0.9;">Powered by Pinecone Vector Search</p>
 
 
613
  </div>
614
  """)
615
 
616
+ # Main chat interface
 
 
 
 
 
 
 
 
 
 
 
617
  with gr.Row():
618
  with gr.Column(scale=2):
619
  chatbot = gr.Chatbot(
620
+ height=450,
621
+ show_label=False,
622
+ elem_classes=["chatbot"]
623
  )
624
 
625
  with gr.Row():
626
  msg = gr.Textbox(
627
+ placeholder="Type your message...",
628
+ show_label=False,
629
+ scale=4,
630
+ elem_classes=["input-box"]
631
+ )
632
+ send_btn = gr.Button(
633
+ "Send",
634
+ variant="primary",
635
+ scale=1,
636
+ elem_classes=["primary-btn"]
637
+ )
638
+
639
+ with gr.Row():
640
+ clear_btn = gr.Button(
641
+ "Clear Chat",
642
+ variant="secondary",
643
+ elem_classes=["secondary-btn"]
644
  )
 
 
645
 
646
  with gr.Column(scale=1):
647
+ # Context display (collapsible)
648
+ with gr.Accordion("Knowledge Context", open=False):
649
+ context_display = gr.Textbox(
650
+ lines=8,
651
+ interactive=False,
652
+ show_label=False,
653
+ placeholder="Retrieved context appears here...",
654
+ elem_classes=["context-area"]
655
+ )
656
 
657
+ # Storage info
658
  storage_info = gr.Textbox(
659
+ lines=1,
660
+ interactive=False,
661
+ show_label=False,
662
+ placeholder="Storage status...",
663
+ elem_classes=["memory-display"]
664
  )
665
 
666
+ # Settings section (collapsible)
667
+ with gr.Accordion("Settings", open=False):
668
+ with gr.Row():
669
+ with gr.Column():
670
+ gr.Markdown("### Model Configuration")
671
+ with gr.Row():
672
+ model_input = gr.Textbox(
673
+ label="OpenRouter Model",
674
+ value=rag_system.model_name,
675
+ placeholder="Enter model name...",
676
+ scale=3
677
+ )
678
+ update_btn = gr.Button(
679
+ "Update",
680
+ variant="primary",
681
+ scale=1,
682
+ elem_classes=["primary-btn"]
683
+ )
684
+
685
+ model_status = gr.Textbox(
686
+ label="Current Model",
687
+ value=f"Using: {rag_system.model_name}",
688
+ interactive=False
689
+ )
690
+
691
+ gr.Markdown("""
692
+ **Free Models:**
693
+ - `meta-llama/llama-3.2-3b-instruct:free`
694
+ - `microsoft/phi-3-mini-128k-instruct:free`
695
+ - `google/gemma-2-9b-it:free`
696
+ """)
697
+
698
+ with gr.Column():
699
+ gr.Markdown("### System Status")
700
+ status_display = gr.Textbox(
701
+ value=get_system_status(),
702
+ lines=4,
703
+ interactive=False,
704
+ show_label=False,
705
+ elem_classes=["status-display"]
706
+ )
707
+ refresh_btn = gr.Button(
708
+ "Refresh",
709
+ variant="secondary",
710
+ elem_classes=["secondary-btn"]
711
+ )
712
+
713
+ # About section (collapsible)
714
+ with gr.Accordion("About", open=False):
715
+ gr.Markdown("""
716
+ ### AI Assistant with RAG
717
+
718
+ This application uses **Retrieval-Augmented Generation** to provide more informed responses by:
719
+ - Storing conversations in a **Pinecone vector database**
720
+ - Retrieving relevant past experiences using **semantic search**
721
+ - Using **multilingual-e5-large** embeddings for understanding
722
+ - Reranking results with **pinecone-rerank-v0** for better relevance
723
+
724
+ **Privacy:** Conversations are stored in a shared knowledge base. No personal data is retained.
725
+ """)
726
 
727
  # Event handlers
728
  def respond(message, history):
729
  if not message:
730
  return history, "", "", ""
731
 
 
732
  ai_response, context_used, storage_info_text = chat_with_rag(message, history)
733
 
 
734
  if history is None:
735
  history = []
736
  history.append((message, ai_response))
737
 
738
  return history, "", context_used, storage_info_text
739
 
740
+ def update_model_handler(new_model):
741
+ result = rag_system.update_model(new_model)
742
+ status = f"Using: {rag_system.model_name}"
743
+ return "", status, get_system_status()
744
+
745
+ # Wire up events
746
  send_btn.click(
747
  respond,
748
  inputs=[msg, chatbot],
 
760
  outputs=[chatbot, msg, context_display, storage_info]
761
  )
762
 
763
+ update_btn.click(
764
+ update_model_handler,
765
+ inputs=[model_input],
766
+ outputs=[model_input, model_status, status_display]
767
+ )
768
+
769
+ refresh_btn.click(
770
  get_system_status,
771
  outputs=[status_display]
772
  )
773
 
774
+ # Launch
775
  if __name__ == "__main__":
776
  demo.launch(
777
  share=True,
778
+ server_name="0.0.0.0",
779
+ server_port=7860,
780
+ show_error=True
781
  )