ReallyFloppyPenguin committed on
Commit
41e4860
·
verified ·
1 Parent(s): 02eb16a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +281 -142
app.py CHANGED
@@ -14,13 +14,11 @@ class RAGMemorySystem:
14
 
15
  def __init__(self):
16
  # Initialize Pinecone - use the hardcoded key or environment variable
17
- self.pinecone_api_key = os.getenv("PINECONE_API_KEY")
18
  self.pinecone_environment = os.getenv("PINECONE_ENVIRONMENT", "us-east-1") # Serverless doesn't need specific environment
19
 
20
- # Generate unique index name with timestamp to avoid conflicts
21
- timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M")
22
- user_id = str(uuid.uuid4())[:8]
23
- self.index_name = os.getenv("PINECONE_INDEX_NAME", f"ai-experiences-{timestamp}-{user_id}")
24
 
25
  # Pinecone inference model configuration
26
  self.embedding_model = os.getenv("PINECONE_EMBEDDING_MODEL", "multilingual-e5-large")
@@ -28,7 +26,7 @@ class RAGMemorySystem:
28
 
29
  # Initialize OpenRouter
30
  self.openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
31
- self.model_name = os.getenv("MODEL_NAME", "meta-llama/llama-4-maverick:free")
32
 
33
  # Initialize Pinecone client
34
  self.pc = None
@@ -36,6 +34,13 @@ class RAGMemorySystem:
36
 
37
  # Initialize Pinecone
38
  self.init_pinecone()
 
 
 
 
 
 
 
39
 
40
  def init_pinecone(self):
41
  """Initialize Pinecone connection with integrated inference"""
@@ -54,9 +59,9 @@ class RAGMemorySystem:
54
  print(f"Error listing indexes: {list_error}")
55
  existing_indexes = []
56
 
57
- # Create index with integrated inference if it doesn't exist
58
  if self.index_name not in existing_indexes:
59
- print(f"Creating new Pinecone index with integrated inference: {self.index_name}")
60
  try:
61
  # Create index with integrated embedding model
62
  index_model = self.pc.create_index_for_model(
@@ -72,12 +77,14 @@ class RAGMemorySystem:
72
  print(f"Index details: {index_model}")
73
 
74
  # Wait for index to be ready
 
75
  time.sleep(10)
76
 
77
  except Exception as create_error:
78
  print(f"Error creating index with integrated inference: {create_error}")
79
  # Fallback to traditional index creation
80
  try:
 
81
  self.pc.create_index(
82
  name=self.index_name,
83
  dimension=1024, # multilingual-e5-large dimension
@@ -93,38 +100,27 @@ class RAGMemorySystem:
93
  time.sleep(5)
94
  except Exception as fallback_error:
95
  print(f"Failed to create fallback index: {fallback_error}")
96
-
97
- # Try with simpler name
98
- simple_name = f"ai-exp-{str(uuid.uuid4())[:6]}"
99
- try:
100
- self.pc.create_index(
101
- name=simple_name,
102
- dimension=1024,
103
- metric="cosine",
104
- spec={
105
- "serverless": {
106
- "cloud": "aws",
107
- "region": "us-east-1"
108
- }
109
- }
110
- )
111
- self.index_name = simple_name
112
- print(f"Created simple fallback index: {self.index_name}")
113
- time.sleep(5)
114
- except Exception as final_error:
115
- print(f"Final index creation failed: {final_error}")
116
- self.index = None
117
- return
118
 
119
- # Connect to the index
120
  try:
121
  self.index = self.pc.Index(self.index_name)
122
  print(f"Successfully connected to Pinecone index: {self.index_name}")
123
 
124
- # Test the connection
125
  stats = self.index.describe_index_stats()
126
  print(f"Index stats: {stats}")
127
 
 
 
 
 
 
 
 
128
  except Exception as connect_error:
129
  print(f"Error connecting to index: {connect_error}")
130
  self.index = None
@@ -217,13 +213,13 @@ class RAGMemorySystem:
217
  "context": context[:500],
218
  "timestamp": datetime.datetime.now().isoformat(),
219
  "interaction_type": "conversation",
220
- "session_id": getattr(self, 'session_id', 'default')
221
  }
222
  }
223
 
224
  # Try upsert with integrated inference
225
  self.index.upsert_records([record])
226
- return f"✅ Experience stored with integrated inference, ID: {experience_id[:8]}... in index: {self.index_name}"
227
 
228
  except Exception as integrated_error:
229
  print(f"Integrated inference failed: {integrated_error}")
@@ -238,10 +234,10 @@ class RAGMemorySystem:
238
  "context": context[:500],
239
  "timestamp": datetime.datetime.now().isoformat(),
240
  "interaction_type": "conversation",
241
- "session_id": getattr(self, 'session_id', 'default')
242
  })])
243
 
244
- return f"✅ Experience stored with manual embedding, ID: {experience_id[:8]}... in index: {self.index_name}"
245
 
246
  except Exception as e:
247
  return f"❌ Error storing experience: {e}"
@@ -381,7 +377,7 @@ class RAGMemorySystem:
381
  # Build context from relevant experiences
382
  context_parts = []
383
  if relevant_experiences:
384
- context_parts.append("🧠 Relevant past experiences (powered by Pinecone inference):")
385
 
386
  # Extract documents for reranking
387
  documents = [f"User: {exp['user_input']} AI: {exp['ai_response']}" for exp in relevant_experiences]
@@ -396,7 +392,7 @@ class RAGMemorySystem:
396
  else:
397
  # Fallback to original results
398
  for i, exp in enumerate(relevant_experiences, 1):
399
- context_parts.append(f"\n{i}. Previous interaction (similarity: {exp['score']:.2f}):")
400
  context_parts.append(f" 👤 User: {exp['user_input'][:200]}...")
401
  context_parts.append(f" 🤖 AI: {exp['ai_response'][:200]}...")
402
  context_parts.append(f" 🕒 Time: {exp['timestamp'][:19]}")
@@ -404,7 +400,7 @@ class RAGMemorySystem:
404
  context_parts.append(f" 📝 Context: {exp['context'][:100]}...")
405
  context_parts.append("")
406
  else:
407
- context_parts.append("🆕 No previous relevant experiences found. This is a fresh conversation!")
408
 
409
  context_str = "\n".join(context_parts)
410
 
@@ -412,20 +408,26 @@ class RAGMemorySystem:
412
  messages = [
413
  {
414
  "role": "system",
415
- "content": f"""You are an AI assistant with access to your past experiences and interactions through Pinecone's vector database with integrated inference.
416
- The embeddings are generated using {self.embedding_model} and results are reranked with {self.rerank_model}.
417
 
418
- Use the relevant past experiences below to inform your response, but don't just repeat them - learn from them and provide thoughtful, personalized responses.
419
 
 
 
 
420
  {context_str}
421
 
422
- Guidelines:
423
- - Reference past experiences when relevant and helpful
424
- - Show that you remember and learn from interactions using Pinecone's memory system
425
- - Provide helpful, contextual responses
426
- - Be conversational and engaging
427
- - If you see similar questions from before, build upon previous responses
428
- - Acknowledge when you're learning something new through the memory system"""
 
 
 
 
429
  }
430
  ]
431
 
@@ -445,9 +447,6 @@ Guidelines:
445
 
446
  return ai_response, context_str, storage_result
447
 
448
- # Initialize the RAG system
449
- rag_system = RAGMemorySystem()
450
-
451
  def chat_with_rag(message: str, history: List = None) -> tuple:
452
  """Main chat function for Gradio interface"""
453
  if not message.strip():
@@ -481,136 +480,269 @@ def get_system_status():
481
  if rag_system.index:
482
  try:
483
  stats = rag_system.index.describe_index_stats()
484
- status.append(f"✅ Pinecone: Connected to '{rag_system.index_name}'")
485
- status.append(f"📊 Stored experiences: {stats.get('total_vector_count', 0)}")
486
- status.append(f"🧠 Embedding model: {rag_system.embedding_model}")
487
- status.append(f"🔄 Reranking model: {rag_system.rerank_model}")
488
- except:
489
- status.append(f"⚠️ Pinecone: Connected but cannot get stats")
490
  else:
491
  status.append("❌ Pinecone: Not connected")
492
 
493
  # Check OpenRouter
494
  if rag_system.openrouter_api_key:
495
- status.append(f"✅ OpenRouter: API key configured")
496
- status.append(f"🤖 Model: {rag_system.model_name}")
497
  else:
498
- status.append("❌ OpenRouter: API key not configured")
499
 
500
  return "\n".join(status)
501
 
502
- # Create Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  with gr.Blocks(
504
- title="AI with Pinecone Integrated Inference RAG",
505
- theme=gr.themes.Soft(),
506
- css="""
507
- .container { max-width: 1200px; margin: auto; }
508
- .chat-container { height: 400px; overflow-y: auto; }
509
- .context-box { background-color: #f8f9fa; padding: 10px; border-radius: 5px; font-family: monospace; }
510
- .status-box { background-color: #e8f4f8; padding: 10px; border-radius: 5px; font-family: monospace; }
511
- """
512
  ) as demo:
513
 
 
514
  gr.HTML("""
515
- <div style="text-align: center; padding: 20px;">
516
- <h1>🤖 AI Assistant with Pinecone Integrated Inference RAG</h1>
517
- <p>This AI assistant uses Pinecone's integrated inference for embeddings and reranking with vector storage for memory.</p>
518
- <p>Powered by <strong>multilingual-e5-large</strong> embeddings and <strong>pinecone-rerank-v0</strong> reranking model.</p>
519
- <p><strong>🆕 Auto-Environment Creation:</strong> The system automatically creates a new Pinecone environment with integrated inference!</p>
520
  </div>
521
  """)
522
 
523
- # System Status
524
- with gr.Row():
525
- with gr.Column():
526
- status_display = gr.Textbox(
527
- label="🔧 System Status",
528
- value=get_system_status(),
529
- lines=8,
530
- interactive=False,
531
- elem_classes=["status-box"]
532
- )
533
- refresh_status_btn = gr.Button("🔄 Refresh Status", variant="secondary")
534
-
535
  with gr.Row():
536
  with gr.Column(scale=2):
537
  chatbot = gr.Chatbot(
538
- label="Conversation",
539
- height=400,
540
- elem_classes=["chat-container"]
541
  )
542
 
543
  with gr.Row():
544
  msg = gr.Textbox(
545
- placeholder="Type your message here...",
546
- label="Your Message",
547
- lines=2,
548
- scale=4
 
 
 
 
 
 
 
 
 
 
 
 
 
549
  )
550
- send_btn = gr.Button("Send", variant="primary", scale=1)
551
- clear_btn = gr.Button("Clear", variant="secondary", scale=1)
552
 
553
  with gr.Column(scale=1):
554
- gr.HTML("<h3>📚 RAG Context</h3>")
555
- context_display = gr.Textbox(
556
- label="Retrieved & Reranked Experiences",
557
- lines=15,
558
- interactive=False,
559
- elem_classes=["context-box"]
560
- )
 
 
561
 
 
562
  storage_info = gr.Textbox(
563
- label="Memory Storage Info",
564
- lines=3,
565
- interactive=False
 
 
566
  )
567
 
568
- with gr.Row():
569
- with gr.Column():
570
- gr.HTML("""
571
- <div style="margin-top: 20px; padding: 15px; background-color: #e8f4f8; border-radius: 8px;">
572
- <h3>🔧 Configuration</h3>
573
- <p><strong>Pinecone:</strong> ✅ Auto-configured with integrated inference</p>
574
- <p><strong>Embedding Model:</strong> multilingual-e5-large (1024 dimensions)</p>
575
- <p><strong>Reranking Model:</strong> pinecone-rerank-v0</p>
576
- <p><strong>OpenRouter:</strong> Set <code>OPENROUTER_API_KEY</code> environment variable</p>
577
- <br>
578
- <p><strong>🚀 Pinecone Integrated Inference Features:</strong></p>
579
- <ul>
580
- <li>🧠 Automatic text-to-vector conversion during upsert and search</li>
581
- <li>🔍 Smart retrieval with multilingual embeddings</li>
582
- <li>🔄 Advanced reranking for improved relevance</li>
583
- <li>📈 Learning and improvement over time</li>
584
- <li>🆔 Unique environment creation for each session</li>
585
- <li>⚡ Single API for embedding, storage, and retrieval</li>
586
- </ul>
587
- <br>
588
- <p><strong>Model Options:</strong></p>
589
- <ul>
590
- <li><code>multilingual-e5-large</code> - Multilingual embeddings (default)</li>
591
- <li><code>pinecone-rerank-v0</code> - Pinecone's reranking model (default)</li>
592
- <li><code>cohere-rerank-v3.5</code> - Cohere's reranking model</li>
593
- <li><code>pinecone-sparse-english-v0</code> - Sparse embeddings for keyword search</li>
594
- </ul>
595
- </div>
596
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
597
 
598
  # Event handlers
599
  def respond(message, history):
600
  if not message:
601
  return history, "", "", ""
602
 
603
- # Get AI response
604
  ai_response, context_used, storage_info_text = chat_with_rag(message, history)
605
 
606
- # Update history
607
  if history is None:
608
  history = []
609
  history.append((message, ai_response))
610
 
611
  return history, "", context_used, storage_info_text
612
 
613
- # Wire up the interface
 
 
 
 
 
614
  send_btn.click(
615
  respond,
616
  inputs=[msg, chatbot],
@@ -628,15 +760,22 @@ with gr.Blocks(
628
  outputs=[chatbot, msg, context_display, storage_info]
629
  )
630
 
631
- refresh_status_btn.click(
 
 
 
 
 
 
632
  get_system_status,
633
  outputs=[status_display]
634
  )
635
 
636
- # Launch the app
637
  if __name__ == "__main__":
638
  demo.launch(
639
  share=True,
640
- server_name="0.0.0.0",
641
- server_port=7860
 
642
  )
 
14
 
15
  def __init__(self):
16
  # Initialize Pinecone - use the hardcoded key or environment variable
17
+ self.pinecone_api_key = os.getenv("PINECONE_API_KEY")  # SECURITY: a live API key was committed here as the getenv fallback — it has been redacted; the leaked key must be rotated/revoked in the Pinecone console
18
  self.pinecone_environment = os.getenv("PINECONE_ENVIRONMENT", "us-east-1") # Serverless doesn't need specific environment
19
 
20
+ # Use a consistent index name instead of generating unique ones
21
+ self.index_name = os.getenv("PINECONE_INDEX_NAME", "shared-ai-experiences")
 
 
22
 
23
  # Pinecone inference model configuration
24
  self.embedding_model = os.getenv("PINECONE_EMBEDDING_MODEL", "multilingual-e5-large")
 
26
 
27
  # Initialize OpenRouter
28
  self.openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
29
+ self.model_name = os.getenv("MODEL_NAME", "meta-llama/llama-3.2-3b-instruct:free")
30
 
31
  # Initialize Pinecone client
32
  self.pc = None
 
34
 
35
  # Initialize Pinecone
36
  self.init_pinecone()
37
+
38
+ def update_model(self, new_model: str):
39
+ """Update the OpenRouter model"""
40
+ if new_model and new_model.strip():
41
+ self.model_name = new_model.strip()
42
+ return f"✅ Model updated to: {self.model_name}"
43
+ return "❌ Please enter a valid model name"
44
 
45
  def init_pinecone(self):
46
  """Initialize Pinecone connection with integrated inference"""
 
59
  print(f"Error listing indexes: {list_error}")
60
  existing_indexes = []
61
 
62
+ # Only create index if it doesn't exist
63
  if self.index_name not in existing_indexes:
64
+ print(f"Index '{self.index_name}' not found. Creating new Pinecone index with integrated inference...")
65
  try:
66
  # Create index with integrated embedding model
67
  index_model = self.pc.create_index_for_model(
 
77
  print(f"Index details: {index_model}")
78
 
79
  # Wait for index to be ready
80
+ print("Waiting for index to be ready...")
81
  time.sleep(10)
82
 
83
  except Exception as create_error:
84
  print(f"Error creating index with integrated inference: {create_error}")
85
  # Fallback to traditional index creation
86
  try:
87
+ print("Attempting fallback to traditional index creation...")
88
  self.pc.create_index(
89
  name=self.index_name,
90
  dimension=1024, # multilingual-e5-large dimension
 
100
  time.sleep(5)
101
  except Exception as fallback_error:
102
  print(f"Failed to create fallback index: {fallback_error}")
103
+ self.index = None
104
+ return
105
+ else:
106
+ print(f"Index '{self.index_name}' already exists. Connecting to existing index...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
+ # Connect to the index (whether existing or newly created)
109
  try:
110
  self.index = self.pc.Index(self.index_name)
111
  print(f"Successfully connected to Pinecone index: {self.index_name}")
112
 
113
+ # Test the connection and get stats
114
  stats = self.index.describe_index_stats()
115
  print(f"Index stats: {stats}")
116
 
117
+ # Check if this is an existing index with data
118
+ total_vectors = stats.get('total_vector_count', 0)
119
+ if total_vectors > 0:
120
+ print(f"Found existing index with {total_vectors} stored experiences. Continuing with shared knowledge base.")
121
+ else:
122
+ print("Index is empty. Ready to start building shared knowledge base.")
123
+
124
  except Exception as connect_error:
125
  print(f"Error connecting to index: {connect_error}")
126
  self.index = None
 
213
  "context": context[:500],
214
  "timestamp": datetime.datetime.now().isoformat(),
215
  "interaction_type": "conversation",
216
+ "session_id": getattr(self, 'session_id', 'shared')
217
  }
218
  }
219
 
220
  # Try upsert with integrated inference
221
  self.index.upsert_records([record])
222
+ return f"✅ Experience stored with integrated inference, ID: {experience_id[:8]}..."
223
 
224
  except Exception as integrated_error:
225
  print(f"Integrated inference failed: {integrated_error}")
 
234
  "context": context[:500],
235
  "timestamp": datetime.datetime.now().isoformat(),
236
  "interaction_type": "conversation",
237
+ "session_id": getattr(self, 'session_id', 'shared')
238
  })])
239
 
240
+ return f"✅ Experience stored with manual embedding, ID: {experience_id[:8]}..."
241
 
242
  except Exception as e:
243
  return f"❌ Error storing experience: {e}"
 
377
  # Build context from relevant experiences
378
  context_parts = []
379
  if relevant_experiences:
380
+ context_parts.append("🧠 Relevant past experiences from the shared knowledge base (powered by Pinecone inference):")
381
 
382
  # Extract documents for reranking
383
  documents = [f"User: {exp['user_input']} AI: {exp['ai_response']}" for exp in relevant_experiences]
 
392
  else:
393
  # Fallback to original results
394
  for i, exp in enumerate(relevant_experiences, 1):
395
+ context_parts.append(f"\n{i}. Previous interaction from shared knowledge (similarity: {exp['score']:.2f}):")
396
  context_parts.append(f" 👤 User: {exp['user_input'][:200]}...")
397
  context_parts.append(f" 🤖 AI: {exp['ai_response'][:200]}...")
398
  context_parts.append(f" 🕒 Time: {exp['timestamp'][:19]}")
 
400
  context_parts.append(f" 📝 Context: {exp['context'][:100]}...")
401
  context_parts.append("")
402
  else:
403
+ context_parts.append("🆕 No previous relevant experiences found in the shared knowledge base. This is a fresh conversation!")
404
 
405
  context_str = "\n".join(context_parts)
406
 
 
408
  messages = [
409
  {
410
  "role": "system",
411
+ "content": f"""You are an AI assistant with access to a shared knowledge base of past conversations and interactions through Pinecone's vector database with integrated inference.
 
412
 
413
+ IMPORTANT: The context below contains conversations from OTHER USERS and previous AI responses - this is NOT your personal memory, but rather a shared knowledge base that multiple users contribute to. Each conversation you have will also be added to this shared knowledge base for future users.
414
 
415
+ The embeddings are generated using {self.embedding_model} and results are reranked with {self.rerank_model}.
416
+
417
+ SHARED KNOWLEDGE BASE CONTEXT:
418
  {context_str}
419
 
420
+ Guidelines for using shared knowledge:
421
+ - The experiences above are from OTHER USERS' conversations, not your own memories
422
+ - Use these shared experiences to provide helpful, informed responses
423
+ - When referencing past interactions, make it clear they came from the shared knowledge base
424
+ - Don't claim personal ownership of experiences that belong to other users
425
+ - Learn from the collective knowledge while maintaining your own conversational identity
426
+ - Be transparent that you're drawing from a shared pool of experiences
427
+ - Build upon the collective wisdom while providing fresh, contextual responses
428
+ - Acknowledge when information comes from the shared knowledge base vs. the current conversation
429
+
430
+ Remember: You're part of a learning system where each conversation contributes to helping future users, but you should be clear about the source of your knowledge."""
431
  }
432
  ]
433
 
 
447
 
448
  return ai_response, context_str, storage_result
449
 
 
 
 
450
  def chat_with_rag(message: str, history: List = None) -> tuple:
451
  """Main chat function for Gradio interface"""
452
  if not message.strip():
 
480
  if rag_system.index:
481
  try:
482
  stats = rag_system.index.describe_index_stats()
483
+ total_vectors = stats.get('total_vector_count', 0)
484
+ status.append(f"✅ Pinecone: Connected ({total_vectors} experiences)")
485
+ status.append(f"🧠 Embedding: {rag_system.embedding_model}")
486
+ except Exception as e:
487
+ status.append(f"⚠️ Pinecone: Connected but stats unavailable")
 
488
  else:
489
  status.append("❌ Pinecone: Not connected")
490
 
491
  # Check OpenRouter
492
  if rag_system.openrouter_api_key:
493
+ status.append(f"✅ OpenRouter: {rag_system.model_name}")
 
494
  else:
495
+ status.append("❌ OpenRouter: Not configured")
496
 
497
  return "\n".join(status)
498
 
499
+ # Minimal CSS for clean appearance
500
+ minimal_css = """
501
+ /* Clean, minimal styling */
502
+ .gradio-container {
503
+ max-width: 1100px !important;
504
+ margin: 0 auto !important;
505
+ }
506
+
507
+ /* Remove excess padding and margins */
508
+ .block {
509
+ border: none !important;
510
+ box-shadow: none !important;
511
+ }
512
+
513
+ /* Simple header */
514
+ .header {
515
+ text-align: center;
516
+ padding: 1rem;
517
+ background: linear-gradient(90deg, #4f46e5, #7c3aed);
518
+ color: white;
519
+ border-radius: 8px;
520
+ margin-bottom: 1rem;
521
+ }
522
+
523
+ /* Clean chatbot styling */
524
+ .chatbot {
525
+ border: 1px solid #e5e7eb !important;
526
+ border-radius: 8px !important;
527
+ }
528
+
529
+ /* Simple input styling */
530
+ .input-box {
531
+ border: 1px solid #d1d5db !important;
532
+ border-radius: 6px !important;
533
+ }
534
+
535
+ /* Clean buttons */
536
+ .primary-btn {
537
+ background: #4f46e5 !important;
538
+ border: none !important;
539
+ border-radius: 6px !important;
540
+ color: white !important;
541
+ }
542
+
543
+ .secondary-btn {
544
+ background: #f3f4f6 !important;
545
+ border: 1px solid #d1d5db !important;
546
+ border-radius: 6px !important;
547
+ color: #374151 !important;
548
+ }
549
+
550
+ /* Context area */
551
+ .context-area {
552
+ background: #f9fafb !important;
553
+ border: 1px solid #e5e7eb !important;
554
+ border-radius: 6px !important;
555
+ font-family: monospace !important;
556
+ font-size: 12px !important;
557
+ }
558
+
559
+ /* Status display */
560
+ .status-display {
561
+ background: #f0f9ff !important;
562
+ border: 1px solid #bae6fd !important;
563
+ border-radius: 6px !important;
564
+ font-family: monospace !important;
565
+ font-size: 12px !important;
566
+ }
567
+
568
+ /* Memory info */
569
+ .memory-display {
570
+ background: #f0fdf4 !important;
571
+ border: 1px solid #bbf7d0 !important;
572
+ border-radius: 6px !important;
573
+ font-size: 12px !important;
574
+ }
575
+
576
+ /* Remove default gradio styling */
577
+ .gr-button {
578
+ font-size: 14px !important;
579
+ }
580
+
581
+ .gr-textbox {
582
+ font-size: 14px !important;
583
+ }
584
+
585
+ /* Tabs styling */
586
+ .tab-nav {
587
+ border-bottom: 1px solid #e5e7eb;
588
+ }
589
+
590
+ /* Collapsible sections */
591
+ .accordion {
592
+ border: 1px solid #e5e7eb;
593
+ border-radius: 6px;
594
+ margin: 0.5rem 0;
595
+ }
596
+ """
597
+
598
+ # Initialize the RAG system
599
+ rag_system = RAGMemorySystem()
600
+
601
+ # Create minimal Gradio interface
602
  with gr.Blocks(
603
+ title="AI Assistant with RAG",
604
+ css=minimal_css,
605
+ theme=gr.themes.Soft()
 
 
 
 
 
606
  ) as demo:
607
 
608
+ # Simple header
609
  gr.HTML("""
610
+ <div class="header">
611
+ <h2 style="margin: 0;">🤖 AI Assistant with RAG</h2>
612
+ <p style="margin: 5px 0 0 0; opacity: 0.9;">Powered by Pinecone Vector Search</p>
 
 
613
  </div>
614
  """)
615
 
616
+ # Main chat interface
 
 
 
 
 
 
 
 
 
 
 
617
  with gr.Row():
618
  with gr.Column(scale=2):
619
  chatbot = gr.Chatbot(
620
+ height=450,
621
+ show_label=False,
622
+ elem_classes=["chatbot"]
623
  )
624
 
625
  with gr.Row():
626
  msg = gr.Textbox(
627
+ placeholder="Type your message...",
628
+ show_label=False,
629
+ scale=4,
630
+ elem_classes=["input-box"]
631
+ )
632
+ send_btn = gr.Button(
633
+ "Send",
634
+ variant="primary",
635
+ scale=1,
636
+ elem_classes=["primary-btn"]
637
+ )
638
+
639
+ with gr.Row():
640
+ clear_btn = gr.Button(
641
+ "Clear Chat",
642
+ variant="secondary",
643
+ elem_classes=["secondary-btn"]
644
  )
 
 
645
 
646
  with gr.Column(scale=1):
647
+ # Context display (collapsible)
648
+ with gr.Accordion("Knowledge Context", open=False):
649
+ context_display = gr.Textbox(
650
+ lines=8,
651
+ interactive=False,
652
+ show_label=False,
653
+ placeholder="Retrieved context appears here...",
654
+ elem_classes=["context-area"]
655
+ )
656
 
657
+ # Storage info
658
  storage_info = gr.Textbox(
659
+ lines=1,
660
+ interactive=False,
661
+ show_label=False,
662
+ placeholder="Storage status...",
663
+ elem_classes=["memory-display"]
664
  )
665
 
666
+ # Settings section (collapsible)
667
+ with gr.Accordion("Settings", open=False):
668
+ with gr.Row():
669
+ with gr.Column():
670
+ gr.Markdown("### Model Configuration")
671
+ with gr.Row():
672
+ model_input = gr.Textbox(
673
+ label="OpenRouter Model",
674
+ value=rag_system.model_name,
675
+ placeholder="Enter model name...",
676
+ scale=3
677
+ )
678
+ update_btn = gr.Button(
679
+ "Update",
680
+ variant="primary",
681
+ scale=1,
682
+ elem_classes=["primary-btn"]
683
+ )
684
+
685
+ model_status = gr.Textbox(
686
+ label="Current Model",
687
+ value=f"Using: {rag_system.model_name}",
688
+ interactive=False
689
+ )
690
+
691
+ gr.Markdown("""
692
+ **Free Models:**
693
+ - `meta-llama/llama-3.2-3b-instruct:free`
694
+ - `microsoft/phi-3-mini-128k-instruct:free`
695
+ - `google/gemma-2-9b-it:free`
696
+ """)
697
+
698
+ with gr.Column():
699
+ gr.Markdown("### System Status")
700
+ status_display = gr.Textbox(
701
+ value=get_system_status(),
702
+ lines=4,
703
+ interactive=False,
704
+ show_label=False,
705
+ elem_classes=["status-display"]
706
+ )
707
+ refresh_btn = gr.Button(
708
+ "Refresh",
709
+ variant="secondary",
710
+ elem_classes=["secondary-btn"]
711
+ )
712
+
713
+ # About section (collapsible)
714
+ with gr.Accordion("About", open=False):
715
+ gr.Markdown("""
716
+ ### AI Assistant with RAG
717
+
718
+ This application uses **Retrieval-Augmented Generation** to provide more informed responses by:
719
+ - Storing conversations in a **Pinecone vector database**
720
+ - Retrieving relevant past experiences using **semantic search**
721
+ - Using **multilingual-e5-large** embeddings for understanding
722
+ - Reranking results with **pinecone-rerank-v0** for better relevance
723
+
724
+ **Privacy:** Conversations are stored in a shared knowledge base. No personal data is retained.
725
+ """)
726
 
727
  # Event handlers
728
  def respond(message, history):
729
  if not message:
730
  return history, "", "", ""
731
 
 
732
  ai_response, context_used, storage_info_text = chat_with_rag(message, history)
733
 
 
734
  if history is None:
735
  history = []
736
  history.append((message, ai_response))
737
 
738
  return history, "", context_used, storage_info_text
739
 
740
+ def update_model_handler(new_model):
741
+ result = rag_system.update_model(new_model)
742
+ status = f"Using: {rag_system.model_name}"
743
+ return "", status, get_system_status()
744
+
745
+ # Wire up events
746
  send_btn.click(
747
  respond,
748
  inputs=[msg, chatbot],
 
760
  outputs=[chatbot, msg, context_display, storage_info]
761
  )
762
 
763
+ update_btn.click(
764
+ update_model_handler,
765
+ inputs=[model_input],
766
+ outputs=[model_input, model_status, status_display]
767
+ )
768
+
769
+ refresh_btn.click(
770
  get_system_status,
771
  outputs=[status_display]
772
  )
773
 
774
+ # Launch
775
  if __name__ == "__main__":
776
  demo.launch(
777
  share=True,
778
+ server_name="0.0.0.0",
779
+ server_port=7860,
780
+ show_error=True
781
  )