Update app.py
app.py
CHANGED
@@ -14,13 +14,11 @@ class RAGMemorySystem:
 
     def __init__(self):
         # Initialize Pinecone - use the hardcoded key or environment variable
-        self.pinecone_api_key = os.getenv("PINECONE_API_KEY")
+        self.pinecone_api_key = os.getenv("PINECONE_API_KEY", "pcsk_6Ydj5y_QqLzPNzMEh2NMJv5Crh5XVYTTTkZTHkWjQkZAiU5SDthzYZW4ZvDF2qo1g9GPUR")
         self.pinecone_environment = os.getenv("PINECONE_ENVIRONMENT", "us-east-1")  # Serverless doesn't need specific environment
 
-        # …
-        …
-        user_id = str(uuid.uuid4())[:8]
-        self.index_name = os.getenv("PINECONE_INDEX_NAME", f"ai-experiences-{timestamp}-{user_id}")
+        # Use a consistent index name instead of generating unique ones
+        self.index_name = os.getenv("PINECONE_INDEX_NAME", "shared-ai-experiences")
 
         # Pinecone inference model configuration
         self.embedding_model = os.getenv("PINECONE_EMBEDDING_MODEL", "multilingual-e5-large")
@@ -28,7 +26,7 @@ class RAGMemorySystem:
 
         # Initialize OpenRouter
         self.openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
-        self.model_name = os.getenv("MODEL_NAME", "meta-llama/llama-…
+        self.model_name = os.getenv("MODEL_NAME", "meta-llama/llama-3.2-3b-instruct:free")
 
         # Initialize Pinecone client
         self.pc = None
@@ -36,6 +34,13 @@
 
         # Initialize Pinecone
         self.init_pinecone()
+
+    def update_model(self, new_model: str):
+        """Update the OpenRouter model"""
+        if new_model and new_model.strip():
+            self.model_name = new_model.strip()
+            return f"✅ Model updated to: {self.model_name}"
+        return "❌ Please enter a valid model name"
 
     def init_pinecone(self):
        """Initialize Pinecone connection with integrated inference"""
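A note on the new `update_model` helper: it is wired up to the Settings panel later in this diff, but it also works standalone. A quick illustrative usage (the model names are examples only; instantiating `RAGMemorySystem` will attempt the Pinecone connection from `__init__`):

    rag = RAGMemorySystem()
    print(rag.update_model("google/gemma-2-9b-it:free"))  # ✅ Model updated to: google/gemma-2-9b-it:free
    print(rag.update_model("   "))                        # ❌ Please enter a valid model name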
@@ -54,9 +59,9 @@
             print(f"Error listing indexes: {list_error}")
             existing_indexes = []
 
-        # …
+        # Only create index if it doesn't exist
         if self.index_name not in existing_indexes:
-            print(f"Creating new Pinecone index with integrated inference…
+            print(f"Index '{self.index_name}' not found. Creating new Pinecone index with integrated inference...")
             try:
                 # Create index with integrated embedding model
                 index_model = self.pc.create_index_for_model(
@@ -72,12 +77,14 @@
                 print(f"Index details: {index_model}")
 
                 # Wait for index to be ready
+                print("Waiting for index to be ready...")
                 time.sleep(10)
 
             except Exception as create_error:
                 print(f"Error creating index with integrated inference: {create_error}")
                 # Fallback to traditional index creation
                 try:
+                    print("Attempting fallback to traditional index creation...")
                    self.pc.create_index(
                        name=self.index_name,
                        dimension=1024,  # multilingual-e5-large dimension
@@ -93,38 +100,27 @@
                    time.sleep(5)
                except Exception as fallback_error:
                    print(f"Failed to create fallback index: {fallback_error}")
-
-                   # …
-                   try:
-                       simple_name = …
-                       self.pc.create_index(
-                           name=simple_name,
-                           dimension=1024,
-                           metric="cosine",
-                           spec={
-                               "serverless": {
-                                   "cloud": "aws",
-                                   "region": "us-east-1"
-                               }
-                           }
-                       )
-                       self.index_name = simple_name
-                       print(f"Created simple fallback index: {self.index_name}")
-                       time.sleep(5)
-                   except Exception as final_error:
-                       print(f"Final index creation failed: {final_error}")
-                       self.index = None
-                       return
+                   self.index = None
+                   return
+        else:
+            print(f"Index '{self.index_name}' already exists. Connecting to existing index...")
 
-        # Connect to the index
+        # Connect to the index (whether existing or newly created)
         try:
             self.index = self.pc.Index(self.index_name)
             print(f"Successfully connected to Pinecone index: {self.index_name}")
 
-            # Test the connection
+            # Test the connection and get stats
             stats = self.index.describe_index_stats()
             print(f"Index stats: {stats}")
 
+            # Check if this is an existing index with data
+            total_vectors = stats.get('total_vector_count', 0)
+            if total_vectors > 0:
+                print(f"Found existing index with {total_vectors} stored experiences. Continuing with shared knowledge base.")
+            else:
+                print("Index is empty. Ready to start building shared knowledge base.")
+
         except Exception as connect_error:
             print(f"Error connecting to index: {connect_error}")
             self.index = None
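For context on the create-or-connect flow this hunk settles on, here is a minimal standalone sketch, assuming the current `pinecone` Python client (`pip install pinecone`); the `has_index` check and the field mapping are illustrative, not code from this repo:

    import os
    from pinecone import Pinecone

    pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
    index_name = os.getenv("PINECONE_INDEX_NAME", "shared-ai-experiences")

    # Create the index only if it doesn't exist, so every restart reuses
    # the same shared knowledge base instead of minting a new index.
    if not pc.has_index(index_name):
        pc.create_index_for_model(
            name=index_name,
            cloud="aws",
            region="us-east-1",
            embed={
                "model": "multilingual-e5-large",  # 1024-dim, matches the fallback path
                "field_map": {"text": "text"},     # record field that gets embedded
            },
        )

    index = pc.Index(index_name)
    print(index.describe_index_stats())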
@@ -217,13 +213,13 @@ class RAGMemorySystem:
                    "context": context[:500],
                    "timestamp": datetime.datetime.now().isoformat(),
                    "interaction_type": "conversation",
-                   "session_id": getattr(self, 'session_id', '…
+                   "session_id": getattr(self, 'session_id', 'shared')
                }
            }
 
            # Try upsert with integrated inference
            self.index.upsert_records([record])
-           return f"✅ Experience stored with integrated inference, ID: {experience_id[:8]}…
+           return f"✅ Experience stored with integrated inference, ID: {experience_id[:8]}..."
 
        except Exception as integrated_error:
            print(f"Integrated inference failed: {integrated_error}")
@@ -238,10 +234,10 @@
                "context": context[:500],
                "timestamp": datetime.datetime.now().isoformat(),
                "interaction_type": "conversation",
-               "session_id": getattr(self, 'session_id', '…
+               "session_id": getattr(self, 'session_id', 'shared')
            })])
 
-           return f"✅ Experience stored with manual embedding, ID: {experience_id[:8]}…
+           return f"✅ Experience stored with manual embedding, ID: {experience_id[:8]}..."
 
        except Exception as e:
            return f"❌ Error storing experience: {e}"
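The two hunks above close off previously truncated return strings in the two write paths. As a reference point, a sketch of what each path sends to Pinecone — an integrated-inference record versus a manual client-side embedding. The namespace argument and the `pc.inference.embed` call follow current Pinecone client docs and are assumptions here; the metadata keys mirror the diff:

    import uuid, datetime

    experience_id = str(uuid.uuid4())

    # Path 1: integrated inference — upsert raw text; the index embeds it
    # server-side with its configured model.
    record = {
        "_id": experience_id,
        "text": "User: ... AI: ...",  # the field mapped for embedding
        "interaction_type": "conversation",
        "timestamp": datetime.datetime.now().isoformat(),
        "session_id": "shared",
    }
    index.upsert_records("__default__", [record])

    # Path 2: manual fallback — embed client-side, then upsert a plain vector.
    emb = pc.inference.embed(
        model="multilingual-e5-large",
        inputs=["User: ... AI: ..."],
        parameters={"input_type": "passage"},
    )
    index.upsert([(experience_id, emb[0]["values"], {"session_id": "shared"})])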
@@ -381,7 +377,7 @@ class RAGMemorySystem:
         # Build context from relevant experiences
         context_parts = []
         if relevant_experiences:
-            context_parts.append("🧠 Relevant past experiences (powered by Pinecone inference):")
+            context_parts.append("🧠 Relevant past experiences from the shared knowledge base (powered by Pinecone inference):")
 
             # Extract documents for reranking
             documents = [f"User: {exp['user_input']} AI: {exp['ai_response']}" for exp in relevant_experiences]
@@ -396,7 +392,7 @@
         else:
             # Fallback to original results
             for i, exp in enumerate(relevant_experiences, 1):
-                context_parts.append(f"\n{i}. Previous interaction (similarity: {exp['score']:.2f}):")
+                context_parts.append(f"\n{i}. Previous interaction from shared knowledge (similarity: {exp['score']:.2f}):")
                 context_parts.append(f"   👤 User: {exp['user_input'][:200]}...")
                 context_parts.append(f"   🤖 AI: {exp['ai_response'][:200]}...")
                 context_parts.append(f"   🕒 Time: {exp['timestamp'][:19]}")
@@ -404,7 +400,7 @@
                 context_parts.append(f"   📝 Context: {exp['context'][:100]}...")
             context_parts.append("")
         else:
-            context_parts.append("🆕 No previous relevant experiences found. This is a fresh conversation!")
+            context_parts.append("🆕 No previous relevant experiences found in the shared knowledge base. This is a fresh conversation!")
 
         context_str = "\n".join(context_parts)
 
@@ -412,20 +408,26 @@
         messages = [
             {
                 "role": "system",
-                "content": f"""You are an AI assistant with access to …
-The embeddings are generated using {self.embedding_model} and results are reranked with {self.rerank_model}.
+                "content": f"""You are an AI assistant with access to a shared knowledge base of past conversations and interactions through Pinecone's vector database with integrated inference.
 
-…
+IMPORTANT: The context below contains conversations from OTHER USERS and previous AI responses - this is NOT your personal memory, but rather a shared knowledge base that multiple users contribute to. Each conversation you have will also be added to this shared knowledge base for future users.
 
+The embeddings are generated using {self.embedding_model} and results are reranked with {self.rerank_model}.
+
+SHARED KNOWLEDGE BASE CONTEXT:
 {context_str}
 
-Guidelines:
-- …
-- …
-- …
-- …
-- …
-- …
+Guidelines for using shared knowledge:
+- The experiences above are from OTHER USERS' conversations, not your own memories
+- Use these shared experiences to provide helpful, informed responses
+- When referencing past interactions, make it clear they came from the shared knowledge base
+- Don't claim personal ownership of experiences that belong to other users
+- Learn from the collective knowledge while maintaining your own conversational identity
+- Be transparent that you're drawing from a shared pool of experiences
+- Build upon the collective wisdom while providing fresh, contextual responses
+- Acknowledge when information comes from the shared knowledge base vs. the current conversation
+
+Remember: You're part of a learning system where each conversation contributes to helping future users, but you should be clear about the source of your knowledge."""
             }
         ]
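The enlarged system prompt above becomes the first entry of the chat `messages` list sent to OpenRouter. The request itself sits outside this hunk; as a reference, a minimal sketch against OpenRouter's OpenAI-compatible endpoint using `requests` (prompt and question strings are placeholders):

    import os, requests

    messages = [
        {"role": "system", "content": "...shared knowledge base prompt..."},
        {"role": "user", "content": "What have others asked about Pinecone?"},
    ]

    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"},
        json={
            "model": os.getenv("MODEL_NAME", "meta-llama/llama-3.2-3b-instruct:free"),
            "messages": messages,
        },
        timeout=60,
    )
    ai_response = resp.json()["choices"][0]["message"]["content"]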
@@ -445,9 +447,6 @@ Guidelines:
 
     return ai_response, context_str, storage_result
 
-# Initialize the RAG system
-rag_system = RAGMemorySystem()
-
 def chat_with_rag(message: str, history: List = None) -> tuple:
     """Main chat function for Gradio interface"""
     if not message.strip():
@@ -481,136 +480,269 @@ def get_system_status():
     if rag_system.index:
         try:
             stats = rag_system.index.describe_index_stats()
-            …
-            status.append(f"…
-            status.append(f"🧠 Embedding…
-        …
-        …
-            status.append(f"⚠️ Pinecone: Connected but cannot get stats")
+            total_vectors = stats.get('total_vector_count', 0)
+            status.append(f"✅ Pinecone: Connected ({total_vectors} experiences)")
+            status.append(f"🧠 Embedding: {rag_system.embedding_model}")
+        except Exception as e:
+            status.append(f"⚠️ Pinecone: Connected but stats unavailable")
     else:
         status.append("❌ Pinecone: Not connected")
 
     # Check OpenRouter
     if rag_system.openrouter_api_key:
-        status.append(f"✅ OpenRouter: …
-        status.append(f"🤖 Model: {rag_system.model_name}")
+        status.append(f"✅ OpenRouter: {rag_system.model_name}")
     else:
-        status.append("❌ OpenRouter: …
+        status.append("❌ OpenRouter: Not configured")
 
     return "\n".join(status)
 
-# …
+# Minimal CSS for clean appearance
+minimal_css = """
+/* Clean, minimal styling */
+.gradio-container {
+    max-width: 1100px !important;
+    margin: 0 auto !important;
+}
+
+/* Remove excess padding and margins */
+.block {
+    border: none !important;
+    box-shadow: none !important;
+}
+
+/* Simple header */
+.header {
+    text-align: center;
+    padding: 1rem;
+    background: linear-gradient(90deg, #4f46e5, #7c3aed);
+    color: white;
+    border-radius: 8px;
+    margin-bottom: 1rem;
+}
+
+/* Clean chatbot styling */
+.chatbot {
+    border: 1px solid #e5e7eb !important;
+    border-radius: 8px !important;
+}
+
+/* Simple input styling */
+.input-box {
+    border: 1px solid #d1d5db !important;
+    border-radius: 6px !important;
+}
+
+/* Clean buttons */
+.primary-btn {
+    background: #4f46e5 !important;
+    border: none !important;
+    border-radius: 6px !important;
+    color: white !important;
+}
+
+.secondary-btn {
+    background: #f3f4f6 !important;
+    border: 1px solid #d1d5db !important;
+    border-radius: 6px !important;
+    color: #374151 !important;
+}
+
+/* Context area */
+.context-area {
+    background: #f9fafb !important;
+    border: 1px solid #e5e7eb !important;
+    border-radius: 6px !important;
+    font-family: monospace !important;
+    font-size: 12px !important;
+}
+
+/* Status display */
+.status-display {
+    background: #f0f9ff !important;
+    border: 1px solid #bae6fd !important;
+    border-radius: 6px !important;
+    font-family: monospace !important;
+    font-size: 12px !important;
+}
+
+/* Memory info */
+.memory-display {
+    background: #f0fdf4 !important;
+    border: 1px solid #bbf7d0 !important;
+    border-radius: 6px !important;
+    font-size: 12px !important;
+}
+
+/* Remove default gradio styling */
+.gr-button {
+    font-size: 14px !important;
+}
+
+.gr-textbox {
+    font-size: 14px !important;
+}
+
+/* Tabs styling */
+.tab-nav {
+    border-bottom: 1px solid #e5e7eb;
+}
+
+/* Collapsible sections */
+.accordion {
+    border: 1px solid #e5e7eb;
+    border-radius: 6px;
+    margin: 0.5rem 0;
+}
+"""
+
+# Initialize the RAG system
+rag_system = RAGMemorySystem()
+
+# Create minimal Gradio interface
 with gr.Blocks(
-    title="AI with …
-    …
-    css="""
-    .container { max-width: 1200px; margin: auto; }
-    .chat-container { height: 400px; overflow-y: auto; }
-    .context-box { background-color: #f8f9fa; padding: 10px; border-radius: 5px; font-family: monospace; }
-    .status-box { background-color: #e8f4f8; padding: 10px; border-radius: 5px; font-family: monospace; }
-    """
+    title="AI Assistant with RAG",
+    css=minimal_css,
+    theme=gr.themes.Soft()
 ) as demo:
 
+    # Simple header
     gr.HTML("""
-    <div …
-    <…
-    <p…
-    <p>Powered by <strong>multilingual-e5-large</strong> embeddings and <strong>pinecone-rerank-v0</strong> reranking model.</p>
-    <p><strong>🆕 Auto-Environment Creation:</strong> The system automatically creates a new Pinecone environment with integrated inference!</p>
+    <div class="header">
+        <h2 style="margin: 0;">🤖 AI Assistant with RAG</h2>
+        <p style="margin: 5px 0 0 0; opacity: 0.9;">Powered by Pinecone Vector Search</p>
     </div>
     """)
 
-    # …
-    with gr.Row():
-        with gr.Column():
-            status_display = gr.Textbox(
-                label="🔧 System Status",
-                value=get_system_status(),
-                lines=8,
-                interactive=False,
-                elem_classes=["status-box"]
-            )
-            refresh_status_btn = gr.Button("🔄 Refresh Status", variant="secondary")
-
+    # Main chat interface
     with gr.Row():
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(
-                …
-                …
-                elem_classes=["…
+                height=450,
+                show_label=False,
+                elem_classes=["chatbot"]
             )
 
             with gr.Row():
                 msg = gr.Textbox(
-                    placeholder="Type your message…
-                    …
-                    …
-                    …
-                )
-                send_btn = gr.Button("Send", variant="primary", scale=1)
-                clear_btn = gr.Button("Clear", variant="secondary", scale=1)
+                    placeholder="Type your message...",
+                    show_label=False,
+                    scale=4,
+                    elem_classes=["input-box"]
+                )
+                send_btn = gr.Button(
+                    "Send",
+                    variant="primary",
+                    scale=1,
+                    elem_classes=["primary-btn"]
+                )
+
+            with gr.Row():
+                clear_btn = gr.Button(
+                    "Clear Chat",
+                    variant="secondary",
+                    elem_classes=["secondary-btn"]
+                )
 
         with gr.Column(scale=1):
-            … (context display block, truncated in the source view)
+            # Context display (collapsible)
+            with gr.Accordion("Knowledge Context", open=False):
+                context_display = gr.Textbox(
+                    lines=8,
+                    interactive=False,
+                    show_label=False,
+                    placeholder="Retrieved context appears here...",
+                    elem_classes=["context-area"]
+                )
 
+            # Storage info
             storage_info = gr.Textbox(
-                … (parameters truncated in the source view)
+                lines=1,
+                interactive=False,
+                show_label=False,
+                placeholder="Storage status...",
+                elem_classes=["memory-display"]
             )
 
-    … (≈30 further removed lines, truncated in the source view)
+    # Settings section (collapsible)
+    with gr.Accordion("Settings", open=False):
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("### Model Configuration")
+                with gr.Row():
+                    model_input = gr.Textbox(
+                        label="OpenRouter Model",
+                        value=rag_system.model_name,
+                        placeholder="Enter model name...",
+                        scale=3
+                    )
+                    update_btn = gr.Button(
+                        "Update",
+                        variant="primary",
+                        scale=1,
+                        elem_classes=["primary-btn"]
+                    )
+
+                model_status = gr.Textbox(
+                    label="Current Model",
+                    value=f"Using: {rag_system.model_name}",
+                    interactive=False
+                )
+
+                gr.Markdown("""
+                **Free Models:**
+                - `meta-llama/llama-3.2-3b-instruct:free`
+                - `microsoft/phi-3-mini-128k-instruct:free`
+                - `google/gemma-2-9b-it:free`
+                """)
+
+            with gr.Column():
+                gr.Markdown("### System Status")
+                status_display = gr.Textbox(
+                    value=get_system_status(),
+                    lines=4,
+                    interactive=False,
+                    show_label=False,
+                    elem_classes=["status-display"]
+                )
+                refresh_btn = gr.Button(
+                    "Refresh",
+                    variant="secondary",
+                    elem_classes=["secondary-btn"]
+                )
+
+    # About section (collapsible)
+    with gr.Accordion("About", open=False):
+        gr.Markdown("""
+        ### AI Assistant with RAG
+
+        This application uses **Retrieval-Augmented Generation** to provide more informed responses by:
+        - Storing conversations in a **Pinecone vector database**
+        - Retrieving relevant past experiences using **semantic search**
+        - Using **multilingual-e5-large** embeddings for understanding
+        - Reranking results with **pinecone-rerank-v0** for better relevance
+
+        **Privacy:** Conversations are stored in a shared knowledge base. No personal data is retained.
+        """)
 
     # Event handlers
     def respond(message, history):
         if not message:
             return history, "", "", ""
 
-        # Get AI response
         ai_response, context_used, storage_info_text = chat_with_rag(message, history)
 
-        # Update history
         if history is None:
             history = []
         history.append((message, ai_response))
 
         return history, "", context_used, storage_info_text
 
-    …
+    def update_model_handler(new_model):
+        result = rag_system.update_model(new_model)
+        status = f"Using: {rag_system.model_name}"
+        return "", status, get_system_status()
+
+    # Wire up events
     send_btn.click(
         respond,
         inputs=[msg, chatbot],
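A detail worth making explicit in the UI hunk above: each Gradio handler's return tuple must line up positionally with the `outputs` list it is wired to — `respond` returns (history, cleared textbox, context, storage note) to match its four output components. A compact sketch of the wiring pattern; the `msg.submit` and `clear_btn` handlers here are assumptions consistent with the shared `outputs` list visible at new line 760:

    # One return value per component in `outputs`, in the same order.
    send_btn.click(
        respond,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg, context_display, storage_info],
    )
    msg.submit(  # pressing Enter mirrors the Send button
        respond,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg, context_display, storage_info],
    )
    clear_btn.click(
        lambda: ([], "", "", ""),  # reset chat, textbox, context, storage note
        outputs=[chatbot, msg, context_display, storage_info],
    )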
@@ -628,15 +760,22 @@ with gr.Blocks(
         outputs=[chatbot, msg, context_display, storage_info]
     )
 
-    refresh_status_btn.click(
+    update_btn.click(
+        update_model_handler,
+        inputs=[model_input],
+        outputs=[model_input, model_status, status_display]
+    )
+
+    refresh_btn.click(
         get_system_status,
         outputs=[status_display]
     )
 
-# Launch …
+# Launch
 if __name__ == "__main__":
     demo.launch(
         share=True,
-        server_name="0.0.0.0",
-        server_port=7860
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
     )
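To run the updated app against your own resources instead of the defaults baked into this commit, set the environment variables the code reads via `os.getenv` before launching. The names below all appear in the diff; the key values are placeholders:

    import os

    os.environ.setdefault("PINECONE_API_KEY", "<your-pinecone-key>")
    os.environ.setdefault("PINECONE_INDEX_NAME", "shared-ai-experiences")
    os.environ.setdefault("PINECONE_EMBEDDING_MODEL", "multilingual-e5-large")
    os.environ.setdefault("OPENROUTER_API_KEY", "<your-openrouter-key>")
    os.environ.setdefault("MODEL_NAME", "meta-llama/llama-3.2-3b-instruct:free")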