Update app.py

app.py CHANGED
@@ -1,3 +1,104 @@
+# import gradio as gr
+# from pinecone import Pinecone
+# from sentence_transformers import SentenceTransformer
+# from openai import AzureOpenAI
+# from huggingface_hub import login as hf_login
+# import cohere
+# import os
+# from dotenv import load_dotenv
+
+# load_dotenv()  # Load keys from .env file
+
+# # === ENVIRONMENT VARIABLES ===
+# AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
+# AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
+# COHERE_API_KEY = os.getenv("COHERE_API_KEY")
+# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
+
+
+
+# # Pinecone Setup
+# EMBED_INDEXES = {
+#     "cohere": {
+#         "name": "cohere-pdf-index",
+#         "dimension": 1536,
+#         "region": "us-east-1"
+#     },
+#     "qwen": {
+#         "name": "gwen-embeddings",
+#         "dimension": 1024,
+#         "region": "us-west-2"
+#     }
+# }
+# pc = Pinecone(api_key=PINECONE_API_KEY)
+
+# # Azure OpenAI Client
+# llm_client = AzureOpenAI(
+#     api_key=AZURE_OPENAI_KEY,
+#     api_version="2024-12-01-preview",
+#     azure_endpoint=AZURE_OPENAI_ENDPOINT
+# )
+
+# def run_rag_query(query: str, model_choice: str) -> str:
+#     if model_choice not in EMBED_INDEXES:
+#         return f"Invalid model selected. Choose from {list(EMBED_INDEXES.keys())}"
+
+#     index_config = EMBED_INDEXES[model_choice]
+#     index = pc.Index(index_config["name"])
+
+#     if model_choice == "cohere":
+#         co = cohere.Client(COHERE_API_KEY)
+#         embedding = co.embed(
+#             model="embed-v4.0",
+#             texts=[query],
+#             input_type="search_query",
+#             truncate="NONE"
+#         ).embeddings[0]
+#     else:  # qwen
+#         model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")
+#         embedding = model.encode([query], prompt_name="query")[0].tolist()
+
+#     results = index.query(vector=embedding, top_k=15, include_metadata=True)
+#     context = "\n\n".join([m["metadata"]["text"] for m in results.matches])
+
+#     prompt = f"""You are a helpful assistant. Use the following context to answer the question:
+
+# Context:
+# {context}
+
+# Question:
+# {query}
+
+# Answer:"""
+
+#     response = llm_client.chat.completions.create(
+#         model="gpt-4o-mini",
+#         messages=[{"role": "user", "content": prompt}],
+#         temperature=0.3
+#     )
+#     answer = response.choices[0].message.content
+
+#     top_matches = "\n\n".join(
+#         [f"Rank {i+1}: {m.metadata['text'][:200]}" for i, m in enumerate(results.matches)]
+#     )
+
+#     return f"### Answer:\n{answer}\n\n---\n### Top Retrieved Chunks:\n{top_matches}"
+
+# iface = gr.Interface(
+#     fn=run_rag_query,
+#     inputs=[
+#         gr.Textbox(label="Enter your query"),
+#         gr.Radio(["cohere", "qwen"], label="Choose embedding model")
+#     ],
+#     outputs=gr.Markdown(label="RAG Response"),
+#     title="QWEN vs COHERE RAG App",
+#     description="Ask a question and retrieve contextual answers from your embedded documents.\n[PDF Files Here](https://drive.google.com/drive/folders/1fq-PyNptFg20cknkzNrmW6Tev-869RY9?usp=sharing)"
+# )
+
+# if __name__ == "__main__":
+#     iface.launch()
+
+
 import gradio as gr
 from pinecone import Pinecone
 from sentence_transformers import SentenceTransformer
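The commit keeps the entire previous version of the file as a commented-out block above the live code. All four secrets are read with `os.getenv`, which silently yields `None` when a key is not configured on the Space, so a missing secret only surfaces later as an opaque auth error. A fail-fast check at startup is a cheap safeguard (a sketch, not part of this commit; the key names come from the code above):

    import os

    # Hypothetical startup guard; these four names are the ones app.py reads.
    REQUIRED = ["AZURE_OPENAI_KEY", "AZURE_OPENAI_ENDPOINT", "COHERE_API_KEY", "PINECONE_API_KEY"]
    missing = [k for k in REQUIRED if not os.getenv(k)]
    if missing:
        raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")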
@@ -15,8 +116,6 @@ AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
 
-
-
 # Pinecone Setup
 EMBED_INDEXES = {
     "cohere": {
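`EMBED_INDEXES` keeps one index per embedding model because the vector dimensions differ (1536 for the Cohere index, 1024 for the Qwen index) and a Pinecone index has a fixed dimension. If the indexes ever need to be recreated, something along these lines would match the config (a sketch; the serverless spec and cosine metric are assumptions, not taken from this commit):

    import os
    from pinecone import Pinecone, ServerlessSpec

    # EMBED_INDEXES as defined in app.py above.
    pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
    for cfg in EMBED_INDEXES.values():
        if cfg["name"] not in pc.list_indexes().names():
            pc.create_index(
                name=cfg["name"],
                dimension=cfg["dimension"],
                metric="cosine",  # assumption: must match how the PDFs were embedded and indexed
                spec=ServerlessSpec(cloud="aws", region=cfg["region"]),
            )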
@@ -46,6 +145,7 @@ def run_rag_query(query: str, model_choice: str) -> str:
     index_config = EMBED_INDEXES[model_choice]
     index = pc.Index(index_config["name"])
 
+    # Get embedding
     if model_choice == "cohere":
         co = cohere.Client(COHERE_API_KEY)
         embedding = co.embed(
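One cost the commit does not touch: `run_rag_query` still builds a fresh `cohere.Client` and reloads the Qwen `SentenceTransformer` on every call, and loading a 0.6B model per query dominates latency. A one-slot cache would pay the load once per process (a hypothetical refactor using the same model name; `COHERE_API_KEY` is the module-level value above):

    from functools import lru_cache

    import cohere
    from sentence_transformers import SentenceTransformer

    @lru_cache(maxsize=1)
    def get_qwen_model() -> SentenceTransformer:
        # Loaded once per process instead of once per query.
        return SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")

    @lru_cache(maxsize=1)
    def get_cohere_client() -> cohere.Client:
        return cohere.Client(COHERE_API_KEY)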
@@ -58,17 +158,16 @@ def run_rag_query(query: str, model_choice: str) -> str:
         model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")
         embedding = model.encode([query], prompt_name="query")[0].tolist()
 
+    # Query Pinecone
     results = index.query(vector=embedding, top_k=15, include_metadata=True)
-    context = "\n\n".join([m["metadata"]["text"] for m in results.matches])
+    context = "\n\n".join([m["metadata"]["text"] for m in results.matches if "text" in m.metadata])
 
+    # Prompt to LLM
     prompt = f"""You are a helpful assistant. Use the following context to answer the question:
-
 Context:
 {context}
-
 Question:
 {query}
-
 Answer:"""
 
     response = llm_client.chat.completions.create(
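The new `if "text" in m.metadata` guard drops matches whose metadata lacks a `text` field instead of raising `KeyError`. Factored into a helper (hypothetical, not in the commit), the step can be unit-tested with plain dicts:

    def build_context(matches) -> str:
        # Mirrors the guarded list comprehension in the diff: skip matches
        # whose metadata has no "text" field rather than raising KeyError.
        return "\n\n".join(m["metadata"]["text"] for m in matches if "text" in m["metadata"])

    # Plain dicts standing in for Pinecone matches:
    assert build_context([{"metadata": {"text": "a"}}, {"metadata": {}}]) == "a"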
@@ -78,22 +177,41 @@ Answer:"""
     )
     answer = response.choices[0].message.content
 
-    top_matches = "\n\n".join(
-        [f"Rank {i+1}: {m.metadata['text'][:200]}" for i, m in enumerate(results.matches)]
-    )
-
-    return f"### Answer:\n{answer}\n\n---\n### Top Retrieved Chunks:\n{top_matches}"
-
-iface = gr.Interface(
-    fn=run_rag_query,
-    inputs=[
-        gr.Textbox(label="Enter your query"),
-        gr.Radio(["cohere", "qwen"], label="Choose embedding model")
-    ],
-    outputs=gr.Markdown(label="RAG Response"),
-    title="QWEN vs COHERE RAG App",
-    description="Ask a question and retrieve contextual answers from your embedded documents.\n[PDF Files Here](https://drive.google.com/drive/folders/1fq-PyNptFg20cknkzNrmW6Tev-869RY9?usp=sharing)"
-)
+    # Include source in retrieved chunks
+    top_matches = "\n\n".join([
+        f"**Rank {i+1}:**\n"
+        f"**Source:** {m.metadata.get('source', 'N/A')}\n"
+        f"**Text:** {m.metadata.get('text', '')[:500]}..."
+        for i, m in enumerate(results.matches)
+    ])
+
+    return f"### Answer:\n{answer}\n\n---\n### Top Retrieved Chunks:\n{top_matches}"
+
+# Gradio UI
+with gr.Blocks(title="QWEN vs COHERE RAG App") as iface:
+    gr.Markdown("## QWEN vs COHERE RAG App")
+    gr.Markdown("Ask a question and retrieve contextual answers from your embedded documents.\n"
+                "[View PDF Files](https://drive.google.com/drive/folders/1fq-PyNptFg20cknkzNrmW6Tev-869RY9?usp=sharing)")
+
+    with gr.Row():
+        query = gr.Textbox(label="Enter your query", lines=2, scale=3)
+        model_choice = gr.Radio(["cohere", "qwen"], label="Choose embedding model", scale=1)
+
+    output = gr.Markdown(label="RAG Response")
+
+    submit_btn = gr.Button("Run Query")
+
+    # Add spinner and action
+    with gr.Row():
+        status = gr.Markdown("")
+
+    def wrapped_run(query, model_choice):
+        status.update("Running... please wait")
+        result = run_rag_query(query, model_choice)
+        status.update("Done")
+        return result
+
+    submit_btn.click(fn=wrapped_run, inputs=[query, model_choice], outputs=output)
 
 if __name__ == "__main__":
     iface.launch()
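One likely issue in the new UI code: `status.update(...)` inside `wrapped_run` will not repaint the component. In Gradio 3, `update` was a class method whose return value had to be routed through an event's outputs, and it was removed entirely in Gradio 4, so these calls either do nothing or raise. A generator callback is one supported way to show an interim message (a sketch reusing the component names above; each `yield` updates `output` in turn):

    # Inside the gr.Blocks context above.
    def wrapped_run(query, model_choice):
        yield "Running... please wait"
        yield run_rag_query(query, model_choice)

    submit_btn.click(fn=wrapped_run, inputs=[query, model_choice], outputs=output)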
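With the secrets set, the retrieval function can also be exercised without the UI, which makes side-by-side checks of the two embedding models easier (a minimal usage sketch; assumes the file is saved as `app.py` and the indexes are populated):

    from app import run_rag_query  # import is safe: iface.launch() sits under the __main__ guard

    print(run_rag_query("What topics do the documents cover?", "cohere"))
    print(run_rag_query("What topics do the documents cover?", "qwen"))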