mouryachinta committed
Commit afddbb2 · verified · 1 Parent(s): ca0b76e

Update app.py

Files changed (1)
  1. app.py +140 -22
app.py CHANGED
@@ -1,3 +1,104 @@
+# import gradio as gr
+# from pinecone import Pinecone
+# from sentence_transformers import SentenceTransformer
+# from openai import AzureOpenAI
+# from huggingface_hub import login as hf_login
+# import cohere
+# import os
+# from dotenv import load_dotenv
+
+# load_dotenv() # Load keys from .env file
+
+# # === ENVIRONMENT VARIABLES ===
+# AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
+# AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
+# COHERE_API_KEY = os.getenv("COHERE_API_KEY")
+# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
+
+
+
+# # Pinecone Setup
+# EMBED_INDEXES = {
+#     "cohere": {
+#         "name": "cohere-pdf-index",
+#         "dimension": 1536,
+#         "region": "us-east-1"
+#     },
+#     "qwen": {
+#         "name": "gwen-embeddings",
+#         "dimension": 1024,
+#         "region": "us-west-2"
+#     }
+# }
+# pc = Pinecone(api_key=PINECONE_API_KEY)
+
+# # Azure OpenAI Client
+# llm_client = AzureOpenAI(
+#     api_key=AZURE_OPENAI_KEY,
+#     api_version="2024-12-01-preview",
+#     azure_endpoint=AZURE_OPENAI_ENDPOINT
+# )
+
+# def run_rag_query(query: str, model_choice: str) -> str:
+#     if model_choice not in EMBED_INDEXES:
+#         return f"Invalid model selected. Choose from {list(EMBED_INDEXES.keys())}"
+
+#     index_config = EMBED_INDEXES[model_choice]
+#     index = pc.Index(index_config["name"])
+
+#     if model_choice == "cohere":
+#         co = cohere.Client(COHERE_API_KEY)
+#         embedding = co.embed(
+#             model="embed-v4.0",
+#             texts=[query],
+#             input_type="search_query",
+#             truncate="NONE"
+#         ).embeddings[0]
+#     else: # qwen
+#         model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")
+#         embedding = model.encode([query], prompt_name="query")[0].tolist()
+
+#     results = index.query(vector=embedding, top_k=15, include_metadata=True)
+#     context = "\n\n".join([m["metadata"]["text"] for m in results.matches])
+
+#     prompt = f"""You are a helpful assistant. Use the following context to answer the question:
+
+# Context:
+# {context}
+
+# Question:
+# {query}
+
+# Answer:"""
+
+#     response = llm_client.chat.completions.create(
+#         model="gpt-4o-mini",
+#         messages=[{"role": "user", "content": prompt}],
+#         temperature=0.3
+#     )
+#     answer = response.choices[0].message.content
+
+#     top_matches = "\n\n".join(
+#         [f"Rank {i+1}: {m.metadata['text'][:200]}" for i, m in enumerate(results.matches)]
+#     )
+
+#     return f"### Answer:\n{answer}\n\n---\n### Top Retrieved Chunks:\n{top_matches}"
+
+# iface = gr.Interface(
+#     fn=run_rag_query,
+#     inputs=[
+#         gr.Textbox(label="Enter your query"),
+#         gr.Radio(["cohere", "qwen"], label="Choose embedding model")
+#     ],
+#     outputs=gr.Markdown(label="RAG Response"),
+#     title="QWEN vs COHERE RAG App",
+#     description="Ask a question and retrieve contextual answers from your embedded documents.\n[PDF Files Here](https://drive.google.com/drive/folders/1fq-PyNptFg20cknkzNrmW6Tev-869RY9?usp=sharing)"
+# )
+
+# if __name__ == "__main__":
+#     iface.launch()
+
+
 import gradio as gr
 from pinecone import Pinecone
 from sentence_transformers import SentenceTransformer
@@ -15,8 +116,6 @@ AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
 
-
-
 # Pinecone Setup
 EMBED_INDEXES = {
     "cohere": {
@@ -46,6 +145,7 @@ def run_rag_query(query: str, model_choice: str) -> str:
     index_config = EMBED_INDEXES[model_choice]
     index = pc.Index(index_config["name"])
 
+    # Get embedding
     if model_choice == "cohere":
         co = cohere.Client(COHERE_API_KEY)
         embedding = co.embed(
@@ -58,17 +158,16 @@ def run_rag_query(query: str, model_choice: str) -> str:
         model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")
         embedding = model.encode([query], prompt_name="query")[0].tolist()
 
+    # Query Pinecone
     results = index.query(vector=embedding, top_k=15, include_metadata=True)
-    context = "\n\n".join([m["metadata"]["text"] for m in results.matches])
+    context = "\n\n".join([m["metadata"]["text"] for m in results.matches if "text" in m.metadata])
 
+    # Prompt to LLM
     prompt = f"""You are a helpful assistant. Use the following context to answer the question:
-
 Context:
 {context}
-
 Question:
 {query}
-
 Answer:"""
 
     response = llm_client.chat.completions.create(
@@ -78,22 +177,41 @@ Answer:"""
     )
     answer = response.choices[0].message.content
 
-    top_matches = "\n\n".join(
-        [f"Rank {i+1}: {m.metadata['text'][:200]}" for i, m in enumerate(results.matches)]
-    )
-
-    return f"### Answer:\n{answer}\n\n---\n### Top Retrieved Chunks:\n{top_matches}"
-
-iface = gr.Interface(
-    fn=run_rag_query,
-    inputs=[
-        gr.Textbox(label="Enter your query"),
-        gr.Radio(["cohere", "qwen"], label="Choose embedding model")
-    ],
-    outputs=gr.Markdown(label="RAG Response"),
-    title="QWEN vs COHERE RAG App",
-    description="Ask a question and retrieve contextual answers from your embedded documents.\n[PDF Files Here](https://drive.google.com/drive/folders/1fq-PyNptFg20cknkzNrmW6Tev-869RY9?usp=sharing)"
-)
+    # Include source in retrieved chunks
+    top_matches = "\n\n".join([
+        f"**Rank {i+1}:**\n"
+        f"**Source:** {m.metadata.get('source', 'N/A')}\n"
+        f"**Text:** {m.metadata.get('text', '')[:500]}..."
+        for i, m in enumerate(results.matches)
+    ])
+
+    return f"### ✅ Answer:\n{answer}\n\n---\n### 📄 Top Retrieved Chunks:\n{top_matches}"
+
+# Gradio UI
+with gr.Blocks(title="QWEN vs COHERE RAG App") as iface:
+    gr.Markdown("## 📚 QWEN vs COHERE RAG App")
+    gr.Markdown("Ask a question and retrieve contextual answers from your embedded documents.\n"
+                "[📁 View PDF Files](https://drive.google.com/drive/folders/1fq-PyNptFg20cknkzNrmW6Tev-869RY9?usp=sharing)")
+
+    with gr.Row():
+        query = gr.Textbox(label="Enter your query", lines=2, scale=3)
+        model_choice = gr.Radio(["cohere", "qwen"], label="Choose embedding model", scale=1)
+
+    output = gr.Markdown(label="RAG Response")
+
+    submit_btn = gr.Button("🔍 Run Query")
+
+    # Add spinner and action
+    with gr.Row():
+        status = gr.Markdown("")
+
+    def wrapped_run(query, model_choice):
+        status.update("⏳ Running... please wait")
+        result = run_rag_query(query, model_choice)
+        status.update("✅ Done")
+        return result
+
+    submit_btn.click(fn=wrapped_run, inputs=[query, model_choice], outputs=output)
 
 if __name__ == "__main__":
     iface.launch()
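Review note: the qwen branch still constructs SentenceTransformer("Qwen/Qwen3-Embedding-0.6B") inside run_rag_query, so the checkpoint is reloaded on every request. A minimal sketch of loading it once per process; the get_qwen_model helper is an illustrative name, not part of this commit:

    from functools import lru_cache
    from sentence_transformers import SentenceTransformer

    @lru_cache(maxsize=1)
    def get_qwen_model() -> SentenceTransformer:
        # First call loads the checkpoint; later calls reuse the cached instance.
        return SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")

    # The qwen branch of run_rag_query would then read:
    #     embedding = get_qwen_model().encode([query], prompt_name="query")[0].tolist()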
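Review note: EMBED_INDEXES pairs each index with a dimension (1536 for cohere-pdf-index, 1024 for gwen-embeddings), and Pinecone rejects a query vector whose length does not match the index. A small hypothetical guard, not in the commit, that would surface a mismatch as a clear error right after the embedding is computed:

    # Hypothetical check inside run_rag_query, once 'embedding' is built.
    expected_dim = index_config["dimension"]
    if len(embedding) != expected_dim:
        raise ValueError(
            f"{model_choice} produced a {len(embedding)}-dim vector, "
            f"but index '{index_config['name']}' expects {expected_dim}"
        )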
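Review note: the status.update(...) calls inside wrapped_run will not repaint the page. In Gradio, a handler changes components only through the return or yield values wired to its outputs, and status is not listed as an output of the click event, so the spinner text never appears. One way to stream the status, sketched under the assumption of a Gradio version with generator handlers and queuing enabled:

    # Hypothetical rework: each yield delivers a (status, output) pair.
    def wrapped_run(query, model_choice):
        yield "⏳ Running... please wait", ""   # show status, clear the old answer
        result = run_rag_query(query, model_choice)
        yield "✅ Done", result                  # final status plus the RAG answer

    # Both components must be wired as outputs for the yields to reach them.
    submit_btn.click(fn=wrapped_run, inputs=[query, model_choice], outputs=[status, output])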