Update app.py

app.py CHANGED
@@ -1,3 +1,104 @@
+# import gradio as gr
+# from pinecone import Pinecone
+# from sentence_transformers import SentenceTransformer
+# from openai import AzureOpenAI
+# from huggingface_hub import login as hf_login
+# import cohere
+# import os
+# from dotenv import load_dotenv
+
+# load_dotenv()  # Load keys from .env file
+
+# # === ENVIRONMENT VARIABLES ===
+# AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
+# AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
+# COHERE_API_KEY = os.getenv("COHERE_API_KEY")
+# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
+
+
+
+# # Pinecone Setup
+# EMBED_INDEXES = {
+#     "cohere": {
+#         "name": "cohere-pdf-index",
+#         "dimension": 1536,
+#         "region": "us-east-1"
+#     },
+#     "qwen": {
+#         "name": "gwen-embeddings",
+#         "dimension": 1024,
+#         "region": "us-west-2"
+#     }
+# }
+# pc = Pinecone(api_key=PINECONE_API_KEY)
+
+# # Azure OpenAI Client
+# llm_client = AzureOpenAI(
+#     api_key=AZURE_OPENAI_KEY,
+#     api_version="2024-12-01-preview",
+#     azure_endpoint=AZURE_OPENAI_ENDPOINT
+# )
+
+# def run_rag_query(query: str, model_choice: str) -> str:
+#     if model_choice not in EMBED_INDEXES:
+#         return f"Invalid model selected. Choose from {list(EMBED_INDEXES.keys())}"
+
+#     index_config = EMBED_INDEXES[model_choice]
+#     index = pc.Index(index_config["name"])
+
+#     if model_choice == "cohere":
+#         co = cohere.Client(COHERE_API_KEY)
+#         embedding = co.embed(
+#             model="embed-v4.0",
+#             texts=[query],
+#             input_type="search_query",
+#             truncate="NONE"
+#         ).embeddings[0]
+#     else:  # qwen
+#         model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")
+#         embedding = model.encode([query], prompt_name="query")[0].tolist()
+
+#     results = index.query(vector=embedding, top_k=15, include_metadata=True)
+#     context = "\n\n".join([m["metadata"]["text"] for m in results.matches])
+
+#     prompt = f"""You are a helpful assistant. Use the following context to answer the question:
+
+# Context:
+# {context}
+
+# Question:
+# {query}
+
+# Answer:"""
+
+#     response = llm_client.chat.completions.create(
+#         model="gpt-4o-mini",
+#         messages=[{"role": "user", "content": prompt}],
+#         temperature=0.3
+#     )
+#     answer = response.choices[0].message.content
+
+#     top_matches = "\n\n".join(
+#         [f"Rank {i+1}: {m.metadata['text'][:200]}" for i, m in enumerate(results.matches)]
+#     )
+
+#     return f"### Answer:\n{answer}\n\n---\n### Top Retrieved Chunks:\n{top_matches}"
+
+# iface = gr.Interface(
+#     fn=run_rag_query,
+#     inputs=[
+#         gr.Textbox(label="Enter your query"),
+#         gr.Radio(["cohere", "qwen"], label="Choose embedding model")
+#     ],
+#     outputs=gr.Markdown(label="RAG Response"),
+#     title="QWEN vs COHERE RAG App",
+#     description="Ask a question and retrieve contextual answers from your embedded documents.\n[PDF Files Here](https://drive.google.com/drive/folders/1fq-PyNptFg20cknkzNrmW6Tev-869RY9?usp=sharing)"
+# )
+
+# if __name__ == "__main__":
+#     iface.launch()
+
+
 import gradio as gr
 from pinecone import Pinecone
 from sentence_transformers import SentenceTransformer
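The commit keeps the entire previous version of the file as a commented-out block above the live code. All four secrets are read with `os.getenv`, which silently yields `None` when a key is not configured on the Space, so a missing secret only surfaces later as an opaque auth error. A fail-fast check at startup is a cheap safeguard (a sketch, not part of this commit; the key names come from the code above):

    import os

    # Hypothetical startup guard; these four names are the ones app.py reads.
    REQUIRED = ["AZURE_OPENAI_KEY", "AZURE_OPENAI_ENDPOINT", "COHERE_API_KEY", "PINECONE_API_KEY"]
    missing = [k for k in REQUIRED if not os.getenv(k)]
    if missing:
        raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")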
@@ -15,8 +116,6 @@ AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
 
-
-
 # Pinecone Setup
 EMBED_INDEXES = {
     "cohere": {
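`EMBED_INDEXES` keeps one index per embedding model because the vector dimensions differ (1536 for the Cohere index, 1024 for the Qwen index) and a Pinecone index has a fixed dimension. If the indexes ever need to be recreated, something along these lines would match the config (a sketch; the serverless spec and cosine metric are assumptions, not taken from this commit):

    import os
    from pinecone import Pinecone, ServerlessSpec

    # EMBED_INDEXES as defined in app.py above.
    pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
    for cfg in EMBED_INDEXES.values():
        if cfg["name"] not in pc.list_indexes().names():
            pc.create_index(
                name=cfg["name"],
                dimension=cfg["dimension"],
                metric="cosine",  # assumption: must match how the PDFs were embedded and indexed
                spec=ServerlessSpec(cloud="aws", region=cfg["region"]),
            )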
@@ -46,6 +145,7 @@ def run_rag_query(query: str, model_choice: str) -> str:
     index_config = EMBED_INDEXES[model_choice]
     index = pc.Index(index_config["name"])
 
+    # Get embedding
     if model_choice == "cohere":
         co = cohere.Client(COHERE_API_KEY)
         embedding = co.embed(
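One cost the commit does not touch: `run_rag_query` still builds a fresh `cohere.Client` and reloads the Qwen `SentenceTransformer` on every call, and loading a 0.6B model per query dominates latency. A one-slot cache would pay the load once per process (a hypothetical refactor using the same model name; `COHERE_API_KEY` is the module-level value above):

    from functools import lru_cache

    import cohere
    from sentence_transformers import SentenceTransformer

    @lru_cache(maxsize=1)
    def get_qwen_model() -> SentenceTransformer:
        # Loaded once per process instead of once per query.
        return SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")

    @lru_cache(maxsize=1)
    def get_cohere_client() -> cohere.Client:
        return cohere.Client(COHERE_API_KEY)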
@@ -58,17 +158,16 @@ def run_rag_query(query: str, model_choice: str) -> str:
         model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")
         embedding = model.encode([query], prompt_name="query")[0].tolist()
 
+    # Query Pinecone
     results = index.query(vector=embedding, top_k=15, include_metadata=True)
-    context = "\n\n".join([m["metadata"]["text"] for m in results.matches])
+    context = "\n\n".join([m["metadata"]["text"] for m in results.matches if "text" in m.metadata])
 
+    # Prompt to LLM
     prompt = f"""You are a helpful assistant. Use the following context to answer the question:
-
 Context:
 {context}
-
 Question:
 {query}
-
 Answer:"""
 
     response = llm_client.chat.completions.create(
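The new `if "text" in m.metadata` guard drops matches whose metadata lacks a `text` field instead of raising `KeyError`. Factored into a helper (hypothetical, not in the commit), the step can be unit-tested with plain dicts:

    def build_context(matches) -> str:
        # Mirrors the guarded list comprehension in the diff: skip matches
        # whose metadata has no "text" field rather than raising KeyError.
        return "\n\n".join(m["metadata"]["text"] for m in matches if "text" in m["metadata"])

    # Plain dicts standing in for Pinecone matches:
    assert build_context([{"metadata": {"text": "a"}}, {"metadata": {}}]) == "a"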
@@ -78,22 +177,41 @@ Answer:"""
     )
     answer = response.choices[0].message.content
 
-    top_matches = "\n\n".join(
-        [f"Rank {i+1}: {m.metadata['text'][:200]}" for i, m in enumerate(results.matches)]
-    )
-
-    return f"### Answer:\n{answer}\n\n---\n### Top Retrieved Chunks:\n{top_matches}"
-
-iface = gr.Interface(
-    fn=run_rag_query,
-    inputs=[
-        gr.Textbox(label="Enter your query"),
-        gr.Radio(["cohere", "qwen"], label="Choose embedding model")
-    ],
-    outputs=gr.Markdown(label="RAG Response"),
-    title="QWEN vs COHERE RAG App",
-    description="Ask a question and retrieve contextual answers from your embedded documents.\n[PDF Files Here](https://drive.google.com/drive/folders/1fq-PyNptFg20cknkzNrmW6Tev-869RY9?usp=sharing)"
-)
+    # Include source in retrieved chunks
+    top_matches = "\n\n".join([
+        f"**Rank {i+1}:**\n"
+        f"**Source:** {m.metadata.get('source', 'N/A')}\n"
+        f"**Text:** {m.metadata.get('text', '')[:500]}..."
+        for i, m in enumerate(results.matches)
+    ])
+
+    return f"### Answer:\n{answer}\n\n---\n### Top Retrieved Chunks:\n{top_matches}"
+
+# Gradio UI
+with gr.Blocks(title="QWEN vs COHERE RAG App") as iface:
+    gr.Markdown("## QWEN vs COHERE RAG App")
+    gr.Markdown("Ask a question and retrieve contextual answers from your embedded documents.\n"
+                "[View PDF Files](https://drive.google.com/drive/folders/1fq-PyNptFg20cknkzNrmW6Tev-869RY9?usp=sharing)")
+
+    with gr.Row():
+        query = gr.Textbox(label="Enter your query", lines=2, scale=3)
+        model_choice = gr.Radio(["cohere", "qwen"], label="Choose embedding model", scale=1)
+
+    output = gr.Markdown(label="RAG Response")
+
+    submit_btn = gr.Button("Run Query")
+
+    # Add spinner and action
+    with gr.Row():
+        status = gr.Markdown("")
+
+    def wrapped_run(query, model_choice):
+        status.update("Running... please wait")
+        result = run_rag_query(query, model_choice)
+        status.update("Done")
+        return result
+
+    submit_btn.click(fn=wrapped_run, inputs=[query, model_choice], outputs=output)
 
 if __name__ == "__main__":
     iface.launch()
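One likely issue in the new UI code: `status.update(...)` inside `wrapped_run` will not repaint the component. In Gradio 3, `update` was a class method whose return value had to be routed through an event's outputs, and it was removed entirely in Gradio 4, so these calls either do nothing or raise. A generator callback is one supported way to show an interim message (a sketch reusing the component names above; each `yield` updates `output` in turn):

    # Inside the gr.Blocks context above.
    def wrapped_run(query, model_choice):
        yield "Running... please wait"
        yield run_rag_query(query, model_choice)

    submit_btn.click(fn=wrapped_run, inputs=[query, model_choice], outputs=output)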
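With the secrets set, the retrieval function can also be exercised without the UI, which makes side-by-side checks of the two embedding models easier (a minimal usage sketch; assumes the file is saved as `app.py` and the indexes are populated):

    from app import run_rag_query  # import is safe: iface.launch() sits under the __main__ guard

    print(run_rag_query("What topics do the documents cover?", "cohere"))
    print(run_rag_query("What topics do the documents cover?", "qwen"))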