Rivalcoder committed · Commit 011118e
Parent(s): 862446b
[Edit] Update of Caching
app.py
CHANGED
@@ -3,8 +3,10 @@ import warnings
 import logging
 import time
 import json
+import hashlib
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor
+from threading import Lock
 
 # Set up cache directory for HuggingFace models
 cache_dir = os.path.join(os.getcwd(), ".cache")
@@ -74,6 +76,13 @@ def verify_token(authorization: str = Header(None)):
 def process_batch(batch_questions, context_chunks):
     return query_gemini(batch_questions, context_chunks)
 
+def get_document_id_from_url(url: str) -> str:
+    return hashlib.md5(url.encode()).hexdigest()
+
+# Document cache with thread safety
+doc_cache = {}
+doc_cache_lock = Lock()
+
 @app.post("/api/v1/hackrx/run")
 async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     start_time = time.time()
@@ -85,15 +94,32 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
 
     print(f"Processing {len(request.questions)} questions...")
 
-
-
-
-
-
-
-
-
-
+    # PDF Parsing and FAISS Caching
+    doc_id = get_document_id_from_url(request.documents)
+    with doc_cache_lock:
+        if doc_id in doc_cache:
+            print("✅ Using cached document...")
+            cached = doc_cache[doc_id]
+            text_chunks = cached["chunks"]
+            index = cached["index"]
+            texts = cached["texts"]
+        else:
+            print("⚙️ Parsing and indexing new document...")
+            pdf_start = time.time()
+            text_chunks = parse_pdf_from_url(request.documents)
+            timing_data['pdf_parsing'] = round(time.time() - pdf_start, 2)
+
+            index_start = time.time()
+            index, texts = build_faiss_index(text_chunks)
+            timing_data['faiss_index_building'] = round(time.time() - index_start, 2)
+
+            doc_cache[doc_id] = {
+                "chunks": text_chunks,
+                "index": index,
+                "texts": texts
+            }
+
+    # Chunk Retrieval
     retrieval_start = time.time()
     all_chunks = set()
     for question in request.questions:
@@ -102,6 +128,7 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     timing_data['chunk_retrieval'] = round(time.time() - retrieval_start, 2)
     print(f"Retrieved {len(all_chunks)} unique chunks")
 
+    # LLM Batch Processing
    questions = request.questions
     context_chunks = list(all_chunks)
     batch_size = 10
@@ -126,8 +153,8 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     timing_data['llm_processing'] = round(time.time() - llm_start, 2)
 
     responses = [results_dict.get(i, "Not Found") for i in range(len(questions))]
-
     timing_data['total_time'] = round(time.time() - start_time, 2)
+
     print(f"\n=== TIMING BREAKDOWN ===")
     for k, v in timing_data.items():
         print(f"{k}: {v}s")
@@ -195,8 +222,8 @@ async def run_local_query(request: LocalQueryRequest):
     timing_data['llm_processing'] = round(time.time() - llm_start, 2)
 
     responses = [results_dict.get(i, "Not Found") for i in range(len(questions))]
-
     timing_data['total_time'] = round(time.time() - start_time, 2)
+
     print(f"\n=== TIMING BREAKDOWN ===")
     for k, v in timing_data.items():
         print(f"{k}: {v}s")
@@ -214,4 +241,4 @@ async def run_local_query(request: LocalQueryRequest):
 
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
-    uvicorn.run("app:app", host="0.0.0.0", port=port)
+    uvicorn.run("app:app", host="0.0.0.0", port=port)
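A note on the caching change above: the commit keys an in-process document cache on an MD5 hash of the request URL and guards it with a single threading.Lock, so repeat requests for the same document skip PDF parsing and FAISS index building. Below is a minimal, self-contained sketch of that pattern, not the app's actual module; parse_pdf_from_url and build_faiss_index are stubbed here purely for illustration.

import hashlib
from threading import Lock

doc_cache = {}
doc_cache_lock = Lock()

def get_document_id_from_url(url: str) -> str:
    # Stable cache key derived from the document URL
    return hashlib.md5(url.encode()).hexdigest()

def parse_pdf_from_url(url: str):
    # Stub for the app's real PDF parser (assumption: returns a list of text chunks)
    return [f"chunk from {url}"]

def build_faiss_index(chunks):
    # Stub for the app's real index builder (assumption: returns (index, texts))
    return object(), list(chunks)

def get_or_build_document(url: str):
    doc_id = get_document_id_from_url(url)
    with doc_cache_lock:
        if doc_id in doc_cache:
            # Cache hit: reuse the parsed chunks and the prebuilt index
            return doc_cache[doc_id]
        # Cache miss: parse once, index once, then store for later requests
        chunks = parse_pdf_from_url(url)
        index, texts = build_faiss_index(chunks)
        doc_cache[doc_id] = {"chunks": chunks, "index": index, "texts": texts}
        return doc_cache[doc_id]

One trade-off worth noting: as in the diff, the lock is held for the whole parse-and-index step, so concurrent requests are serialized while a new document is being indexed; a per-document lock, or releasing the global lock during parsing, would avoid that at the cost of occasionally doing duplicate work.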
llm.py
CHANGED
@@ -41,6 +41,7 @@ You are an expert insurance assistant generating formal yet user-facing answers
 - Use overly robotic passive constructions like "shall be indemnified".
 - Dont Give In Message Like "Based On The Context "Or "Nothing Refered In The context" Like That Dont Give In Response Try To Give Answer For The Question Alone
 - Over-explain or give long theory answers.
+- Dont Give Directly In Answer Like "The provided information does not contain Details" or "The context does not provide information about this question." Instead, give a general answer if nothing is found in the context. If General Also Not Possible Then Give Like That (That Time Only Use That Response) "I am unable to provide an answer to this question based on the provided context. Please refer to the relevant policy documents or contact customer support for assistance."
 
 ✅ DO:
 - Write in clean, informative language.
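For completeness, a hedged sketch of how the updated /api/v1/hackrx/run endpoint could be exercised to observe the cache. The documents and questions field names and the use of an Authorization header follow the handler in app.py above; the base URL, token format, payload values, and response shape are assumptions for illustration only.

import requests  # assumption: the requests library is available in the client environment

BASE_URL = "http://localhost:7860"  # assumption: local run on the default PORT from app.py
HEADERS = {"Authorization": "Bearer <your-token>"}  # assumption: verify_token accepts a bearer-style header

payload = {
    "documents": "https://example.com/policy.pdf",  # hypothetical document URL
    "questions": ["What is the waiting period for pre-existing diseases?"],
}

# The first call parses and indexes the PDF; a second call with the same URL
# should hit doc_cache and skip the pdf_parsing / faiss_index_building steps.
for attempt in range(2):
    resp = requests.post(f"{BASE_URL}/api/v1/hackrx/run", json=payload, headers=HEADERS)
    print(attempt, resp.status_code, resp.json())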