Rivalcoder committed · Commit 011118e
Parent(s): 862446b
[Edit] Update of Caching
app.py
CHANGED
@@ -3,8 +3,10 @@ import warnings
 import logging
 import time
 import json
+import hashlib
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor
+from threading import Lock
 
 # Set up cache directory for HuggingFace models
 cache_dir = os.path.join(os.getcwd(), ".cache")
@@ -74,6 +76,13 @@ def verify_token(authorization: str = Header(None)):
 def process_batch(batch_questions, context_chunks):
     return query_gemini(batch_questions, context_chunks)
 
+def get_document_id_from_url(url: str) -> str:
+    return hashlib.md5(url.encode()).hexdigest()
+
+# Document cache with thread safety
+doc_cache = {}
+doc_cache_lock = Lock()
+
 @app.post("/api/v1/hackrx/run")
 async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     start_time = time.time()
@@ -85,15 +94,32 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
 
     print(f"Processing {len(request.questions)} questions...")
 
-
-
-
-
-
-
-
-
-
+    # PDF Parsing and FAISS Caching
+    doc_id = get_document_id_from_url(request.documents)
+    with doc_cache_lock:
+        if doc_id in doc_cache:
+            print("✅ Using cached document...")
+            cached = doc_cache[doc_id]
+            text_chunks = cached["chunks"]
+            index = cached["index"]
+            texts = cached["texts"]
+        else:
+            print("⚙️ Parsing and indexing new document...")
+            pdf_start = time.time()
+            text_chunks = parse_pdf_from_url(request.documents)
+            timing_data['pdf_parsing'] = round(time.time() - pdf_start, 2)
+
+            index_start = time.time()
+            index, texts = build_faiss_index(text_chunks)
+            timing_data['faiss_index_building'] = round(time.time() - index_start, 2)
+
+            doc_cache[doc_id] = {
+                "chunks": text_chunks,
+                "index": index,
+                "texts": texts
+            }
+
+    # Chunk Retrieval
     retrieval_start = time.time()
     all_chunks = set()
     for question in request.questions:
@@ -102,6 +128,7 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     timing_data['chunk_retrieval'] = round(time.time() - retrieval_start, 2)
     print(f"Retrieved {len(all_chunks)} unique chunks")
 
+    # LLM Batch Processing
    questions = request.questions
     context_chunks = list(all_chunks)
     batch_size = 10
@@ -126,8 +153,8 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     timing_data['llm_processing'] = round(time.time() - llm_start, 2)
 
     responses = [results_dict.get(i, "Not Found") for i in range(len(questions))]
-
     timing_data['total_time'] = round(time.time() - start_time, 2)
+
     print(f"\n=== TIMING BREAKDOWN ===")
     for k, v in timing_data.items():
         print(f"{k}: {v}s")
@@ -195,8 +222,8 @@ async def run_local_query(request: LocalQueryRequest):
     timing_data['llm_processing'] = round(time.time() - llm_start, 2)
 
     responses = [results_dict.get(i, "Not Found") for i in range(len(questions))]
-
     timing_data['total_time'] = round(time.time() - start_time, 2)
+
     print(f"\n=== TIMING BREAKDOWN ===")
     for k, v in timing_data.items():
         print(f"{k}: {v}s")
@@ -214,4 +241,4 @@ async def run_local_query(request: LocalQueryRequest):
 
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
-    uvicorn.run("app:app", host="0.0.0.0", port=port)
+    uvicorn.run("app:app", host="0.0.0.0", port=port)
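A note on the caching change above: the commit keys an in-process document cache on an MD5 hash of the request URL and guards it with a single threading.Lock, so repeat requests for the same document skip PDF parsing and FAISS index building. Below is a minimal, self-contained sketch of that pattern, not the app's actual module; parse_pdf_from_url and build_faiss_index are stubbed here purely for illustration.

import hashlib
from threading import Lock

doc_cache = {}
doc_cache_lock = Lock()

def get_document_id_from_url(url: str) -> str:
    # Stable cache key derived from the document URL
    return hashlib.md5(url.encode()).hexdigest()

def parse_pdf_from_url(url: str):
    # Stub for the app's real PDF parser (assumption: returns a list of text chunks)
    return [f"chunk from {url}"]

def build_faiss_index(chunks):
    # Stub for the app's real index builder (assumption: returns (index, texts))
    return object(), list(chunks)

def get_or_build_document(url: str):
    doc_id = get_document_id_from_url(url)
    with doc_cache_lock:
        if doc_id in doc_cache:
            # Cache hit: reuse the parsed chunks and the prebuilt index
            return doc_cache[doc_id]
        # Cache miss: parse once, index once, then store for later requests
        chunks = parse_pdf_from_url(url)
        index, texts = build_faiss_index(chunks)
        doc_cache[doc_id] = {"chunks": chunks, "index": index, "texts": texts}
        return doc_cache[doc_id]

One trade-off worth noting: as in the diff, the lock is held for the whole parse-and-index step, so concurrent requests are serialized while a new document is being indexed; a per-document lock, or releasing the global lock during parsing, would avoid that at the cost of occasionally doing duplicate work.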
llm.py
CHANGED
@@ -41,6 +41,7 @@ You are an expert insurance assistant generating formal yet user-facing answers
 - Use overly robotic passive constructions like "shall be indemnified".
 - Dont Give In Message Like "Based On The Context "Or "Nothing Refered In The context" Like That Dont Give In Response Try To Give Answer For The Question Alone
 - Over-explain or give long theory answers.
+- Dont Give Directly In Answer Like "The provided information does not contain Details" or "The context does not provide information about this question." Instead, give a general answer if nothing is found in the context. If General Also Not Possible Then Give Like That (That Time Only Use That Response) "I am unable to provide an answer to this question based on the provided context. Please refer to the relevant policy documents or contact customer support for assistance."
 
 ✅ DO:
 - Write in clean, informative language.
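For completeness, a hedged sketch of how the updated /api/v1/hackrx/run endpoint could be exercised to observe the cache. The documents and questions field names and the use of an Authorization header follow the handler in app.py above; the base URL, token format, payload values, and response shape are assumptions for illustration only.

import requests  # assumption: the requests library is available in the client environment

BASE_URL = "http://localhost:7860"  # assumption: local run on the default PORT from app.py
HEADERS = {"Authorization": "Bearer <your-token>"}  # assumption: verify_token accepts a bearer-style header

payload = {
    "documents": "https://example.com/policy.pdf",  # hypothetical document URL
    "questions": ["What is the waiting period for pre-existing diseases?"],
}

# The first call parses and indexes the PDF; a second call with the same URL
# should hit doc_cache and skip the pdf_parsing / faiss_index_building steps.
for attempt in range(2):
    resp = requests.post(f"{BASE_URL}/api/v1/hackrx/run", json=payload, headers=HEADERS)
    print(attempt, resp.status_code, resp.json())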