Rivalcoder committed
Commit 011118e · 1 Parent(s): 862446b

[Edit] Update of Caching

Files changed (2)
  1. app.py +39 -12
  2. llm.py +1 -0
app.py CHANGED
@@ -3,8 +3,10 @@ import warnings
 import logging
 import time
 import json
+import hashlib
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor
+from threading import Lock
 
 # Set up cache directory for HuggingFace models
 cache_dir = os.path.join(os.getcwd(), ".cache")
@@ -74,6 +76,13 @@ def verify_token(authorization: str = Header(None)):
 def process_batch(batch_questions, context_chunks):
     return query_gemini(batch_questions, context_chunks)
 
+def get_document_id_from_url(url: str) -> str:
+    return hashlib.md5(url.encode()).hexdigest()
+
+# Document cache with thread safety
+doc_cache = {}
+doc_cache_lock = Lock()
+
 @app.post("/api/v1/hackrx/run")
 async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     start_time = time.time()
@@ -85,15 +94,32 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
 
     print(f"Processing {len(request.questions)} questions...")
 
-    pdf_start = time.time()
-    text_chunks = parse_pdf_from_url(request.documents)
-    timing_data['pdf_parsing'] = round(time.time() - pdf_start, 2)
-    print(f"Extracted {len(text_chunks)} text chunks from PDF")
-
-    index_start = time.time()
-    index, texts = build_faiss_index(text_chunks)
-    timing_data['faiss_index_building'] = round(time.time() - index_start, 2)
-
+    # PDF Parsing and FAISS Caching
+    doc_id = get_document_id_from_url(request.documents)
+    with doc_cache_lock:
+        if doc_id in doc_cache:
+            print("✅ Using cached document...")
+            cached = doc_cache[doc_id]
+            text_chunks = cached["chunks"]
+            index = cached["index"]
+            texts = cached["texts"]
+        else:
+            print("⚙️ Parsing and indexing new document...")
+            pdf_start = time.time()
+            text_chunks = parse_pdf_from_url(request.documents)
+            timing_data['pdf_parsing'] = round(time.time() - pdf_start, 2)
+
+            index_start = time.time()
+            index, texts = build_faiss_index(text_chunks)
+            timing_data['faiss_index_building'] = round(time.time() - index_start, 2)
+
+            doc_cache[doc_id] = {
+                "chunks": text_chunks,
+                "index": index,
+                "texts": texts
+            }
+
+    # Chunk Retrieval
     retrieval_start = time.time()
     all_chunks = set()
     for question in request.questions:
@@ -102,6 +128,7 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     timing_data['chunk_retrieval'] = round(time.time() - retrieval_start, 2)
     print(f"Retrieved {len(all_chunks)} unique chunks")
 
+    # LLM Batch Processing
     questions = request.questions
     context_chunks = list(all_chunks)
     batch_size = 10
@@ -126,8 +153,8 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     timing_data['llm_processing'] = round(time.time() - llm_start, 2)
 
     responses = [results_dict.get(i, "Not Found") for i in range(len(questions))]
-
     timing_data['total_time'] = round(time.time() - start_time, 2)
+
     print(f"\n=== TIMING BREAKDOWN ===")
     for k, v in timing_data.items():
         print(f"{k}: {v}s")
@@ -195,8 +222,8 @@ async def run_local_query(request: LocalQueryRequest):
     timing_data['llm_processing'] = round(time.time() - llm_start, 2)
 
     responses = [results_dict.get(i, "Not Found") for i in range(len(questions))]
-
     timing_data['total_time'] = round(time.time() - start_time, 2)
+
    print(f"\n=== TIMING BREAKDOWN ===")
     for k, v in timing_data.items():
         print(f"{k}: {v}s")
@@ -214,4 +241,4 @@ async def run_local_query(request: LocalQueryRequest):
 
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
-    uvicorn.run("app:app", host="0.0.0.0", port=port)
+    uvicorn.run("app:app", host="0.0.0.0", port=port)
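
Editor's note: for reference, here is a minimal, self-contained sketch of the caching pattern this commit adds to app.py. Documents are keyed by an MD5 hash of their URL, and a threading.Lock guards the in-memory dict so concurrent requests reuse, rather than repeat, the PDF parse and FAISS index build. The load_document helper and its parse_fn / index_fn parameters are illustrative stand-ins (not part of the commit) for the app's inline code and its parse_pdf_from_url / build_faiss_index functions.

import hashlib
from threading import Lock

# In-memory cache keyed by an MD5 hash of the document URL, guarded by a lock
# so concurrent requests don't parse and index the same PDF twice.
doc_cache = {}
doc_cache_lock = Lock()

def get_document_id_from_url(url: str) -> str:
    return hashlib.md5(url.encode()).hexdigest()

def load_document(url: str, parse_fn, index_fn):
    """Return (chunks, index, texts), reusing cached results when available.

    parse_fn and index_fn are hypothetical stand-ins for the app's
    parse_pdf_from_url and build_faiss_index helpers.
    """
    doc_id = get_document_id_from_url(url)
    with doc_cache_lock:
        if doc_id in doc_cache:
            cached = doc_cache[doc_id]
            return cached["chunks"], cached["index"], cached["texts"]

        chunks = parse_fn(url)            # expensive: download + parse the PDF
        index, texts = index_fn(chunks)   # expensive: embed + build the FAISS index
        doc_cache[doc_id] = {"chunks": chunks, "index": index, "texts": texts}
        return chunks, index, texts

if __name__ == "__main__":
    # Dummy parse/index functions just to exercise the cache.
    parse = lambda url: [f"chunk from {url}"]
    build = lambda chunks: (object(), chunks)
    print(load_document("https://example.com/policy.pdf", parse, build))
    print(load_document("https://example.com/policy.pdf", parse, build))  # served from cache

As in the diff, the lock is held for the full parse-and-index step, so requests for different documents serialize behind the first one; that keeps the change simple at the cost of some concurrency.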
llm.py CHANGED
@@ -41,6 +41,7 @@ You are an expert insurance assistant generating formal yet user-facing answers
 - Use overly robotic passive constructions like "shall be indemnified".
 - Dont Give In Message Like "Based On The Context "Or "Nothing Refered In The context" Like That Dont Give In Response Try To Give Answer For The Question Alone
 - Over-explain or give long theory answers.
+- Dont Give Directly In Answer Like "The provided information does not contain Details" or "The context does not provide information about this question." Instead, give a general answer if nothing is found in the context. If General Also Not Possible Then Give Like That (That Time Only Use That Response) "I am unable to provide an answer to this question based on the provided context. Please refer to the relevant policy documents or contact customer support for assistance."
 
 ✅ DO:
 - Write in clean, informative language.