Rivalcoder committed
Commit 6bc8549 · 1 parent: ea49415

Update the model and prompt; fix issues

Files changed (6)
  1. app.py +22 -20
  2. embedder.py +3 -30
  3. llm.py +3 -52
  4. main.py +22 -20
  5. parser.py +0 -21
  6. retriever.py +0 -22
app.py CHANGED
@@ -81,6 +81,11 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     timing_data = {}

     try:
+        print(f"\n=== INPUT JSON ===")
+        print(f"Documents: {request.documents}")
+        print(f"Questions: {request.questions}")
+        print(f"==================\n")
+
         print(f"Processing {len(request.questions)} questions...")

         # Time PDF parsing
@@ -88,7 +93,6 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         text_chunks = parse_pdf_from_url(request.documents)
         pdf_time = time.time() - pdf_start
         timing_data['pdf_parsing'] = round(pdf_time, 2)
-        print(f"PDF Parsing took: {pdf_time:.2f} seconds")
         print(f"Extracted {len(text_chunks)} text chunks from PDF")

         # Time FAISS index building
@@ -96,7 +100,6 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         index, texts = build_faiss_index(text_chunks)
         index_time = time.time() - index_start
         timing_data['faiss_index_building'] = round(index_time, 2)
-        print(f"FAISS Index Building took: {index_time:.2f} seconds")

         # Time chunk retrieval for all questions
         retrieval_start = time.time()
@@ -105,12 +108,10 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
             question_start = time.time()
             top_chunks = retrieve_chunks(index, texts, question)
             question_time = time.time() - question_start
-            print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
             all_chunks.update(top_chunks)

         retrieval_time = time.time() - retrieval_start
         timing_data['chunk_retrieval'] = round(retrieval_time, 2)
-        print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
         print(f"Retrieved {len(all_chunks)} unique chunks")

         # Time LLM processing
@@ -119,7 +120,6 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         response = query_gemini(request.questions, list(all_chunks))
         llm_time = time.time() - llm_start
         timing_data['llm_processing'] = round(llm_time, 2)
-        print(f"LLM Processing took: {llm_time:.2f} seconds")

         # Time response processing
         response_start = time.time()
@@ -140,13 +140,11 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):

         response_time = time.time() - response_start
         timing_data['response_processing'] = round(response_time, 2)
-        print(f"Response Processing took: {response_time:.2f} seconds")
         print(f"Generated {len(answers)} answers")

         # Calculate total time
         total_time = time.time() - start_time
         timing_data['total_time'] = round(total_time, 2)
-        timing_data['timestamp'] = datetime.now().isoformat()

         print(f"\n=== TIMING BREAKDOWN ===")
         print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
@@ -157,9 +155,12 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         print(f"TOTAL TIME: {timing_data['total_time']}s")
         print(f"=======================\n")

-        return {
-            "answers": answers
-        }
+        result = {"answers": answers}
+        print(f"=== OUTPUT JSON ===")
+        print(f"{result}")
+        print(f"==================\n")
+
+        return result

     except Exception as e:
         total_time = time.time() - start_time
@@ -172,6 +173,11 @@ async def run_local_query(request: LocalQueryRequest):
     timing_data = {}

     try:
+        print(f"\n=== INPUT JSON ===")
+        print(f"Document Path: {request.document_path}")
+        print(f"Questions: {request.questions}")
+        print(f"==================\n")
+
         print(f"Processing local document: {request.document_path}")
         print(f"Processing {len(request.questions)} questions...")

@@ -180,7 +186,6 @@ async def run_local_query(request: LocalQueryRequest):
         text_chunks = parse_pdf_from_file(request.document_path)
         pdf_time = time.time() - pdf_start
         timing_data['pdf_parsing'] = round(pdf_time, 2)
-        print(f"Local PDF Parsing took: {pdf_time:.2f} seconds")
         print(f"Extracted {len(text_chunks)} text chunks from local PDF")

         # Time FAISS index building
@@ -188,7 +193,6 @@ async def run_local_query(request: LocalQueryRequest):
         index, texts = build_faiss_index(text_chunks)
         index_time = time.time() - index_start
         timing_data['faiss_index_building'] = round(index_time, 2)
-        print(f"FAISS Index Building took: {index_time:.2f} seconds")

         # Time chunk retrieval for all questions
         retrieval_start = time.time()
@@ -197,12 +201,10 @@ async def run_local_query(request: LocalQueryRequest):
             question_start = time.time()
             top_chunks = retrieve_chunks(index, texts, question)
             question_time = time.time() - question_start
-            print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
             all_chunks.update(top_chunks)

         retrieval_time = time.time() - retrieval_start
         timing_data['chunk_retrieval'] = round(retrieval_time, 2)
-        print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
         print(f"Retrieved {len(all_chunks)} unique chunks")

         # Time LLM processing
@@ -211,7 +213,6 @@ async def run_local_query(request: LocalQueryRequest):
         response = query_gemini(request.questions, list(all_chunks))
         llm_time = time.time() - llm_start
         timing_data['llm_processing'] = round(llm_time, 2)
-        print(f"LLM Processing took: {llm_time:.2f} seconds")

         # Time response processing
         response_start = time.time()
@@ -232,13 +233,11 @@ async def run_local_query(request: LocalQueryRequest):

         response_time = time.time() - response_start
         timing_data['response_processing'] = round(response_time, 2)
-        print(f"Response Processing took: {response_time:.2f} seconds")
         print(f"Generated {len(answers)} answers")

         # Calculate total time
         total_time = time.time() - start_time
         timing_data['total_time'] = round(total_time, 2)
-        timing_data['timestamp'] = datetime.now().isoformat()

         print(f"\n=== TIMING BREAKDOWN ===")
         print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
@@ -249,9 +248,12 @@ async def run_local_query(request: LocalQueryRequest):
         print(f"TOTAL TIME: {timing_data['total_time']}s")
         print(f"=======================\n")

-        return {
-            "answers": answers
-        }
+        result = {"answers": answers}
+        print(f"=== OUTPUT JSON ===")
+        print(f"{result}")
+        print(f"==================\n")
+
+        return result

     except Exception as e:
         total_time = time.time() - start_time
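The same two changes land in both handlers: the per-stage timing prints go away (the timings themselves still accumulate in timing_data), and the request and response are each echoed once as an INPUT/OUTPUT JSON block. A condensed sketch of the resulting flow; the function name and the placeholder pipeline step are ours, everything printed is taken from the diff:

import time

def run_query_sketch(documents, questions):
    timing_data = {}
    start_time = time.time()

    # Echo the request once at entry (replaces the noisy per-stage prints).
    print("\n=== INPUT JSON ===")
    print(f"Documents: {documents}")
    print(f"Questions: {questions}")
    print("==================\n")

    # ... parse -> index -> retrieve -> query_gemini, timing each stage ...
    answers = ["<one answer per question>"]  # placeholder for the real pipeline

    timing_data["total_time"] = round(time.time() - start_time, 2)

    # Echo the response once at exit, then return the same object.
    result = {"answers": answers}
    print("=== OUTPUT JSON ===")
    print(result)
    print("==================\n")
    return result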
embedder.py CHANGED
@@ -2,7 +2,6 @@ import faiss
 from sentence_transformers import SentenceTransformer
 import numpy as np
 import os
-import time

 # Set up cache directory in a writable location
 cache_dir = os.path.join(os.getcwd(), ".cache")
@@ -17,19 +16,16 @@ def preload_model():
     """Preload the sentence transformer model at startup"""
     global _model
     if _model is None:
-        model_start = time.time()
         print("Preloading sentence transformer model...")
         try:
             _model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache_dir)
-            model_time = time.time() - model_start
-            print(f"Model preloading completed in {model_time:.2f} seconds")
+            print("Model preloading completed")
         except Exception as e:
             print(f"Error loading model: {e}")
             # Fallback to a different model if the first one fails
             try:
                 _model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=cache_dir)
-                model_time = time.time() - model_start
-                print(f"Fallback model preloading completed in {model_time:.2f} seconds")
+                print("Fallback model preloading completed")
             except Exception as e2:
                 print(f"Error loading fallback model: {e2}")
                 raise
@@ -39,37 +35,14 @@ def get_model():
     """Get the sentence transformer model, loading it lazily if needed"""
     global _model
     if _model is None:
-        # If model is not preloaded, load it now (should not happen in production)
         print("Warning: Model not preloaded, loading now...")
         return preload_model()
     return _model

 def build_faiss_index(chunks):
-    start_time = time.time()
-    print(f"Building FAISS index for {len(chunks)} chunks...")
-
-    # Time model retrieval (should be instant now)
-    model_start = time.time()
     model = get_model()
-    model_time = time.time() - model_start
-    print(f"Model retrieval took: {model_time:.3f} seconds")
-
-    # Time embedding generation
-    embed_start = time.time()
     embeddings = model.encode(chunks)
-    embed_time = time.time() - embed_start
-    print(f"Embedding generation took: {embed_time:.2f} seconds")
-    print(f"Generated embeddings shape: {embeddings.shape}")
-
-    # Time FAISS index creation
-    index_start = time.time()
     dimension = embeddings.shape[1]
     index = faiss.IndexFlatL2(dimension)
     index.add(np.array(embeddings))
-    index_time = time.time() - index_start
-    print(f"FAISS index creation took: {index_time:.2f} seconds")
-
-    total_time = time.time() - start_time
-    print(f"Total FAISS index building took: {total_time:.2f} seconds")
-
-    return index, chunks
+    return index, chunks
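With the instrumentation stripped, build_faiss_index is just encode-then-index. A self-contained sketch of the post-commit behavior; loading the model inline here stands in for the module's preload_model()/get_model() machinery:

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Inline load stands in for the module's preload_model()/get_model() pair.
model = SentenceTransformer("all-MiniLM-L6-v2")

def build_faiss_index(chunks):
    embeddings = model.encode(chunks)     # one vector per chunk (384-dim for MiniLM-L6)
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)  # exact (brute-force) L2 search
    index.add(np.array(embeddings))
    return index, chunks

IndexFlatL2 is a flat, exact index, which is why no train() call is needed before add().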
llm.py CHANGED
@@ -1,7 +1,6 @@
 import google.generativeai as genai
 import os
 import json
-import time
 from dotenv import load_dotenv
 load_dotenv()

@@ -13,22 +12,9 @@ print(f"Google API Key loaded: {api_key[:10]}..." if api_key else "No API key fo
 genai.configure(api_key=api_key)

 def query_gemini(questions, contexts):
-    start_time = time.time()
-    print(f"Starting LLM processing for {len(questions)} questions with {len(contexts)} context chunks")
-
     try:
-        # Time context preparation
-        context_start = time.time()
         context = "\n\n".join(contexts)
-        context_time = time.time() - context_start
-        print(f"Context preparation took: {context_time:.2f} seconds")
-        print(f"Total context length: {len(context)} characters")
-
-        # Time prompt preparation
-        prompt_start = time.time()
-        # Create a numbered list of questions
         questions_text = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])
-
         prompt = f"""
 You are a skilled insurance policy assistant. Based only on the provided context, answer each question clearly and briefly.

@@ -69,54 +55,19 @@ Respond in the exact JSON format below — no extra text or explanations.
 Your task: Answer each question concisely and professionally. Use plain phrasing, stay within 1–2 clear sentences, and avoid unnecessary detail or repetition.
 """

-
-
-
-
-        prompt_time = time.time() - prompt_start
-        print(f"Prompt preparation took: {prompt_time:.2f} seconds")
-        print(f"Total prompt length: {len(prompt)} characters")
-
-        # Time model initialization and API call
-        api_start = time.time()
-        model = genai.GenerativeModel('gemini-2.0-flash-exp')
+        model = genai.GenerativeModel('gemini-2.5-flash')
         response = model.generate_content(prompt)
-        api_time = time.time() - api_start
-        print(f"Gemini API call took: {api_time:.2f} seconds")
-
-        # Time response processing
-        process_start = time.time()
         response_text = response.text.strip()
-        print(f"Raw response length: {len(response_text)} characters")
-
-        # Try to parse the response as JSON
         try:
-            # Remove any markdown code blocks if present
             if response_text.startswith("```json"):
                 response_text = response_text.replace("```json", "").replace("```", "").strip()
             elif response_text.startswith("```"):
                 response_text = response_text.replace("```", "").strip()
-
             parsed_response = json.loads(response_text)
-            process_time = time.time() - process_start
-            print(f"Response processing took: {process_time:.2f} seconds")
-
-            total_time = time.time() - start_time
-            print(f"Total LLM processing took: {total_time:.2f} seconds")
-
             return parsed_response
         except json.JSONDecodeError:
-            # If JSON parsing fails, return a structured response
-            process_time = time.time() - process_start
-            print(f"Response processing took: {process_time:.2f} seconds (JSON parsing failed)")
             print(f"Failed to parse JSON response: {response_text}")
-
-            total_time = time.time() - start_time
-            print(f"Total LLM processing took: {total_time:.2f} seconds")
-
             return {"answers": ["Error parsing response"] * len(questions)}
-
     except Exception as e:
-        total_time = time.time() - start_time
-        print(f"Error in query_gemini after {total_time:.2f} seconds: {str(e)}")
-        return {"answers": [f"Error generating response: {str(e)}"] * len(questions)}
+        print(f"Error in query_gemini: {str(e)}")
+        return {"answers": [f"Error generating response: {str(e)}"] * len(questions)}
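Besides removing the timers, this file carries the change the commit title points at: the model string moves from gemini-2.0-flash-exp to gemini-2.5-flash. The fence-stripping JSON recovery is unchanged; isolated as a standalone helper (the name parse_model_json is ours, for illustration), it behaves like this:

import json

def parse_model_json(response_text):
    response_text = response_text.strip()
    # Drop a markdown code fence if the model wrapped its JSON in one.
    if response_text.startswith("```json"):
        response_text = response_text.replace("```json", "").replace("```", "").strip()
    elif response_text.startswith("```"):
        response_text = response_text.replace("```", "").strip()
    return json.loads(response_text)

# A fenced payload, as Gemini often returns:
print(parse_model_json('```json\n{"answers": ["Yes, covered."]}\n```'))

Note that replace() strips backticks anywhere in the string, not only the outer fence, so an answer containing literal backticks would be mangled; the code accepts that risk.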
 
main.py CHANGED
@@ -75,6 +75,11 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
     timing_data = {}

     try:
+        print(f"\n=== INPUT JSON ===")
+        print(f"Documents: {request.documents}")
+        print(f"Questions: {request.questions}")
+        print(f"==================\n")
+
         print(f"Processing {len(request.questions)} questions...")

         # Time PDF parsing
@@ -82,7 +87,6 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         text_chunks = parse_pdf_from_url(request.documents)
         pdf_time = time.time() - pdf_start
         timing_data['pdf_parsing'] = round(pdf_time, 2)
-        print(f"PDF Parsing took: {pdf_time:.2f} seconds")
         print(f"Extracted {len(text_chunks)} text chunks from PDF")

         # Time FAISS index building
@@ -90,7 +94,6 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         index, texts = build_faiss_index(text_chunks)
         index_time = time.time() - index_start
         timing_data['faiss_index_building'] = round(index_time, 2)
-        print(f"FAISS Index Building took: {index_time:.2f} seconds")

         # Time chunk retrieval for all questions
         retrieval_start = time.time()
@@ -99,12 +102,10 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
             question_start = time.time()
             top_chunks = retrieve_chunks(index, texts, question)
             question_time = time.time() - question_start
-            print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
             all_chunks.update(top_chunks)

         retrieval_time = time.time() - retrieval_start
         timing_data['chunk_retrieval'] = round(retrieval_time, 2)
-        print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
         print(f"Retrieved {len(all_chunks)} unique chunks")

         # Time LLM processing
@@ -113,7 +114,6 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         response = query_gemini(request.questions, list(all_chunks))
         llm_time = time.time() - llm_start
         timing_data['llm_processing'] = round(llm_time, 2)
-        print(f"LLM Processing took: {llm_time:.2f} seconds")

         # Time response processing
         response_start = time.time()
@@ -134,13 +134,11 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):

         response_time = time.time() - response_start
         timing_data['response_processing'] = round(response_time, 2)
-        print(f"Response Processing took: {response_time:.2f} seconds")
         print(f"Generated {len(answers)} answers")

         # Calculate total time
         total_time = time.time() - start_time
         timing_data['total_time'] = round(total_time, 2)
-        timing_data['timestamp'] = datetime.now().isoformat()

         print(f"\n=== TIMING BREAKDOWN ===")
         print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
@@ -151,9 +149,12 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         print(f"TOTAL TIME: {timing_data['total_time']}s")
         print(f"=======================\n")

-        return {
-            "answers": answers
-        }
+        result = {"answers": answers}
+        print(f"=== OUTPUT JSON ===")
+        print(f"{result}")
+        print(f"==================\n")
+
+        return result

     except Exception as e:
         total_time = time.time() - start_time
@@ -166,6 +167,11 @@ async def run_local_query(request: LocalQueryRequest):
     timing_data = {}

     try:
+        print(f"\n=== INPUT JSON ===")
+        print(f"Document Path: {request.document_path}")
+        print(f"Questions: {request.questions}")
+        print(f"==================\n")
+
         print(f"Processing local document: {request.document_path}")
         print(f"Processing {len(request.questions)} questions...")

@@ -174,7 +180,6 @@ async def run_local_query(request: LocalQueryRequest):
         text_chunks = parse_pdf_from_file(request.document_path)
         pdf_time = time.time() - pdf_start
         timing_data['pdf_parsing'] = round(pdf_time, 2)
-        print(f"Local PDF Parsing took: {pdf_time:.2f} seconds")
         print(f"Extracted {len(text_chunks)} text chunks from local PDF")

         # Time FAISS index building
@@ -182,7 +187,6 @@ async def run_local_query(request: LocalQueryRequest):
         index, texts = build_faiss_index(text_chunks)
         index_time = time.time() - index_start
         timing_data['faiss_index_building'] = round(index_time, 2)
-        print(f"FAISS Index Building took: {index_time:.2f} seconds")

         # Time chunk retrieval for all questions
         retrieval_start = time.time()
@@ -191,12 +195,10 @@ async def run_local_query(request: LocalQueryRequest):
             question_start = time.time()
             top_chunks = retrieve_chunks(index, texts, question)
             question_time = time.time() - question_start
-            print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
             all_chunks.update(top_chunks)

         retrieval_time = time.time() - retrieval_start
         timing_data['chunk_retrieval'] = round(retrieval_time, 2)
-        print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
         print(f"Retrieved {len(all_chunks)} unique chunks")

         # Time LLM processing
@@ -205,7 +207,6 @@ async def run_local_query(request: LocalQueryRequest):
         response = query_gemini(request.questions, list(all_chunks))
         llm_time = time.time() - llm_start
         timing_data['llm_processing'] = round(llm_time, 2)
-        print(f"LLM Processing took: {llm_time:.2f} seconds")

         # Time response processing
         response_start = time.time()
@@ -226,13 +227,11 @@ async def run_local_query(request: LocalQueryRequest):

         response_time = time.time() - response_start
         timing_data['response_processing'] = round(response_time, 2)
-        print(f"Response Processing took: {response_time:.2f} seconds")
         print(f"Generated {len(answers)} answers")

         # Calculate total time
         total_time = time.time() - start_time
         timing_data['total_time'] = round(total_time, 2)
-        timing_data['timestamp'] = datetime.now().isoformat()

         print(f"\n=== TIMING BREAKDOWN ===")
         print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
@@ -243,9 +242,12 @@ async def run_local_query(request: LocalQueryRequest):
         print(f"TOTAL TIME: {timing_data['total_time']}s")
         print(f"=======================\n")

-        return {
-            "answers": answers
-        }
+        result = {"answers": answers}
+        print(f"=== OUTPUT JSON ===")
+        print(f"{result}")
+        print(f"==================\n")
+
+        return result

     except Exception as e:
         total_time = time.time() - start_time
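main.py receives exactly the same edits as app.py; the two modules carry duplicate copies of the handlers. From the fields the new logging prints, the request and response shapes look like this (values are illustrative; the route path and auth header are not shown in this diff):

# Request body (QueryRequest): a document URL plus a list of questions.
request_body = {
    "documents": "https://example.com/policy.pdf",  # illustrative URL
    "questions": ["Is there a waiting period for pre-existing diseases?"],
}

# Response body: one answer per question, in order.
response_body = {
    "answers": ["Illustrative answer text for the question above."],
}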
parser.py CHANGED
@@ -4,15 +4,7 @@ from io import BytesIO
 import time

 def parse_pdf_from_url(url):
-    start_time = time.time()
-    print(f"Starting PDF download and parsing from URL...")
-
-    download_start = time.time()
     res = requests.get(url)
-    download_time = time.time() - download_start
-    print(f"PDF Download took: {download_time:.2f} seconds")
-
-    parse_start = time.time()
     doc = fitz.open(stream=BytesIO(res.content), filetype="pdf")
     chunks = []
     for page in doc:
@@ -20,18 +12,10 @@ def parse_pdf_from_url(url):
         if text.strip():
             chunks.append(text)
     doc.close()
-    parse_time = time.time() - parse_start
-    print(f"PDF Text Extraction took: {parse_time:.2f} seconds")
-
-    total_time = time.time() - start_time
-    print(f"Total PDF parsing from URL took: {total_time:.2f} seconds")
     return chunks

 def parse_pdf_from_file(file_path):
     """Parse a local PDF file and extract text chunks"""
-    start_time = time.time()
-    print(f"Starting PDF parsing from local file: {file_path}")
-
     try:
         doc = fitz.open(file_path)
         chunks = []
@@ -40,11 +24,6 @@ def parse_pdf_from_file(file_path):
             if text.strip():
                 chunks.append(text)
         doc.close()
-
-        total_time = time.time() - start_time
-        print(f"Total PDF parsing from file took: {total_time:.2f} seconds")
         return chunks
     except Exception as e:
-        total_time = time.time() - start_time
-        print(f"Error parsing PDF file after {total_time:.2f} seconds: {str(e)}")
         raise Exception(f"Error parsing PDF file {file_path}: {str(e)}")
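All removals here: after the commit the URL parser reduces to download, open, and page-by-page extraction. A runnable sketch of the post-commit function; the extraction line sits outside the diff context, so page.get_text(), the standard PyMuPDF call, is our assumption:

import requests
import fitz  # PyMuPDF
from io import BytesIO

def parse_pdf_from_url(url):
    res = requests.get(url)
    doc = fitz.open(stream=BytesIO(res.content), filetype="pdf")
    chunks = []
    for page in doc:
        text = page.get_text()   # assumed extraction call (not shown in the diff)
        if text.strip():
            chunks.append(text)  # one chunk per non-empty page
    doc.close()
    return chunks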
retriever.py CHANGED
@@ -5,30 +5,8 @@ from embedder import get_model

 # Use the preloaded model from embedder instead of creating a new instance
 def retrieve_chunks(index, texts, query, k=5):
-    start_time = time.time()
-    print(f"Retrieving chunks for query: '{query[:50]}...'")
-
-    # Time query embedding
-    embed_start = time.time()
     model = get_model()  # Use the preloaded model
     query_vec = model.encode([query])
-    embed_time = time.time() - embed_start
-    print(f"Query embedding took: {embed_time:.3f} seconds")
-
-    # Time FAISS search
-    search_start = time.time()
     distances, indices = index.search(np.array(query_vec), k)
-    search_time = time.time() - search_start
-    print(f"FAISS search took: {search_time:.3f} seconds")
-
-    # Time result processing
-    process_start = time.time()
     results = [texts[i] for i in indices[0]]
-    process_time = time.time() - process_start
-    print(f"Result processing took: {process_time:.3f} seconds")
-
-    total_time = time.time() - start_time
-    print(f"Total chunk retrieval took: {total_time:.3f} seconds")
-    print(f"Retrieved {len(results)} chunks")
-
     return results
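With the timers gone, retrieval is two calls: embed the query, search the index. A standalone sketch with toy data (the toy chunks are ours; the function body matches the post-commit state):

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # stands in for get_model()

def retrieve_chunks(index, texts, query, k=5):
    query_vec = model.encode([query])                          # shape (1, dim)
    distances, indices = index.search(np.array(query_vec), k)  # k nearest by L2
    return [texts[i] for i in indices[0]]

# Toy usage: index three chunks, fetch the two closest to a query.
texts = ["The grace period is 30 days.", "Premiums are due monthly.", "Claims require Form A."]
embeddings = model.encode(texts)
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.array(embeddings))
print(retrieve_chunks(index, texts, "How long is the grace period?", k=2))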