Rivalcoder committed
Commit eb87b3b · 1 Parent(s): c89a7bc

Update The Model issues and Prompt

Files changed (23)
  1. .cache/chunks_6635d94cf9023c83521982b3043ec70c.pkl +3 -0
  2. .cache/embeddings_b24811e7d333cc7d5047e52b357abd7e.pkl +3 -0
  3. .cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/refs/main +1 -0
  4. .cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/1_Pooling/config.json +7 -0
  5. .cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/README.md +173 -0
  6. .cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config_sentence_transformers.json +7 -0
  7. .cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/modules.json +20 -0
  8. .cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/sentence_bert_config.json +4 -0
  9. .cache/{.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock → models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/added_tokens.json} +0 -0
  10. .cache/models--sentence-transformers--all-MiniLM-L6-v2/{blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.incomplete → .no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/chat_template.jinja} +0 -0
  11. .cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/1_Pooling/config.json +7 -0
  12. .cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/model.safetensors +3 -0
  13. .cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/special_tokens_map.json +1 -0
  14. .cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer.json +0 -0
  15. .cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer_config.json +1 -0
  16. .cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/vocab.txt +0 -0
  17. .cache/response_2ab720ffccd688afdc790db13e338c83.pkl +3 -0
  18. app.py +119 -12
  19. embedder.py +40 -2
  20. llm.py +69 -54
  21. main.py +119 -12
  22. parser.py +23 -0
  23. retriever.py +28 -3
.cache/chunks_6635d94cf9023c83521982b3043ec70c.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a4cef2cc09ef9d4ef7d8649bb78ec868e356dcfecbcd6dde23442a90497d407e
+ size 124546
.cache/embeddings_b24811e7d333cc7d5047e52b357abd7e.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:475523b57f8f6b89e62e668efef73309193b05f0f05bbeffb7f012ee952024f0
+ size 347400
.cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/refs/main ADDED
@@ -0,0 +1 @@
+ c9745ed1d9f207416be6d2e6f8de32d1f16199bf
.cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/1_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "word_embedding_dimension": 384,
+ "pooling_mode_cls_token": false,
+ "pooling_mode_mean_tokens": true,
+ "pooling_mode_max_tokens": false,
+ "pooling_mode_mean_sqrt_len_tokens": false
+ }
.cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/README.md ADDED
@@ -0,0 +1,173 @@
+ ---
+ language: en
+ license: apache-2.0
+ library_name: sentence-transformers
+ tags:
+ - sentence-transformers
+ - feature-extraction
+ - sentence-similarity
+ - transformers
+ datasets:
+ - s2orc
+ - flax-sentence-embeddings/stackexchange_xml
+ - ms_marco
+ - gooaq
+ - yahoo_answers_topics
+ - code_search_net
+ - search_qa
+ - eli5
+ - snli
+ - multi_nli
+ - wikihow
+ - natural_questions
+ - trivia_qa
+ - embedding-data/sentence-compression
+ - embedding-data/flickr30k-captions
+ - embedding-data/altlex
+ - embedding-data/simple-wiki
+ - embedding-data/QQP
+ - embedding-data/SPECTER
+ - embedding-data/PAQ_pairs
+ - embedding-data/WikiAnswers
+ pipeline_tag: sentence-similarity
+ ---
+
+
+ # all-MiniLM-L6-v2
+ This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search.
+
+ ## Usage (Sentence-Transformers)
+ Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
+
+ ```
+ pip install -U sentence-transformers
+ ```
+
+ Then you can use the model like this:
+ ```python
+ from sentence_transformers import SentenceTransformer
+ sentences = ["This is an example sentence", "Each sentence is converted"]
+
+ model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+ embeddings = model.encode(sentences)
+ print(embeddings)
+ ```
+
+ ## Usage (HuggingFace Transformers)
+ Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling-operation on-top of the contextualized word embeddings.
+
+ ```python
+ from transformers import AutoTokenizer, AutoModel
+ import torch
+ import torch.nn.functional as F
+
+ #Mean Pooling - Take attention mask into account for correct averaging
+ def mean_pooling(model_output, attention_mask):
+     token_embeddings = model_output[0] #First element of model_output contains all token embeddings
+     input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+     return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+
+
+ # Sentences we want sentence embeddings for
+ sentences = ['This is an example sentence', 'Each sentence is converted']
+
+ # Load model from HuggingFace Hub
+ tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
+ model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
+
+ # Tokenize sentences
+ encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
+
+ # Compute token embeddings
+ with torch.no_grad():
+     model_output = model(**encoded_input)
+
+ # Perform pooling
+ sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
+
+ # Normalize embeddings
+ sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
+
+ print("Sentence embeddings:")
+ print(sentence_embeddings)
+ ```
+
+ ------
+
+ ## Background
+
+ The project aims to train sentence embedding models on very large sentence-level datasets using a self-supervised
+ contrastive learning objective. We used the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model and fine-tuned it on a
+ 1B sentence-pairs dataset. We use a contrastive learning objective: given a sentence from the pair, the model should predict which out of a set of randomly sampled other sentences was actually paired with it in our dataset.
+
+ We developed this model during the
+ [Community week using JAX/Flax for NLP & CV](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104),
+ organized by Hugging Face. We developed this model as part of the project:
+ [Train the Best Sentence Embedding Model Ever with 1B Training Pairs](https://discuss.huggingface.co/t/train-the-best-sentence-embedding-model-ever-with-1b-training-pairs/7354). We benefited from efficient hardware infrastructure to run the project: 7 TPUs v3-8, as well as support from Google's Flax, JAX, and Cloud team members on efficient deep learning frameworks.
+
+ ## Intended uses
+
+ Our model is intended to be used as a sentence and short paragraph encoder. Given an input text, it outputs a vector which captures
+ the semantic information. The sentence vector may be used for information retrieval, clustering or sentence similarity tasks.
+
+ By default, input text longer than 256 word pieces is truncated.
+
+
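A minimal sketch of that similarity use case with the sentence-transformers `util.cos_sim` helper (illustrative only; the example sentences and variable names are not from this repository):

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
model.max_seq_length = 256  # inputs longer than 256 word pieces are truncated

queries = ["What is the grace period for premium payment?"]
passages = [
    "A grace period of thirty days is provided for premium payment.",
    "Cataract surgery has a waiting period of two years.",
]

# Encode to 384-dimensional vectors and score every query against every passage
query_embeddings = model.encode(queries)
passage_embeddings = model.encode(passages)
print(util.cos_sim(query_embeddings, passage_embeddings))  # higher = more similar
```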
+ ## Training procedure
+
+ ### Pre-training
+
+ We use the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model. Please refer to the model card for more detailed information about the pre-training procedure.
+
+ ### Fine-tuning
+
+ We fine-tune the model using a contrastive objective. Formally, we compute the cosine similarity for every possible sentence pair in the batch.
+ We then apply a cross-entropy loss over these similarities, using the true pairs as the correct classes.
+
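A rough sketch of that in-batch objective (illustrative PyTorch, not the referenced `train_script.py`; the scaling factor is an assumption):

```python
import torch
import torch.nn.functional as F

def in_batch_contrastive_loss(anchor_emb, positive_emb, scale=20.0):
    """anchor_emb, positive_emb: (batch, dim) embeddings of paired sentences."""
    anchor_emb = F.normalize(anchor_emb, dim=1)
    positive_emb = F.normalize(positive_emb, dim=1)
    # Cosine similarity between every anchor and every candidate in the batch
    scores = anchor_emb @ positive_emb.T * scale  # shape: (batch, batch)
    # For anchor i the true pair is candidate i; all other columns act as negatives
    labels = torch.arange(scores.size(0), device=scores.device)
    return F.cross_entropy(scores, labels)
```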
+ #### Hyper parameters
+
+ We trained our model on a TPU v3-8. We trained the model for 100k steps using a batch size of 1024 (128 per TPU core).
+ We used a learning rate warm-up of 500 steps. The sequence length was limited to 128 tokens. We used the AdamW optimizer with
+ a 2e-5 learning rate. The full training script is accessible in this current repository: `train_script.py`.
+
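One way those hyperparameters could be wired up with the sentence-transformers training API (a sketch assuming the pre-3.0 `fit()` interface and a tiny placeholder dataset, not the actual `train_script.py`):

```python
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, InputExample, losses

# Placeholder pairs; the real run sampled ~1B pairs per data_config.json
train_examples = [
    InputExample(texts=["how long is the grace period?",
                        "a thirty day grace period applies to premium payment"]),
]
loader = DataLoader(train_examples, shuffle=True, batch_size=1024)

model = SentenceTransformer("nreimers/MiniLM-L6-H384-uncased")
model.max_seq_length = 128
loss = losses.MultipleNegativesRankingLoss(model)  # in-batch contrastive loss

model.fit(
    train_objectives=[(loader, loss)],
    epochs=1,
    warmup_steps=500,
    optimizer_params={"lr": 2e-5},  # AdamW-style optimizer with a 2e-5 learning rate
)
```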
+ #### Training data
+
+ We used the concatenation of multiple datasets to fine-tune our model. The total number of sentence pairs is above 1 billion.
+ We sampled each dataset with a weighted probability, the configuration of which is detailed in the `data_config.json` file.
+
+
+ | Dataset | Paper | Number of training tuples |
+ |--------------------------------------------------------|:----------------------------------------:|:--------------------------:|
+ | [Reddit comments (2015-2018)](https://github.com/PolyAI-LDN/conversational-datasets/tree/master/reddit) | [paper](https://arxiv.org/abs/1904.06472) | 726,484,430 |
+ | [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Abstracts) | [paper](https://aclanthology.org/2020.acl-main.447/) | 116,288,806 |
+ | [WikiAnswers](https://github.com/afader/oqa#wikianswers-corpus) Duplicate question pairs | [paper](https://doi.org/10.1145/2623330.2623677) | 77,427,422 |
+ | [PAQ](https://github.com/facebookresearch/PAQ) (Question, Answer) pairs | [paper](https://arxiv.org/abs/2102.07033) | 64,371,441 |
+ | [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Titles) | [paper](https://aclanthology.org/2020.acl-main.447/) | 52,603,982 |
+ | [S2ORC](https://github.com/allenai/s2orc) (Title, Abstract) | [paper](https://aclanthology.org/2020.acl-main.447/) | 41,769,185 |
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Body) pairs | - | 25,316,456 |
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title+Body, Answer) pairs | - | 21,396,559 |
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Answer) pairs | - | 21,396,559 |
+ | [MS MARCO](https://microsoft.github.io/msmarco/) triplets | [paper](https://doi.org/10.1145/3404835.3462804) | 9,144,553 |
+ | [GOOAQ: Open Question Answering with Diverse Answer Types](https://github.com/allenai/gooaq) | [paper](https://arxiv.org/pdf/2104.08727.pdf) | 3,012,496 |
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 1,198,260 |
+ | [Code Search](https://huggingface.co/datasets/code_search_net) | - | 1,151,414 |
+ | [COCO](https://cocodataset.org/#home) Image captions | [paper](https://link.springer.com/chapter/10.1007%2F978-3-319-10602-1_48) | 828,395 |
+ | [SPECTER](https://github.com/allenai/specter) citation triplets | [paper](https://doi.org/10.18653/v1/2020.acl-main.207) | 684,100 |
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Question, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 681,164 |
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Question) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 659,896 |
+ | [SearchQA](https://huggingface.co/datasets/search_qa) | [paper](https://arxiv.org/abs/1704.05179) | 582,261 |
+ | [Eli5](https://huggingface.co/datasets/eli5) | [paper](https://doi.org/10.18653/v1/p19-1346) | 325,475 |
+ | [Flickr 30k](https://shannon.cs.illinois.edu/DenotationGraph/) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/229/33) | 317,695 |
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles) | | 304,525 |
+ | AllNLI ([SNLI](https://nlp.stanford.edu/projects/snli/) and [MultiNLI](https://cims.nyu.edu/~sbowman/multinli/)) | [paper SNLI](https://doi.org/10.18653/v1/d15-1075), [paper MultiNLI](https://doi.org/10.18653/v1/n18-1101) | 277,230 |
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (bodies) | | 250,519 |
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles+bodies) | | 250,460 |
+ | [Sentence Compression](https://github.com/google-research-datasets/sentence-compression) | [paper](https://www.aclweb.org/anthology/D13-1155/) | 180,000 |
+ | [Wikihow](https://github.com/pvl/wikihow_pairs_dataset) | [paper](https://arxiv.org/abs/1810.09305) | 128,542 |
+ | [Altlex](https://github.com/chridey/altlex/) | [paper](https://aclanthology.org/P16-1135.pdf) | 112,696 |
+ | [Quora Question Triplets](https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs) | - | 103,663 |
+ | [Simple Wikipedia](https://cs.pomona.edu/~dkauchak/simplification/) | [paper](https://www.aclweb.org/anthology/P11-2117/) | 102,225 |
+ | [Natural Questions (NQ)](https://ai.google.com/research/NaturalQuestions) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/1455) | 100,231 |
+ | [SQuAD2.0](https://rajpurkar.github.io/SQuAD-explorer/) | [paper](https://aclanthology.org/P18-2124.pdf) | 87,599 |
+ | [TriviaQA](https://huggingface.co/datasets/trivia_qa) | - | 73,346 |
+ | **Total** | | **1,170,060,424** |
.cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "__version__": {
+ "sentence_transformers": "2.0.0",
+ "transformers": "4.6.1",
+ "pytorch": "1.8.1"
+ }
+ }
.cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+ {
+ "idx": 0,
+ "name": "0",
+ "path": "",
+ "type": "sentence_transformers.models.Transformer"
+ },
+ {
+ "idx": 1,
+ "name": "1",
+ "path": "1_Pooling",
+ "type": "sentence_transformers.models.Pooling"
+ },
+ {
+ "idx": 2,
+ "name": "2",
+ "path": "2_Normalize",
+ "type": "sentence_transformers.models.Normalize"
+ }
+ ]
.cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "max_seq_length": 256,
+ "do_lower_case": false
+ }
.cache/{.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock → models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/added_tokens.json} RENAMED
File without changes
.cache/models--sentence-transformers--all-MiniLM-L6-v2/{blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.incomplete → .no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/chat_template.jinja} RENAMED
File without changes
.cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/1_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "word_embedding_dimension": 384,
+ "pooling_mode_cls_token": false,
+ "pooling_mode_mean_tokens": true,
+ "pooling_mode_max_tokens": false,
+ "pooling_mode_mean_sqrt_len_tokens": false
+ }
.cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db
+ size 90868376
.cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
.cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
.cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "name_or_path": "nreimers/MiniLM-L6-H384-uncased", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer", "model_max_length": 512}
.cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
.cache/response_2ab720ffccd688afdc790db13e338c83.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c5853e52bd3fdc0bdf05ca5b73769bc17fe8f44fe56271a78a87f155c5de6da
+ size 429
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import os
2
  import warnings
3
  import logging
 
 
4
 
5
  # Set up cache directory for HuggingFace models
6
  cache_dir = os.path.join(os.getcwd(), ".cache")
@@ -22,7 +24,7 @@ from fastapi import FastAPI, Request, HTTPException, Depends, Header
22
  from fastapi.middleware.cors import CORSMiddleware
23
  from pydantic import BaseModel
24
  from parser import parse_pdf_from_url, parse_pdf_from_file
25
- from embedder import build_faiss_index
26
  from retriever import retrieve_chunks
27
  from llm import query_gemini
28
  import uvicorn
@@ -38,6 +40,14 @@ app.add_middleware(
38
  allow_headers=["*"],
39
  )
40
41
  @app.get("/")
42
  async def root():
43
  return {"message": "HackRx Insurance Policy Assistant API is running!"}
@@ -67,24 +77,52 @@ def verify_token(authorization: str = Header(None)):
67
 
68
  @app.post("/api/v1/hackrx/run")
69
  async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
 
 
 
70
  try:
71
  print(f"Processing {len(request.questions)} questions...")
72
 
 
 
73
  text_chunks = parse_pdf_from_url(request.documents)
 
 
 
74
  print(f"Extracted {len(text_chunks)} text chunks from PDF")
75
 
 
 
76
  index, texts = build_faiss_index(text_chunks)
 
 
 
77
 
78
- # Get relevant chunks for all questions at once
 
79
  all_chunks = set()
80
- for question in request.questions:
 
81
  top_chunks = retrieve_chunks(index, texts, question)
 
 
82
  all_chunks.update(top_chunks)
83
 
84
- # Process all questions in a single LLM call
85
  print(f"Processing all {len(request.questions)} questions in batch...")
86
  response = query_gemini(request.questions, list(all_chunks))
 
 
 
87
 
 
 
88
  # Extract answers from the JSON response
89
  if isinstance(response, dict) and "answers" in response:
90
  answers = response["answers"]
@@ -100,35 +138,83 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
100
  answers.append("Not Found")
101
  answers = answers[:len(request.questions)]
102
 
 
 
 
103
  print(f"Generated {len(answers)} answers")
104
- return { "answers": answers }
105
 
106
  except Exception as e:
107
- print(f"Error: {str(e)}")
 
108
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
109
 
110
  @app.post("/api/v1/hackrx/local")
111
  async def run_local_query(request: LocalQueryRequest):
 
 
 
112
  try:
113
  print(f"Processing local document: {request.document_path}")
114
  print(f"Processing {len(request.questions)} questions...")
115
 
116
- # Parse local PDF file
 
117
  text_chunks = parse_pdf_from_file(request.document_path)
 
 
 
118
  print(f"Extracted {len(text_chunks)} text chunks from local PDF")
119
 
 
 
120
  index, texts = build_faiss_index(text_chunks)
 
 
 
121
 
122
- # Get relevant chunks for all questions at once
 
123
  all_chunks = set()
124
- for question in request.questions:
 
125
  top_chunks = retrieve_chunks(index, texts, question)
 
 
126
  all_chunks.update(top_chunks)
127
 
128
- # Process all questions in a single LLM call
129
  print(f"Processing all {len(request.questions)} questions in batch...")
130
  response = query_gemini(request.questions, list(all_chunks))
 
 
 
131
 
 
 
132
  # Extract answers from the JSON response
133
  if isinstance(response, dict) and "answers" in response:
134
  answers = response["answers"]
@@ -144,11 +230,32 @@ async def run_local_query(request: LocalQueryRequest):
144
  answers.append("Not Found")
145
  answers = answers[:len(request.questions)]
146
 
 
 
 
147
  print(f"Generated {len(answers)} answers")
148
- return { "answers": answers }
149
 
150
  except Exception as e:
151
- print(f"Error: {str(e)}")
 
152
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
153
 
154
  if __name__ == "__main__":
 
1
  import os
2
  import warnings
3
  import logging
4
+ import time
5
+ from datetime import datetime
6
 
7
  # Set up cache directory for HuggingFace models
8
  cache_dir = os.path.join(os.getcwd(), ".cache")
 
24
  from fastapi.middleware.cors import CORSMiddleware
25
  from pydantic import BaseModel
26
  from parser import parse_pdf_from_url, parse_pdf_from_file
27
+ from embedder import build_faiss_index, preload_model
28
  from retriever import retrieve_chunks
29
  from llm import query_gemini
30
  import uvicorn
 
40
  allow_headers=["*"],
41
  )
42
 
43
+ # Preload the model at startup
44
+ @app.on_event("startup")
45
+ async def startup_event():
46
+ print("Starting up HackRx Insurance Policy Assistant...")
47
+ print("Preloading sentence transformer model...")
48
+ preload_model()
49
+ print("Model preloading completed. API is ready to serve requests!")
50
+
51
  @app.get("/")
52
  async def root():
53
  return {"message": "HackRx Insurance Policy Assistant API is running!"}
 
77
 
78
  @app.post("/api/v1/hackrx/run")
79
  async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
80
+ start_time = time.time()
81
+ timing_data = {}
82
+
83
  try:
84
  print(f"Processing {len(request.questions)} questions...")
85
 
86
+ # Time PDF parsing
87
+ pdf_start = time.time()
88
  text_chunks = parse_pdf_from_url(request.documents)
89
+ pdf_time = time.time() - pdf_start
90
+ timing_data['pdf_parsing'] = round(pdf_time, 2)
91
+ print(f"PDF Parsing took: {pdf_time:.2f} seconds")
92
  print(f"Extracted {len(text_chunks)} text chunks from PDF")
93
 
94
+ # Time FAISS index building
95
+ index_start = time.time()
96
  index, texts = build_faiss_index(text_chunks)
97
+ index_time = time.time() - index_start
98
+ timing_data['faiss_index_building'] = round(index_time, 2)
99
+ print(f"FAISS Index Building took: {index_time:.2f} seconds")
100
 
101
+ # Time chunk retrieval for all questions
102
+ retrieval_start = time.time()
103
  all_chunks = set()
104
+ for i, question in enumerate(request.questions):
105
+ question_start = time.time()
106
  top_chunks = retrieve_chunks(index, texts, question)
107
+ question_time = time.time() - question_start
108
+ print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
109
  all_chunks.update(top_chunks)
110
 
111
+ retrieval_time = time.time() - retrieval_start
112
+ timing_data['chunk_retrieval'] = round(retrieval_time, 2)
113
+ print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
114
+ print(f"Retrieved {len(all_chunks)} unique chunks")
115
+
116
+ # Time LLM processing
117
+ llm_start = time.time()
118
  print(f"Processing all {len(request.questions)} questions in batch...")
119
  response = query_gemini(request.questions, list(all_chunks))
120
+ llm_time = time.time() - llm_start
121
+ timing_data['llm_processing'] = round(llm_time, 2)
122
+ print(f"LLM Processing took: {llm_time:.2f} seconds")
123
 
124
+ # Time response processing
125
+ response_start = time.time()
126
  # Extract answers from the JSON response
127
  if isinstance(response, dict) and "answers" in response:
128
  answers = response["answers"]
 
138
  answers.append("Not Found")
139
  answers = answers[:len(request.questions)]
140
 
141
+ response_time = time.time() - response_start
142
+ timing_data['response_processing'] = round(response_time, 2)
143
+ print(f"Response Processing took: {response_time:.2f} seconds")
144
  print(f"Generated {len(answers)} answers")
145
+
146
+ # Calculate total time
147
+ total_time = time.time() - start_time
148
+ timing_data['total_time'] = round(total_time, 2)
149
+ timing_data['timestamp'] = datetime.now().isoformat()
150
+
151
+ print(f"\n=== TIMING BREAKDOWN ===")
152
+ print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
153
+ print(f"FAISS Index Building: {timing_data['faiss_index_building']}s")
154
+ print(f"Chunk Retrieval: {timing_data['chunk_retrieval']}s")
155
+ print(f"LLM Processing: {timing_data['llm_processing']}s")
156
+ print(f"Response Processing: {timing_data['response_processing']}s")
157
+ print(f"TOTAL TIME: {timing_data['total_time']}s")
158
+ print(f"=======================\n")
159
+
160
+ return {
161
+ "answers": answers
162
+ }
163
 
164
  except Exception as e:
165
+ total_time = time.time() - start_time
166
+ print(f"Error after {total_time:.2f} seconds: {str(e)}")
167
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
168
 
169
  @app.post("/api/v1/hackrx/local")
170
  async def run_local_query(request: LocalQueryRequest):
171
+ start_time = time.time()
172
+ timing_data = {}
173
+
174
  try:
175
  print(f"Processing local document: {request.document_path}")
176
  print(f"Processing {len(request.questions)} questions...")
177
 
178
+ # Time local PDF parsing
179
+ pdf_start = time.time()
180
  text_chunks = parse_pdf_from_file(request.document_path)
181
+ pdf_time = time.time() - pdf_start
182
+ timing_data['pdf_parsing'] = round(pdf_time, 2)
183
+ print(f"Local PDF Parsing took: {pdf_time:.2f} seconds")
184
  print(f"Extracted {len(text_chunks)} text chunks from local PDF")
185
 
186
+ # Time FAISS index building
187
+ index_start = time.time()
188
  index, texts = build_faiss_index(text_chunks)
189
+ index_time = time.time() - index_start
190
+ timing_data['faiss_index_building'] = round(index_time, 2)
191
+ print(f"FAISS Index Building took: {index_time:.2f} seconds")
192
 
193
+ # Time chunk retrieval for all questions
194
+ retrieval_start = time.time()
195
  all_chunks = set()
196
+ for i, question in enumerate(request.questions):
197
+ question_start = time.time()
198
  top_chunks = retrieve_chunks(index, texts, question)
199
+ question_time = time.time() - question_start
200
+ print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
201
  all_chunks.update(top_chunks)
202
 
203
+ retrieval_time = time.time() - retrieval_start
204
+ timing_data['chunk_retrieval'] = round(retrieval_time, 2)
205
+ print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
206
+ print(f"Retrieved {len(all_chunks)} unique chunks")
207
+
208
+ # Time LLM processing
209
+ llm_start = time.time()
210
  print(f"Processing all {len(request.questions)} questions in batch...")
211
  response = query_gemini(request.questions, list(all_chunks))
212
+ llm_time = time.time() - llm_start
213
+ timing_data['llm_processing'] = round(llm_time, 2)
214
+ print(f"LLM Processing took: {llm_time:.2f} seconds")
215
 
216
+ # Time response processing
217
+ response_start = time.time()
218
  # Extract answers from the JSON response
219
  if isinstance(response, dict) and "answers" in response:
220
  answers = response["answers"]
 
230
  answers.append("Not Found")
231
  answers = answers[:len(request.questions)]
232
 
233
+ response_time = time.time() - response_start
234
+ timing_data['response_processing'] = round(response_time, 2)
235
+ print(f"Response Processing took: {response_time:.2f} seconds")
236
  print(f"Generated {len(answers)} answers")
237
+
238
+ # Calculate total time
239
+ total_time = time.time() - start_time
240
+ timing_data['total_time'] = round(total_time, 2)
241
+ timing_data['timestamp'] = datetime.now().isoformat()
242
+
243
+ print(f"\n=== TIMING BREAKDOWN ===")
244
+ print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
245
+ print(f"FAISS Index Building: {timing_data['faiss_index_building']}s")
246
+ print(f"Chunk Retrieval: {timing_data['chunk_retrieval']}s")
247
+ print(f"LLM Processing: {timing_data['llm_processing']}s")
248
+ print(f"Response Processing: {timing_data['response_processing']}s")
249
+ print(f"TOTAL TIME: {timing_data['total_time']}s")
250
+ print(f"=======================\n")
251
+
252
+ return {
253
+ "answers": answers
254
+ }
255
 
256
  except Exception as e:
257
+ total_time = time.time() - start_time
258
+ print(f"Error after {total_time:.2f} seconds: {str(e)}")
259
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
260
 
261
  if __name__ == "__main__":
embedder.py CHANGED
@@ -2,6 +2,7 @@ import faiss
2
  from sentence_transformers import SentenceTransformer
3
  import numpy as np
4
  import os
 
5
 
6
  # Set up cache directory in a writable location
7
  cache_dir = os.path.join(os.getcwd(), ".cache")
@@ -12,26 +13,63 @@ os.environ['TRANSFORMERS_CACHE'] = cache_dir
12
  # Initialize model as None - will be loaded lazily
13
  _model = None
14
 
15
- def get_model():
16
- """Get the sentence transformer model, loading it lazily if needed"""
17
  global _model
18
  if _model is None:
 
 
19
  try:
20
  _model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache_dir)
 
 
21
  except Exception as e:
22
  print(f"Error loading model: {e}")
23
  # Fallback to a different model if the first one fails
24
  try:
25
  _model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=cache_dir)
 
 
26
  except Exception as e2:
27
  print(f"Error loading fallback model: {e2}")
28
  raise
29
  return _model
30
 
 
 
 
 
 
 
 
 
 
31
  def build_faiss_index(chunks):
 
 
 
 
 
32
  model = get_model()
 
 
 
 
 
33
  embeddings = model.encode(chunks)
 
 
 
 
 
 
34
  dimension = embeddings.shape[1]
35
  index = faiss.IndexFlatL2(dimension)
36
  index.add(np.array(embeddings))
 
 
 
 
 
 
37
  return index, chunks
 
2
  from sentence_transformers import SentenceTransformer
3
  import numpy as np
4
  import os
5
+ import time
6
 
7
  # Set up cache directory in a writable location
8
  cache_dir = os.path.join(os.getcwd(), ".cache")
 
13
  # Initialize model as None - will be loaded lazily
14
  _model = None
15
 
16
+ def preload_model():
17
+ """Preload the sentence transformer model at startup"""
18
  global _model
19
  if _model is None:
20
+ model_start = time.time()
21
+ print("Preloading sentence transformer model...")
22
  try:
23
  _model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache_dir)
24
+ model_time = time.time() - model_start
25
+ print(f"Model preloading completed in {model_time:.2f} seconds")
26
  except Exception as e:
27
  print(f"Error loading model: {e}")
28
  # Fallback to a different model if the first one fails
29
  try:
30
  _model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=cache_dir)
31
+ model_time = time.time() - model_start
32
+ print(f"Fallback model preloading completed in {model_time:.2f} seconds")
33
  except Exception as e2:
34
  print(f"Error loading fallback model: {e2}")
35
  raise
36
  return _model
37
 
38
+ def get_model():
39
+ """Get the sentence transformer model, loading it lazily if needed"""
40
+ global _model
41
+ if _model is None:
42
+ # If model is not preloaded, load it now (should not happen in production)
43
+ print("Warning: Model not preloaded, loading now...")
44
+ return preload_model()
45
+ return _model
46
+
47
  def build_faiss_index(chunks):
48
+ start_time = time.time()
49
+ print(f"Building FAISS index for {len(chunks)} chunks...")
50
+
51
+ # Time model retrieval (should be instant now)
52
+ model_start = time.time()
53
  model = get_model()
54
+ model_time = time.time() - model_start
55
+ print(f"Model retrieval took: {model_time:.3f} seconds")
56
+
57
+ # Time embedding generation
58
+ embed_start = time.time()
59
  embeddings = model.encode(chunks)
60
+ embed_time = time.time() - embed_start
61
+ print(f"Embedding generation took: {embed_time:.2f} seconds")
62
+ print(f"Generated embeddings shape: {embeddings.shape}")
63
+
64
+ # Time FAISS index creation
65
+ index_start = time.time()
66
  dimension = embeddings.shape[1]
67
  index = faiss.IndexFlatL2(dimension)
68
  index.add(np.array(embeddings))
69
+ index_time = time.time() - index_start
70
+ print(f"FAISS index creation took: {index_time:.2f} seconds")
71
+
72
+ total_time = time.time() - start_time
73
+ print(f"Total FAISS index building took: {total_time:.2f} seconds")
74
+
75
  return index, chunks
llm.py CHANGED
@@ -1,6 +1,7 @@
1
  import google.generativeai as genai
2
  import os
3
  import json
 
4
  from dotenv import load_dotenv
5
  load_dotenv()
6
 
@@ -12,78 +13,79 @@ print(f"Google API Key loaded: {api_key[:10]}..." if api_key else "No API key fo
12
  genai.configure(api_key=api_key)
13
 
14
  def query_gemini(questions, contexts):
 
 
 
15
  try:
 
 
16
  context = "\n\n".join(contexts)
 
 
 
17
 
 
 
18
  # Create a numbered list of questions
19
  questions_text = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])
20
 
21
- prompt = f"""You are an insurance policy assistant. Based on the below document snippets, answer the following questions precisely.
 
22
 
23
- IMPORTANT INSTRUCTIONS:
24
- 1. Only respond based on the context provided. If information is not found in the context, respond with "Not Found".
25
- 2. Provide clear, concise answers that directly address each question.
26
- 3. Return your response in the exact JSON format shown below.
27
- 4. Give complete, informative responses based on the provided context.
28
- 5. Answer each question in the order provided.
29
 
30
- Context:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  {context}
32
 
33
- Questions:
34
  {questions_text}
35
 
36
- Response Should Good And Refined And Details But also Not So Large For Small Things and Too small Also Not Recommended With One Informative Line.
37
- Below I Provided For Current Context Coming How I Can need The Sentences Phrases And Words like is Given For Understanding and Reference:
38
- -Old_Ai_Response_Format
39
- {{
40
- "answers": [
41
- "The grace period for premium payment is thirty days.",
42
- "Expenses related to the treatment of a Pre-Existing Disease (PED) and its direct complications shall be excluded until the expiry of thirty six (36) months of continuous coverage after the date of inception of the first policy.",
43
- "Yes, the company shall indemnify Maternity Expenses as described in section 3.1.14 for any female Insured Person, and also Pre-Natal and Post-Natal Hospitalisation expenses per delivery, including expenses for necessary vaccination for New Born Baby, subject to the limit as shown in the Table of Benefits. The female Insured Person should have been continuously covered for at least 24 months before availing this benefit.",
44
- "Cataract surgery has a waiting period of two years.",
45
- "Yes, the Company shall indemnify the Medical Expenses incurred in respect of an organ donor’s Hospitalisation during the Policy Period for harvesting of the organ donated to an Insured Person, provided that certain conditions are met as outlined in section 3.1.7.",
46
- "On renewal of policies with a term of one year, a NCD of flat 5% shall be allowed on the * base premium, provided claims are not reported in the expiring Policy.\nOn renewal of policies with a term exceeding one year, the NCD amount with respect to each claim free policy year shall be aggregated and allowed on renewal. Aggregate amount of NCD allowed shall not exceed flat 5% of the total base premium for the term of the policy.",
47
- "Yes, expenses of health check up shall be reimbursed (irrespective of past claims) at the end of a block of two continuous policy years, provided the Policy has been continuously renewed with the Company without a break. Expenses payable are subject to the limit stated in the Table of Benefits.",
48
- "Hospital means any institution established for in-patient care and day care treatment of disease/ injuries and which has been registered as a hospital with the local authorities under the Clinical Establishments (Registration and Regulation) Act, 2010 or under the enactments specified under Schedule of Section 56(1) of the said Act, OR complies with all minimum criteria as under:\ni. has qualified nursing staff under its employment round the clock;\nii. has at least ten inpatient beds, in those towns having a population of less than ten lacs and fifteen inpatient beds in all other places;\niii. has qualified medical practitioner (s) in charge round the clock;\niv. has a fully equipped operation theatre of its own where surgical procedures are carried out \nv. maintains daily records of patients and shall make these accessible to the Company’s authorized personnel.",
49
- "The Company shall indemnify Medical Expenses incurred for Inpatient Care treatment under Ayurveda, Yoga and Naturopathy, Unani, Siddha and Homeopathy systems of medicines during each Policy Period up to the limit of Sum Insured as specified in the Policy Schedule in any AYUSH Hospital.",
50
- "For Plan A, Room Charges are limited to Up to 1% of SI or actual, whichever is lower and ICU Charges are limited to Up to 2% of SI or actual, whichever is lower, per day per insured person."
51
- ]
52
- }}
53
-
54
- -New_Ai_Response_Format_And Wordings Given Like
55
- {{
56
- "answers": [
57
- "A grace period of thirty days is provided for premium payment after the due date to renew or continue the policy without losing continuity benefits.",
58
- "There is a waiting period of thirty-six (36) months of continuous coverage from the first policy inception for pre-existing diseases and their direct complications to be covered.",
59
- "Yes, the policy covers maternity expenses, including childbirth and lawful medical termination of pregnancy. To be eligible, the female insured person must have been continuously covered for at least 24 months. The benefit is limited to two deliveries or terminations during the policy period.",
60
- "The policy has a specific waiting period of two (2) years for cataract surgery.",
61
- "Yes, the policy indemnifies the medical expenses for the organ donor's hospitalization for the purpose of harvesting the organ, provided the organ is for an insured person and the donation complies with the Transplantation of Human Organs Act, 1994.",
62
- "A No Claim Discount of 5% on the base premium is offered on renewal for a one-year policy term if no claims were made in the preceding year. The maximum aggregate NCD is capped at 5% of the total base premium.",
63
- "Yes, the policy reimburses expenses for health check-ups at the end of every block of two continuous policy years, provided the policy has been renewed without a break. The amount is subject to the limits specified in the Table of Benefits.",
64
- "A hospital is defined as an institution with at least 10 inpatient beds (in towns with a population below ten lakhs) or 15 beds (in all other places), with qualified nursing staff and medical practitioners available 24/7, a fully equipped operation theatre, and which maintains daily records of patients.",
65
- "The policy covers medical expenses for inpatient treatment under Ayurveda, Yoga, Naturopathy, Unani, Siddha, and Homeopathy systems up to the Sum Insured limit, provided the treatment is taken in an AYUSH Hospital.",
66
- "Yes, for Plan A, the daily room rent is capped at 1% of the Sum Insured, and ICU charges are capped at 2% of the Sum Insured. These limits do not apply if the treatment is for a listed procedure in a Preferred Provider Network (PPN)."
67
- ]
68
- }}
69
 
70
- ## The Above Is Reference How Can Give Output Wordings Back To The Question is Given For References
71
 
72
- Return your response in this exact JSON format:
73
- {{
74
- "answers": [
75
- "Answer to question 1",
76
- "Answer to question 2",
77
- "Answer to question 3",
78
- ...
79
- ]
80
- }}
81
 
82
- Ensure each answer is comprehensive and directly addresses the corresponding question. If information is not found in the context for any question, respond with "Not Found" for that question."""
83
 
 
 
 
 
 
 
84
  model = genai.GenerativeModel('gemini-2.0-flash-exp')
85
  response = model.generate_content(prompt)
 
 
 
 
 
86
  response_text = response.text.strip()
 
87
 
88
  # Try to parse the response as JSON
89
  try:
@@ -94,12 +96,25 @@ Ensure each answer is comprehensive and directly addresses the corresponding que
94
  response_text = response_text.replace("```", "").strip()
95
 
96
  parsed_response = json.loads(response_text)
 
 
 
 
 
 
97
  return parsed_response
98
  except json.JSONDecodeError:
99
  # If JSON parsing fails, return a structured response
 
 
100
  print(f"Failed to parse JSON response: {response_text}")
 
 
 
 
101
  return {"answers": ["Error parsing response"] * len(questions)}
102
 
103
  except Exception as e:
104
- print(f"Error in query_gemini: {str(e)}")
 
105
  return {"answers": [f"Error generating response: {str(e)}"] * len(questions)}
 
1
  import google.generativeai as genai
2
  import os
3
  import json
4
+ import time
5
  from dotenv import load_dotenv
6
  load_dotenv()
7
 
 
13
  genai.configure(api_key=api_key)
14
 
15
  def query_gemini(questions, contexts):
16
+ start_time = time.time()
17
+ print(f"Starting LLM processing for {len(questions)} questions with {len(contexts)} context chunks")
18
+
19
  try:
20
+ # Time context preparation
21
+ context_start = time.time()
22
  context = "\n\n".join(contexts)
23
+ context_time = time.time() - context_start
24
+ print(f"Context preparation took: {context_time:.2f} seconds")
25
+ print(f"Total context length: {len(context)} characters")
26
 
27
+ # Time prompt preparation
28
+ prompt_start = time.time()
29
  # Create a numbered list of questions
30
  questions_text = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])
31
 
32
+ prompt = f"""
33
+ You are an intelligent insurance assistant trained to answer questions using insurance documents. Based on the context provided below, respond to each question with a **well-informed, complete, and professionally worded answer**.
34
 
35
+ 🎯 SCORING & OUTPUT GOAL:
36
+ - Responses are part of an evaluated system.
37
+ - Each answer should be **accurate**, **complete**, and **well-phrased** — ideally around **1–2 full sentences**.
38
+ - Avoid short/fragmented answers or long multi-paragraph explanations.
39
+ - Always write like an insurance advisor addressing a customer clearly.
 
40
 
41
+ 📘 INSTRUCTIONS:
42
+ 1. **Only use the provided context** to answer each question. If the answer is not found, respond with exactly: `"Not Found"`.
43
+ 2. Keep answers concise **but not vague**. Include all **key points** (such as limits, durations, conditions) in one or two complete sentences.
44
+ 3. DO NOT use bullet points, partial phrases, or excessive legal text. DO NOT repeat the question in the answer.
45
+ 4. Match the tone and format of these examples:
46
+ - "A grace period of thirty days is provided for premium payment after the due date to renew or continue the policy without losing continuity benefits."
47
+ - "Yes, the policy covers maternity expenses, including childbirth and lawful medical termination of pregnancy. To be eligible, the female insured person must have been continuously covered for at least 24 months. The benefit is limited to two deliveries or terminations during the policy period."
48
+ - "Yes, the policy indemnifies the medical expenses for the organ donor's hospitalization for the purpose of harvesting the organ, provided the organ is for an insured person and the donation complies with the Transplantation of Human Organs Act, 1994."
49
+ - "Not Found"
50
+
51
+ 📤 RETURN FORMAT:
52
+ Respond strictly using this JSON structure:
53
+
54
+ {{
55
+ "answers": [
56
+ "Answer to question 1",
57
+ "Answer to question 2",
58
+ ...
59
+ ]
60
+ }}
61
+
62
+ 📚 CONTEXT:
63
  {context}
64
 
65
+ ❓ QUESTIONS:
66
  {questions_text}
67
 
68
+ Your task: Provide accurate, refined answers based on the document context above. Use the tone and structure shown. Be concise but thorough. Only include what is supported in the context. Use "Not Found" if the answer is missing.
69
+ """
74
+ prompt_time = time.time() - prompt_start
75
+ print(f"Prompt preparation took: {prompt_time:.2f} seconds")
76
+ print(f"Total prompt length: {len(prompt)} characters")
77
+
78
+ # Time model initialization and API call
79
+ api_start = time.time()
80
  model = genai.GenerativeModel('gemini-2.0-flash-exp')
81
  response = model.generate_content(prompt)
82
+ api_time = time.time() - api_start
83
+ print(f"Gemini API call took: {api_time:.2f} seconds")
84
+
85
+ # Time response processing
86
+ process_start = time.time()
87
  response_text = response.text.strip()
88
+ print(f"Raw response length: {len(response_text)} characters")
89
 
90
  # Try to parse the response as JSON
91
  try:
 
96
  response_text = response_text.replace("```", "").strip()
97
 
98
  parsed_response = json.loads(response_text)
99
+ process_time = time.time() - process_start
100
+ print(f"Response processing took: {process_time:.2f} seconds")
101
+
102
+ total_time = time.time() - start_time
103
+ print(f"Total LLM processing took: {total_time:.2f} seconds")
104
+
105
  return parsed_response
106
  except json.JSONDecodeError:
107
  # If JSON parsing fails, return a structured response
108
+ process_time = time.time() - process_start
109
+ print(f"Response processing took: {process_time:.2f} seconds (JSON parsing failed)")
110
  print(f"Failed to parse JSON response: {response_text}")
111
+
112
+ total_time = time.time() - start_time
113
+ print(f"Total LLM processing took: {total_time:.2f} seconds")
114
+
115
  return {"answers": ["Error parsing response"] * len(questions)}
116
 
117
  except Exception as e:
118
+ total_time = time.time() - start_time
119
+ print(f"Error in query_gemini after {total_time:.2f} seconds: {str(e)}")
120
  return {"answers": [f"Error generating response: {str(e)}"] * len(questions)}
main.py CHANGED
@@ -1,6 +1,8 @@
1
  import os
2
  import warnings
3
  import logging
 
 
4
 
5
  # Suppress TensorFlow warnings
6
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
@@ -16,7 +18,7 @@ from fastapi import FastAPI, Request, HTTPException, Depends, Header
16
  from fastapi.middleware.cors import CORSMiddleware
17
  from pydantic import BaseModel
18
  from parser import parse_pdf_from_url, parse_pdf_from_file
19
- from embedder import build_faiss_index
20
  from retriever import retrieve_chunks
21
  from llm import query_gemini
22
  import uvicorn
@@ -32,6 +34,14 @@ app.add_middleware(
32
  allow_headers=["*"],
33
  )
34
35
  @app.get("/")
36
  async def root():
37
  return {"message": "HackRx Insurance Policy Assistant API is running!"}
@@ -61,24 +71,52 @@ def verify_token(authorization: str = Header(None)):
61
 
62
  @app.post("/api/v1/hackrx/run")
63
  async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
 
 
 
64
  try:
65
  print(f"Processing {len(request.questions)} questions...")
66
 
 
 
67
  text_chunks = parse_pdf_from_url(request.documents)
 
 
 
68
  print(f"Extracted {len(text_chunks)} text chunks from PDF")
69
 
 
 
70
  index, texts = build_faiss_index(text_chunks)
 
 
 
71
 
72
- # Get relevant chunks for all questions at once
 
73
  all_chunks = set()
74
- for question in request.questions:
 
75
  top_chunks = retrieve_chunks(index, texts, question)
 
 
76
  all_chunks.update(top_chunks)
77
 
78
- # Process all questions in a single LLM call
79
  print(f"Processing all {len(request.questions)} questions in batch...")
80
  response = query_gemini(request.questions, list(all_chunks))
 
 
 
81
 
 
 
82
  # Extract answers from the JSON response
83
  if isinstance(response, dict) and "answers" in response:
84
  answers = response["answers"]
@@ -94,35 +132,83 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
94
  answers.append("Not Found")
95
  answers = answers[:len(request.questions)]
96
 
 
 
 
97
  print(f"Generated {len(answers)} answers")
98
- return { "answers": answers }
99
 
100
  except Exception as e:
101
- print(f"Error: {str(e)}")
 
102
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
103
 
104
  @app.post("/api/v1/hackrx/local")
105
  async def run_local_query(request: LocalQueryRequest):
 
 
 
106
  try:
107
  print(f"Processing local document: {request.document_path}")
108
  print(f"Processing {len(request.questions)} questions...")
109
 
110
- # Parse local PDF file
 
111
  text_chunks = parse_pdf_from_file(request.document_path)
 
 
 
112
  print(f"Extracted {len(text_chunks)} text chunks from local PDF")
113
 
 
 
114
  index, texts = build_faiss_index(text_chunks)
 
 
 
115
 
116
- # Get relevant chunks for all questions at once
 
117
  all_chunks = set()
118
- for question in request.questions:
 
119
  top_chunks = retrieve_chunks(index, texts, question)
 
 
120
  all_chunks.update(top_chunks)
121
 
122
- # Process all questions in a single LLM call
123
  print(f"Processing all {len(request.questions)} questions in batch...")
124
  response = query_gemini(request.questions, list(all_chunks))
 
 
 
125
 
 
 
126
  # Extract answers from the JSON response
127
  if isinstance(response, dict) and "answers" in response:
128
  answers = response["answers"]
@@ -138,11 +224,32 @@ async def run_local_query(request: LocalQueryRequest):
138
  answers.append("Not Found")
139
  answers = answers[:len(request.questions)]
140
 
 
 
 
141
  print(f"Generated {len(answers)} answers")
142
- return { "answers": answers }
143
 
144
  except Exception as e:
145
- print(f"Error: {str(e)}")
 
146
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
147
 
148
  if __name__ == "__main__":
 
1
  import os
2
  import warnings
3
  import logging
4
+ import time
5
+ from datetime import datetime
6
 
7
  # Suppress TensorFlow warnings
8
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 
18
  from fastapi.middleware.cors import CORSMiddleware
19
  from pydantic import BaseModel
20
  from parser import parse_pdf_from_url, parse_pdf_from_file
21
+ from embedder import build_faiss_index, preload_model
22
  from retriever import retrieve_chunks
23
  from llm import query_gemini
24
  import uvicorn
 
34
  allow_headers=["*"],
35
  )
36
 
37
+ # Preload the model at startup
38
+ @app.on_event("startup")
39
+ async def startup_event():
40
+ print("Starting up HackRx Insurance Policy Assistant...")
41
+ print("Preloading sentence transformer model...")
42
+ preload_model()
43
+ print("Model preloading completed. API is ready to serve requests!")
44
+
45
  @app.get("/")
46
  async def root():
47
  return {"message": "HackRx Insurance Policy Assistant API is running!"}
 
 
 @app.post("/api/v1/hackrx/run")
 async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
+    start_time = time.time()
+    timing_data = {}
+
     try:
         print(f"Processing {len(request.questions)} questions...")
 
+        # Time PDF parsing
+        pdf_start = time.time()
         text_chunks = parse_pdf_from_url(request.documents)
+        pdf_time = time.time() - pdf_start
+        timing_data['pdf_parsing'] = round(pdf_time, 2)
+        print(f"PDF Parsing took: {pdf_time:.2f} seconds")
         print(f"Extracted {len(text_chunks)} text chunks from PDF")
 
+        # Time FAISS index building
+        index_start = time.time()
         index, texts = build_faiss_index(text_chunks)
+        index_time = time.time() - index_start
+        timing_data['faiss_index_building'] = round(index_time, 2)
+        print(f"FAISS Index Building took: {index_time:.2f} seconds")
 
+        # Time chunk retrieval for all questions
+        retrieval_start = time.time()
         all_chunks = set()
+        for i, question in enumerate(request.questions):
+            question_start = time.time()
             top_chunks = retrieve_chunks(index, texts, question)
+            question_time = time.time() - question_start
+            print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
             all_chunks.update(top_chunks)
 
+        retrieval_time = time.time() - retrieval_start
+        timing_data['chunk_retrieval'] = round(retrieval_time, 2)
+        print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
+        print(f"Retrieved {len(all_chunks)} unique chunks")
+
+        # Time LLM processing
+        llm_start = time.time()
         print(f"Processing all {len(request.questions)} questions in batch...")
         response = query_gemini(request.questions, list(all_chunks))
+        llm_time = time.time() - llm_start
+        timing_data['llm_processing'] = round(llm_time, 2)
+        print(f"LLM Processing took: {llm_time:.2f} seconds")
 
+        # Time response processing
+        response_start = time.time()
         # Extract answers from the JSON response
         if isinstance(response, dict) and "answers" in response:
             answers = response["answers"]
 
             answers.append("Not Found")
         answers = answers[:len(request.questions)]
 
+        response_time = time.time() - response_start
+        timing_data['response_processing'] = round(response_time, 2)
+        print(f"Response Processing took: {response_time:.2f} seconds")
         print(f"Generated {len(answers)} answers")
+
+        # Calculate total time
+        total_time = time.time() - start_time
+        timing_data['total_time'] = round(total_time, 2)
+        timing_data['timestamp'] = datetime.now().isoformat()
+
+        print(f"\n=== TIMING BREAKDOWN ===")
+        print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
+        print(f"FAISS Index Building: {timing_data['faiss_index_building']}s")
+        print(f"Chunk Retrieval: {timing_data['chunk_retrieval']}s")
+        print(f"LLM Processing: {timing_data['llm_processing']}s")
+        print(f"Response Processing: {timing_data['response_processing']}s")
+        print(f"TOTAL TIME: {timing_data['total_time']}s")
+        print(f"=======================\n")
+
+        return {
+            "answers": answers
+        }
 
     except Exception as e:
+        total_time = time.time() - start_time
+        print(f"Error after {total_time:.2f} seconds: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
 @app.post("/api/v1/hackrx/local")
 async def run_local_query(request: LocalQueryRequest):
+    start_time = time.time()
+    timing_data = {}
+
     try:
         print(f"Processing local document: {request.document_path}")
         print(f"Processing {len(request.questions)} questions...")
 
+        # Time local PDF parsing
+        pdf_start = time.time()
         text_chunks = parse_pdf_from_file(request.document_path)
+        pdf_time = time.time() - pdf_start
+        timing_data['pdf_parsing'] = round(pdf_time, 2)
+        print(f"Local PDF Parsing took: {pdf_time:.2f} seconds")
         print(f"Extracted {len(text_chunks)} text chunks from local PDF")
 
+        # Time FAISS index building
+        index_start = time.time()
         index, texts = build_faiss_index(text_chunks)
+        index_time = time.time() - index_start
+        timing_data['faiss_index_building'] = round(index_time, 2)
+        print(f"FAISS Index Building took: {index_time:.2f} seconds")
 
+        # Time chunk retrieval for all questions
+        retrieval_start = time.time()
         all_chunks = set()
+        for i, question in enumerate(request.questions):
+            question_start = time.time()
             top_chunks = retrieve_chunks(index, texts, question)
+            question_time = time.time() - question_start
+            print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
             all_chunks.update(top_chunks)
 
+        retrieval_time = time.time() - retrieval_start
+        timing_data['chunk_retrieval'] = round(retrieval_time, 2)
+        print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
+        print(f"Retrieved {len(all_chunks)} unique chunks")
+
+        # Time LLM processing
+        llm_start = time.time()
         print(f"Processing all {len(request.questions)} questions in batch...")
         response = query_gemini(request.questions, list(all_chunks))
+        llm_time = time.time() - llm_start
+        timing_data['llm_processing'] = round(llm_time, 2)
+        print(f"LLM Processing took: {llm_time:.2f} seconds")
 
+        # Time response processing
+        response_start = time.time()
         # Extract answers from the JSON response
         if isinstance(response, dict) and "answers" in response:
             answers = response["answers"]
 
             answers.append("Not Found")
         answers = answers[:len(request.questions)]
 
+        response_time = time.time() - response_start
+        timing_data['response_processing'] = round(response_time, 2)
+        print(f"Response Processing took: {response_time:.2f} seconds")
         print(f"Generated {len(answers)} answers")
+
+        # Calculate total time
+        total_time = time.time() - start_time
+        timing_data['total_time'] = round(total_time, 2)
+        timing_data['timestamp'] = datetime.now().isoformat()
+
+        print(f"\n=== TIMING BREAKDOWN ===")
+        print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
+        print(f"FAISS Index Building: {timing_data['faiss_index_building']}s")
+        print(f"Chunk Retrieval: {timing_data['chunk_retrieval']}s")
+        print(f"LLM Processing: {timing_data['llm_processing']}s")
+        print(f"Response Processing: {timing_data['response_processing']}s")
+        print(f"TOTAL TIME: {timing_data['total_time']}s")
+        print(f"=======================\n")
+
+        return {
+            "answers": answers
+        }
 
     except Exception as e:
+        total_time = time.time() - start_time
+        print(f"Error after {total_time:.2f} seconds: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
 if __name__ == "__main__":
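
Note that timing_data is only logged on the server; the response body still carries just the answers list. For reference, a minimal client call might look like the sketch below. The host, port, token value, and document URL are placeholders rather than part of this commit; only the request fields (documents, questions), the Bearer-style auth implied by verify_token, and the {"answers": [...]} response shape are taken from the handler above.

# Hypothetical client sketch; URL, port, and token are placeholders.
import requests

payload = {
    "documents": "https://example.com/sample-policy.pdf",  # placeholder document URL
    "questions": ["What is the waiting period for pre-existing diseases?"],
}
resp = requests.post(
    "http://localhost:8000/api/v1/hackrx/run",
    json=payload,
    headers={"Authorization": "Bearer <YOUR_TOKEN>"},  # assumes verify_token checks a Bearer token
)
print(resp.json()["answers"])
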
parser.py CHANGED
@@ -1,19 +1,37 @@
 import fitz  # PyMuPDF
 import requests
 from io import BytesIO
+import time
 
 def parse_pdf_from_url(url):
+    start_time = time.time()
+    print(f"Starting PDF download and parsing from URL...")
+
+    download_start = time.time()
     res = requests.get(url)
+    download_time = time.time() - download_start
+    print(f"PDF Download took: {download_time:.2f} seconds")
+
+    parse_start = time.time()
     doc = fitz.open(stream=BytesIO(res.content), filetype="pdf")
     chunks = []
     for page in doc:
         text = page.get_text()
         if text.strip():
             chunks.append(text)
+    doc.close()
+    parse_time = time.time() - parse_start
+    print(f"PDF Text Extraction took: {parse_time:.2f} seconds")
+
+    total_time = time.time() - start_time
+    print(f"Total PDF parsing from URL took: {total_time:.2f} seconds")
     return chunks
 
 def parse_pdf_from_file(file_path):
     """Parse a local PDF file and extract text chunks"""
+    start_time = time.time()
+    print(f"Starting PDF parsing from local file: {file_path}")
+
     try:
         doc = fitz.open(file_path)
         chunks = []
@@ -22,6 +40,11 @@ def parse_pdf_from_file(file_path):
             if text.strip():
                 chunks.append(text)
         doc.close()
+
+        total_time = time.time() - start_time
+        print(f"Total PDF parsing from file took: {total_time:.2f} seconds")
         return chunks
     except Exception as e:
+        total_time = time.time() - start_time
+        print(f"Error parsing PDF file after {total_time:.2f} seconds: {str(e)}")
         raise Exception(f"Error parsing PDF file {file_path}: {str(e)}")
retriever.py CHANGED
@@ -1,9 +1,34 @@
 from sentence_transformers import SentenceTransformer
 import numpy as np
+import time
+from embedder import get_model
 
-model = SentenceTransformer("all-MiniLM-L6-v2")
-
+# Use the preloaded model from embedder instead of creating a new instance
 def retrieve_chunks(index, texts, query, k=5):
+    start_time = time.time()
+    print(f"Retrieving chunks for query: '{query[:50]}...'")
+
+    # Time query embedding
+    embed_start = time.time()
+    model = get_model()  # Use the preloaded model
     query_vec = model.encode([query])
+    embed_time = time.time() - embed_start
+    print(f"Query embedding took: {embed_time:.3f} seconds")
+
+    # Time FAISS search
+    search_start = time.time()
     distances, indices = index.search(np.array(query_vec), k)
-    return [texts[i] for i in indices[0]]
+    search_time = time.time() - search_start
+    print(f"FAISS search took: {search_time:.3f} seconds")
+
+    # Time result processing
+    process_start = time.time()
+    results = [texts[i] for i in indices[0]]
+    process_time = time.time() - process_start
+    print(f"Result processing took: {process_time:.3f} seconds")
+
+    total_time = time.time() - start_time
+    print(f"Total chunk retrieval took: {total_time:.3f} seconds")
+    print(f"Retrieved {len(results)} chunks")
+
+    return results
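
With the module-level SentenceTransformer removed, retrieve_chunks now reuses the model cached by embedder.get_model(), so the embedding weights load once per process instead of once per module. The retrieval path can be exercised on its own as in the sketch below; the sample strings are illustrative, and the import location of build_faiss_index is assumed from its use in main.py rather than stated in this commit.

# Minimal retrieval sketch with toy data (assumes build_faiss_index lives in embedder.py).
from embedder import build_faiss_index
from retriever import retrieve_chunks

docs = [
    "Maternity expenses are covered after a 24-month waiting period.",  # toy chunk
    "Cataract surgery has a two-year waiting period.",                   # toy chunk
]
index, texts = build_faiss_index(docs)
print(retrieve_chunks(index, texts, "Is cataract surgery covered?", k=1))
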