Spaces: Rivalcoder
Rivalcoder committed · Commit eb87b3b · 1 Parent(s): c89a7bc
Update The Model issues and Prompt
Browse files
- .cache/chunks_6635d94cf9023c83521982b3043ec70c.pkl +3 -0
- .cache/embeddings_b24811e7d333cc7d5047e52b357abd7e.pkl +3 -0
- .cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/refs/main +1 -0
- .cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/1_Pooling/config.json +7 -0
- .cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/README.md +173 -0
- .cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config_sentence_transformers.json +7 -0
- .cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/modules.json +20 -0
- .cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/sentence_bert_config.json +4 -0
- .cache/{.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock → models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/added_tokens.json} +0 -0
- .cache/models--sentence-transformers--all-MiniLM-L6-v2/{blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.incomplete → .no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/chat_template.jinja} +0 -0
- .cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/1_Pooling/config.json +7 -0
- .cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/model.safetensors +3 -0
- .cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/special_tokens_map.json +1 -0
- .cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer.json +0 -0
- .cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer_config.json +1 -0
- .cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/vocab.txt +0 -0
- .cache/response_2ab720ffccd688afdc790db13e338c83.pkl +3 -0
- app.py +119 -12
- embedder.py +40 -2
- llm.py +69 -54
- main.py +119 -12
- parser.py +23 -0
- retriever.py +28 -3
.cache/chunks_6635d94cf9023c83521982b3043ec70c.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a4cef2cc09ef9d4ef7d8649bb78ec868e356dcfecbcd6dde23442a90497d407e
size 124546
.cache/embeddings_b24811e7d333cc7d5047e52b357abd7e.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:475523b57f8f6b89e62e668efef73309193b05f0f05bbeffb7f012ee952024f0
size 347400
.cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/refs/main
ADDED
@@ -0,0 +1 @@
c9745ed1d9f207416be6d2e6f8de32d1f16199bf
.cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/1_Pooling/config.json
ADDED
@@ -0,0 +1,7 @@
{
    "word_embedding_dimension": 384,
    "pooling_mode_cls_token": false,
    "pooling_mode_mean_tokens": true,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false
}
.cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/README.md
ADDED
@@ -0,0 +1,173 @@
---
language: en
license: apache-2.0
library_name: sentence-transformers
tags:
- sentence-transformers
- feature-extraction
- sentence-similarity
- transformers
datasets:
- s2orc
- flax-sentence-embeddings/stackexchange_xml
- ms_marco
- gooaq
- yahoo_answers_topics
- code_search_net
- search_qa
- eli5
- snli
- multi_nli
- wikihow
- natural_questions
- trivia_qa
- embedding-data/sentence-compression
- embedding-data/flickr30k-captions
- embedding-data/altlex
- embedding-data/simple-wiki
- embedding-data/QQP
- embedding-data/SPECTER
- embedding-data/PAQ_pairs
- embedding-data/WikiAnswers
pipeline_tag: sentence-similarity
---

# all-MiniLM-L6-v2
This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for tasks like clustering or semantic search.

## Usage (Sentence-Transformers)
Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:

```
pip install -U sentence-transformers
```

Then you can use the model like this:
```python
from sentence_transformers import SentenceTransformer
sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
embeddings = model.encode(sentences)
print(embeddings)
```

## Usage (HuggingFace Transformers)
Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: first, you pass your input through the transformer model, then you apply the right pooling operation on top of the contextualized word embeddings.

```python
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

# Mean Pooling - take the attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]  # First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)


# Sentences we want sentence embeddings for
sentences = ['This is an example sentence', 'Each sentence is converted']

# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')

# Tokenize sentences
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

# Compute token embeddings
with torch.no_grad():
    model_output = model(**encoded_input)

# Perform pooling
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

# Normalize embeddings
sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

print("Sentence embeddings:")
print(sentence_embeddings)
```

------

## Background

The project aims to train sentence embedding models on very large sentence-level datasets using a self-supervised
contrastive learning objective. We used the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model and fine-tuned it on a
1B sentence pairs dataset. We use a contrastive learning objective: given a sentence from the pair, the model should predict which out of a set of randomly sampled other sentences was actually paired with it in our dataset.

We developed this model during the
[Community week using JAX/Flax for NLP & CV](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104),
organized by Hugging Face. We developed this model as part of the project:
[Train the Best Sentence Embedding Model Ever with 1B Training Pairs](https://discuss.huggingface.co/t/train-the-best-sentence-embedding-model-ever-with-1b-training-pairs/7354). We benefited from efficient hardware infrastructure to run the project: 7 TPU v3-8s, as well as guidance from Google's Flax, JAX, and Cloud team members on efficient deep learning frameworks.

## Intended uses

Our model is intended to be used as a sentence and short paragraph encoder. Given an input text, it outputs a vector which captures
the semantic information. The sentence vector may be used for information retrieval, clustering or sentence similarity tasks.

By default, input text longer than 256 word pieces is truncated.

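As a minimal illustration of such a similarity task (a sketch only; the example sentences and the choice of the `util.cos_sim` helper are assumptions, not taken from the rest of this card):

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# A toy corpus and a query; any short texts work the same way.
docs = ["The policy includes a thirty day grace period for premium payment.",
        "Cataract surgery carries a two year waiting period."]
query = "How long is the grace period for premium payment?"

doc_emb = model.encode(docs, convert_to_tensor=True)
query_emb = model.encode(query, convert_to_tensor=True)

# Cosine similarity between the query vector and each document vector;
# higher scores indicate closer semantic meaning.
scores = util.cos_sim(query_emb, doc_emb)
print(scores)
```
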
## Training procedure

### Pre-training

We use the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model. Please refer to the model card for more detailed information about the pre-training procedure.

### Fine-tuning

We fine-tune the model using a contrastive objective. Formally, we compute the cosine similarity for each possible sentence pair from the batch.
We then apply the cross entropy loss by comparing with the true pairs.

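A minimal sketch of this in-batch objective (an illustrative reconstruction, not the project's `train_script.py`; the scale factor and the one-positive-per-row layout are assumptions):

```python
import torch
import torch.nn.functional as F

def in_batch_contrastive_loss(emb_a, emb_b, scale=20.0):
    # emb_a[i] and emb_b[i] are the embeddings of the two sentences in pair i.
    emb_a = F.normalize(emb_a, p=2, dim=1)
    emb_b = F.normalize(emb_b, p=2, dim=1)
    # Cosine similarity between every sentence in emb_a and every sentence in emb_b.
    scores = emb_a @ emb_b.T * scale      # shape: (batch_size, batch_size)
    # The true pair for row i sits in column i; every other column acts as a negative.
    labels = torch.arange(scores.size(0), device=scores.device)
    return F.cross_entropy(scores, labels)
```
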
#### Hyper parameters

We trained our model on a TPU v3-8. We trained the model for 100k steps using a batch size of 1024 (128 per TPU core).
We used a learning rate warm-up of 500. The sequence length was limited to 128 tokens. We used the AdamW optimizer with
a 2e-5 learning rate. The full training script is accessible in this current repository: `train_script.py`.

#### Training data

We use the concatenation of multiple datasets to fine-tune our model. The total number of sentence pairs is above 1 billion.
We sampled each dataset with a weighted probability; the configuration is detailed in the `data_config.json` file.

| Dataset | Paper | Number of training tuples |
|--------------------------------------------------------|:----------------------------------------:|:--------------------------:|
| [Reddit comments (2015-2018)](https://github.com/PolyAI-LDN/conversational-datasets/tree/master/reddit) | [paper](https://arxiv.org/abs/1904.06472) | 726,484,430 |
| [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Abstracts) | [paper](https://aclanthology.org/2020.acl-main.447/) | 116,288,806 |
| [WikiAnswers](https://github.com/afader/oqa#wikianswers-corpus) Duplicate question pairs | [paper](https://doi.org/10.1145/2623330.2623677) | 77,427,422 |
| [PAQ](https://github.com/facebookresearch/PAQ) (Question, Answer) pairs | [paper](https://arxiv.org/abs/2102.07033) | 64,371,441 |
| [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Titles) | [paper](https://aclanthology.org/2020.acl-main.447/) | 52,603,982 |
| [S2ORC](https://github.com/allenai/s2orc) (Title, Abstract) | [paper](https://aclanthology.org/2020.acl-main.447/) | 41,769,185 |
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Body) pairs | - | 25,316,456 |
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title+Body, Answer) pairs | - | 21,396,559 |
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Answer) pairs | - | 21,396,559 |
| [MS MARCO](https://microsoft.github.io/msmarco/) triplets | [paper](https://doi.org/10.1145/3404835.3462804) | 9,144,553 |
| [GOOAQ: Open Question Answering with Diverse Answer Types](https://github.com/allenai/gooaq) | [paper](https://arxiv.org/pdf/2104.08727.pdf) | 3,012,496 |
| [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 1,198,260 |
| [Code Search](https://huggingface.co/datasets/code_search_net) | - | 1,151,414 |
| [COCO](https://cocodataset.org/#home) Image captions | [paper](https://link.springer.com/chapter/10.1007%2F978-3-319-10602-1_48) | 828,395 |
| [SPECTER](https://github.com/allenai/specter) citation triplets | [paper](https://doi.org/10.18653/v1/2020.acl-main.207) | 684,100 |
| [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Question, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 681,164 |
| [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Question) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 659,896 |
| [SearchQA](https://huggingface.co/datasets/search_qa) | [paper](https://arxiv.org/abs/1704.05179) | 582,261 |
| [Eli5](https://huggingface.co/datasets/eli5) | [paper](https://doi.org/10.18653/v1/p19-1346) | 325,475 |
| [Flickr 30k](https://shannon.cs.illinois.edu/DenotationGraph/) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/229/33) | 317,695 |
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles) | | 304,525 |
| AllNLI ([SNLI](https://nlp.stanford.edu/projects/snli/) and [MultiNLI](https://cims.nyu.edu/~sbowman/multinli/)) | [paper SNLI](https://doi.org/10.18653/v1/d15-1075), [paper MultiNLI](https://doi.org/10.18653/v1/n18-1101) | 277,230 |
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (bodies) | | 250,519 |
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles+bodies) | | 250,460 |
| [Sentence Compression](https://github.com/google-research-datasets/sentence-compression) | [paper](https://www.aclweb.org/anthology/D13-1155/) | 180,000 |
| [Wikihow](https://github.com/pvl/wikihow_pairs_dataset) | [paper](https://arxiv.org/abs/1810.09305) | 128,542 |
| [Altlex](https://github.com/chridey/altlex/) | [paper](https://aclanthology.org/P16-1135.pdf) | 112,696 |
| [Quora Question Triplets](https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs) | - | 103,663 |
| [Simple Wikipedia](https://cs.pomona.edu/~dkauchak/simplification/) | [paper](https://www.aclweb.org/anthology/P11-2117/) | 102,225 |
| [Natural Questions (NQ)](https://ai.google.com/research/NaturalQuestions) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/1455) | 100,231 |
| [SQuAD2.0](https://rajpurkar.github.io/SQuAD-explorer/) | [paper](https://aclanthology.org/P18-2124.pdf) | 87,599 |
| [TriviaQA](https://huggingface.co/datasets/trivia_qa) | - | 73,346 |
| **Total** | | **1,170,060,424** |
.cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config_sentence_transformers.json
ADDED
@@ -0,0 +1,7 @@
{
  "__version__": {
    "sentence_transformers": "2.0.0",
    "transformers": "4.6.1",
    "pytorch": "1.8.1"
  }
}
.cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/modules.json
ADDED
@@ -0,0 +1,20 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
.cache/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 256,
  "do_lower_case": false
}
.cache/{.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock → models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/added_tokens.json}
RENAMED
File without changes
.cache/models--sentence-transformers--all-MiniLM-L6-v2/{blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.incomplete → .no_exist/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/chat_template.jinja}
RENAMED
File without changes
.cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/1_Pooling/config.json
ADDED
@@ -0,0 +1,7 @@
{
    "word_embedding_dimension": 384,
    "pooling_mode_cls_token": false,
    "pooling_mode_mean_tokens": true,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false
}
.cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db
size 90868376
.cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
.cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
.cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "name_or_path": "nreimers/MiniLM-L6-H384-uncased", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer", "model_max_length": 512}
.cache/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
.cache/response_2ab720ffccd688afdc790db13e338c83.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1c5853e52bd3fdc0bdf05ca5b73769bc17fe8f44fe56271a78a87f155c5de6da
size 429
app.py
CHANGED
@@ -1,6 +1,8 @@
 import os
 import warnings
 import logging
+import time
+from datetime import datetime
 
 # Set up cache directory for HuggingFace models
 cache_dir = os.path.join(os.getcwd(), ".cache")
@@ -22,7 +24,7 @@ from fastapi import FastAPI, Request, HTTPException, Depends, Header
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from parser import parse_pdf_from_url, parse_pdf_from_file
-from embedder import build_faiss_index
+from embedder import build_faiss_index, preload_model
 from retriever import retrieve_chunks
 from llm import query_gemini
 import uvicorn
@@ -38,6 +40,14 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
+# Preload the model at startup
+@app.on_event("startup")
+async def startup_event():
+    print("Starting up HackRx Insurance Policy Assistant...")
+    print("Preloading sentence transformer model...")
+    preload_model()
+    print("Model preloading completed. API is ready to serve requests!")
+
 @app.get("/")
 async def root():
     return {"message": "HackRx Insurance Policy Assistant API is running!"}
@@ -67,24 +77,52 @@ def verify_token(authorization: str = Header(None)):
 
 @app.post("/api/v1/hackrx/run")
 async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
+    start_time = time.time()
+    timing_data = {}
+
     try:
         print(f"Processing {len(request.questions)} questions...")
 
+        # Time PDF parsing
+        pdf_start = time.time()
         text_chunks = parse_pdf_from_url(request.documents)
+        pdf_time = time.time() - pdf_start
+        timing_data['pdf_parsing'] = round(pdf_time, 2)
+        print(f"PDF Parsing took: {pdf_time:.2f} seconds")
         print(f"Extracted {len(text_chunks)} text chunks from PDF")
 
+        # Time FAISS index building
+        index_start = time.time()
         index, texts = build_faiss_index(text_chunks)
+        index_time = time.time() - index_start
+        timing_data['faiss_index_building'] = round(index_time, 2)
+        print(f"FAISS Index Building took: {index_time:.2f} seconds")
 
-        #
+        # Time chunk retrieval for all questions
+        retrieval_start = time.time()
         all_chunks = set()
-        for question in request.questions:
+        for i, question in enumerate(request.questions):
+            question_start = time.time()
             top_chunks = retrieve_chunks(index, texts, question)
+            question_time = time.time() - question_start
+            print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
             all_chunks.update(top_chunks)
 
-
+        retrieval_time = time.time() - retrieval_start
+        timing_data['chunk_retrieval'] = round(retrieval_time, 2)
+        print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
+        print(f"Retrieved {len(all_chunks)} unique chunks")
+
+        # Time LLM processing
+        llm_start = time.time()
         print(f"Processing all {len(request.questions)} questions in batch...")
         response = query_gemini(request.questions, list(all_chunks))
+        llm_time = time.time() - llm_start
+        timing_data['llm_processing'] = round(llm_time, 2)
+        print(f"LLM Processing took: {llm_time:.2f} seconds")
 
+        # Time response processing
+        response_start = time.time()
         # Extract answers from the JSON response
         if isinstance(response, dict) and "answers" in response:
             answers = response["answers"]
@@ -100,35 +138,83 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
             answers.append("Not Found")
         answers = answers[:len(request.questions)]
 
+        response_time = time.time() - response_start
+        timing_data['response_processing'] = round(response_time, 2)
+        print(f"Response Processing took: {response_time:.2f} seconds")
         print(f"Generated {len(answers)} answers")
-
+
+        # Calculate total time
+        total_time = time.time() - start_time
+        timing_data['total_time'] = round(total_time, 2)
+        timing_data['timestamp'] = datetime.now().isoformat()
+
+        print(f"\n=== TIMING BREAKDOWN ===")
+        print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
+        print(f"FAISS Index Building: {timing_data['faiss_index_building']}s")
+        print(f"Chunk Retrieval: {timing_data['chunk_retrieval']}s")
+        print(f"LLM Processing: {timing_data['llm_processing']}s")
+        print(f"Response Processing: {timing_data['response_processing']}s")
+        print(f"TOTAL TIME: {timing_data['total_time']}s")
+        print(f"=======================\n")
+
+        return {
+            "answers": answers
+        }
 
     except Exception as e:
-
+        total_time = time.time() - start_time
+        print(f"Error after {total_time:.2f} seconds: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
 @app.post("/api/v1/hackrx/local")
 async def run_local_query(request: LocalQueryRequest):
+    start_time = time.time()
+    timing_data = {}
+
     try:
         print(f"Processing local document: {request.document_path}")
         print(f"Processing {len(request.questions)} questions...")
 
-        #
+        # Time local PDF parsing
+        pdf_start = time.time()
         text_chunks = parse_pdf_from_file(request.document_path)
+        pdf_time = time.time() - pdf_start
+        timing_data['pdf_parsing'] = round(pdf_time, 2)
+        print(f"Local PDF Parsing took: {pdf_time:.2f} seconds")
         print(f"Extracted {len(text_chunks)} text chunks from local PDF")
 
+        # Time FAISS index building
+        index_start = time.time()
         index, texts = build_faiss_index(text_chunks)
+        index_time = time.time() - index_start
+        timing_data['faiss_index_building'] = round(index_time, 2)
+        print(f"FAISS Index Building took: {index_time:.2f} seconds")
 
-        #
+        # Time chunk retrieval for all questions
+        retrieval_start = time.time()
         all_chunks = set()
-        for question in request.questions:
+        for i, question in enumerate(request.questions):
+            question_start = time.time()
             top_chunks = retrieve_chunks(index, texts, question)
+            question_time = time.time() - question_start
+            print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
             all_chunks.update(top_chunks)
 
-
+        retrieval_time = time.time() - retrieval_start
+        timing_data['chunk_retrieval'] = round(retrieval_time, 2)
+        print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
+        print(f"Retrieved {len(all_chunks)} unique chunks")
+
+        # Time LLM processing
+        llm_start = time.time()
         print(f"Processing all {len(request.questions)} questions in batch...")
         response = query_gemini(request.questions, list(all_chunks))
+        llm_time = time.time() - llm_start
+        timing_data['llm_processing'] = round(llm_time, 2)
+        print(f"LLM Processing took: {llm_time:.2f} seconds")
 
+        # Time response processing
+        response_start = time.time()
         # Extract answers from the JSON response
         if isinstance(response, dict) and "answers" in response:
             answers = response["answers"]
@@ -144,11 +230,32 @@ async def run_local_query(request: LocalQueryRequest):
             answers.append("Not Found")
         answers = answers[:len(request.questions)]
 
+        response_time = time.time() - response_start
+        timing_data['response_processing'] = round(response_time, 2)
+        print(f"Response Processing took: {response_time:.2f} seconds")
         print(f"Generated {len(answers)} answers")
-
+
+        # Calculate total time
+        total_time = time.time() - start_time
+        timing_data['total_time'] = round(total_time, 2)
+        timing_data['timestamp'] = datetime.now().isoformat()
+
+        print(f"\n=== TIMING BREAKDOWN ===")
+        print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
+        print(f"FAISS Index Building: {timing_data['faiss_index_building']}s")
+        print(f"Chunk Retrieval: {timing_data['chunk_retrieval']}s")
+        print(f"LLM Processing: {timing_data['llm_processing']}s")
+        print(f"Response Processing: {timing_data['response_processing']}s")
+        print(f"TOTAL TIME: {timing_data['total_time']}s")
+        print(f"=======================\n")
+
+        return {
+            "answers": answers
+        }
 
     except Exception as e:
-
+        total_time = time.time() - start_time
+        print(f"Error after {total_time:.2f} seconds: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
 if __name__ == "__main__":
embedder.py
CHANGED
@@ -2,6 +2,7 @@ import faiss
 from sentence_transformers import SentenceTransformer
 import numpy as np
 import os
+import time
 
 # Set up cache directory in a writable location
 cache_dir = os.path.join(os.getcwd(), ".cache")
@@ -12,26 +13,63 @@ os.environ['TRANSFORMERS_CACHE'] = cache_dir
 # Initialize model as None - will be loaded lazily
 _model = None
 
-def
-    """
+def preload_model():
+    """Preload the sentence transformer model at startup"""
     global _model
     if _model is None:
+        model_start = time.time()
+        print("Preloading sentence transformer model...")
         try:
             _model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache_dir)
+            model_time = time.time() - model_start
+            print(f"Model preloading completed in {model_time:.2f} seconds")
         except Exception as e:
            print(f"Error loading model: {e}")
            # Fallback to a different model if the first one fails
            try:
                _model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=cache_dir)
+                model_time = time.time() - model_start
+                print(f"Fallback model preloading completed in {model_time:.2f} seconds")
            except Exception as e2:
                print(f"Error loading fallback model: {e2}")
                raise
     return _model
 
+def get_model():
+    """Get the sentence transformer model, loading it lazily if needed"""
+    global _model
+    if _model is None:
+        # If model is not preloaded, load it now (should not happen in production)
+        print("Warning: Model not preloaded, loading now...")
+        return preload_model()
+    return _model
+
 def build_faiss_index(chunks):
+    start_time = time.time()
+    print(f"Building FAISS index for {len(chunks)} chunks...")
+
+    # Time model retrieval (should be instant now)
+    model_start = time.time()
     model = get_model()
+    model_time = time.time() - model_start
+    print(f"Model retrieval took: {model_time:.3f} seconds")
+
+    # Time embedding generation
+    embed_start = time.time()
     embeddings = model.encode(chunks)
+    embed_time = time.time() - embed_start
+    print(f"Embedding generation took: {embed_time:.2f} seconds")
+    print(f"Generated embeddings shape: {embeddings.shape}")
+
+    # Time FAISS index creation
+    index_start = time.time()
     dimension = embeddings.shape[1]
     index = faiss.IndexFlatL2(dimension)
     index.add(np.array(embeddings))
+    index_time = time.time() - index_start
+    print(f"FAISS index creation took: {index_time:.2f} seconds")
+
+    total_time = time.time() - start_time
+    print(f"Total FAISS index building took: {total_time:.2f} seconds")
+
     return index, chunks
llm.py
CHANGED
@@ -1,6 +1,7 @@
 import google.generativeai as genai
 import os
 import json
+import time
 from dotenv import load_dotenv
 load_dotenv()
 
@@ -12,78 +13,79 @@ print(f"Google API Key loaded: {api_key[:10]}..." if api_key else "No API key fo
 genai.configure(api_key=api_key)
 
 def query_gemini(questions, contexts):
+    start_time = time.time()
+    print(f"Starting LLM processing for {len(questions)} questions with {len(contexts)} context chunks")
+
     try:
+        # Time context preparation
+        context_start = time.time()
         context = "\n\n".join(contexts)
+        context_time = time.time() - context_start
+        print(f"Context preparation took: {context_time:.2f} seconds")
+        print(f"Total context length: {len(context)} characters")
 
+        # Time prompt preparation
+        prompt_start = time.time()
         # Create a numbered list of questions
         questions_text = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])
 
-        prompt = f"""
-
-
-
-
-
-5. Answer each question in the order provided.
-
+        prompt = f"""
+You are an intelligent insurance assistant trained to answer questions using insurance documents. Based on the context provided below, respond to each question with a **well-informed, complete, and professionally worded answer**.
+
+🎯 SCORING & OUTPUT GOAL:
+- Responses are part of an evaluated system.
+- Each answer should be **accurate**, **complete**, and **well-phrased** — ideally around **1–2 full sentences**.
+- Avoid short/fragmented answers or long multi-paragraph explanations.
+- Always write like an insurance advisor addressing a customer clearly.
+
+📘 INSTRUCTIONS:
+1. **Only use the provided context** to answer each question. If the answer is not found, respond with exactly: `"Not Found"`.
+2. Keep answers concise **but not vague**. Include all **key points** (such as limits, durations, conditions) in one or two complete sentences.
+3. DO NOT use bullet points, partial phrases, or excessive legal text. DO NOT repeat the question in the answer.
+4. Match the tone and format of these examples:
+   - "A grace period of thirty days is provided for premium payment after the due date to renew or continue the policy without losing continuity benefits."
+   - "Yes, the policy covers maternity expenses, including childbirth and lawful medical termination of pregnancy. To be eligible, the female insured person must have been continuously covered for at least 24 months. The benefit is limited to two deliveries or terminations during the policy period."
+   - "Yes, the policy indemnifies the medical expenses for the organ donor's hospitalization for the purpose of harvesting the organ, provided the organ is for an insured person and the donation complies with the Transplantation of Human Organs Act, 1994."
+   - "Not Found"
+
+📤 RETURN FORMAT:
+Respond strictly using this JSON structure:
+
+{{
+  "answers": [
+    "Answer to question 1",
+    "Answer to question 2",
+    ...
+  ]
+}}
+
+📚 CONTEXT:
 {context}
 
-
+❓ QUESTIONS:
 {questions_text}
 
-
-
-Old_Ai_Response_Format
-{{
-"answers": [
-"The grace period for premium payment is thirty days.",
-"Expenses related to the treatment of a Pre-Existing Disease (PED) and its direct complications shall be excluded until the expiry of thirty six (36) months of continuous coverage after the date of inception of the first policy.",
-"Yes, the company shall indemnify Maternity Expenses as described in section 3.1.14 for any female Insured Person, and also Pre-Natal and Post-Natal Hospitalisation expenses per delivery, including expenses for necessary vaccination for New Born Baby, subject to the limit as shown in the Table of Benefits. The female Insured Person should have been continuously covered for at least 24 months before availing this benefit.",
-"Cataract surgery has a waiting period of two years.",
-"Yes, the Company shall indemnify the Medical Expenses incurred in respect of an organ donor’s Hospitalisation during the Policy Period for harvesting of the organ donated to an Insured Person, provided that certain conditions are met as outlined in section 3.1.7.",
-"On renewal of policies with a term of one year, a NCD of flat 5% shall be allowed on the * base premium, provided claims are not reported in the expiring Policy.\nOn renewal of policies with a term exceeding one year, the NCD amount with respect to each claim free policy year shall be aggregated and allowed on renewal. Aggregate amount of NCD allowed shall not exceed flat 5% of the total base premium for the term of the policy.",
-"Yes, expenses of health check up shall be reimbursed (irrespective of past claims) at the end of a block of two continuous policy years, provided the Policy has been continuously renewed with the Company without a break. Expenses payable are subject to the limit stated in the Table of Benefits.",
-"Hospital means any institution established for in-patient care and day care treatment of disease/ injuries and which has been registered as a hospital with the local authorities under the Clinical Establishments (Registration and Regulation) Act, 2010 or under the enactments specified under Schedule of Section 56(1) of the said Act, OR complies with all minimum criteria as under:\ni. has qualified nursing staff under its employment round the clock;\nii. has at least ten inpatient beds, in those towns having a population of less than ten lacs and fifteen inpatient beds in all other places;\niii. has qualified medical practitioner (s) in charge round the clock;\niv. has a fully equipped operation theatre of its own where surgical procedures are carried out \nv. maintains daily records of patients and shall make these accessible to the Company’s authorized personnel.",
-"The Company shall indemnify Medical Expenses incurred for Inpatient Care treatment under Ayurveda, Yoga and Naturopathy, Unani, Siddha and Homeopathy systems of medicines during each Policy Period up to the limit of Sum Insured as specified in the Policy Schedule in any AYUSH Hospital.",
-"For Plan A, Room Charges are limited to Up to 1% of SI or actual, whichever is lower and ICU Charges are limited to Up to 2% of SI or actual, whichever is lower, per day per insured person."
-]
-}}
-
-New_Ai_Response_Format_And Wordings Given Like
-{{
-"answers": [
-"A grace period of thirty days is provided for premium payment after the due date to renew or continue the policy without losing continuity benefits.",
-"There is a waiting period of thirty-six (36) months of continuous coverage from the first policy inception for pre-existing diseases and their direct complications to be covered.",
-"Yes, the policy covers maternity expenses, including childbirth and lawful medical termination of pregnancy. To be eligible, the female insured person must have been continuously covered for at least 24 months. The benefit is limited to two deliveries or terminations during the policy period.",
-"The policy has a specific waiting period of two (2) years for cataract surgery.",
-"Yes, the policy indemnifies the medical expenses for the organ donor's hospitalization for the purpose of harvesting the organ, provided the organ is for an insured person and the donation complies with the Transplantation of Human Organs Act, 1994.",
-"A No Claim Discount of 5% on the base premium is offered on renewal for a one-year policy term if no claims were made in the preceding year. The maximum aggregate NCD is capped at 5% of the total base premium.",
-"Yes, the policy reimburses expenses for health check-ups at the end of every block of two continuous policy years, provided the policy has been renewed without a break. The amount is subject to the limits specified in the Table of Benefits.",
-"A hospital is defined as an institution with at least 10 inpatient beds (in towns with a population below ten lakhs) or 15 beds (in all other places), with qualified nursing staff and medical practitioners available 24/7, a fully equipped operation theatre, and which maintains daily records of patients.",
-"The policy covers medical expenses for inpatient treatment under Ayurveda, Yoga, Naturopathy, Unani, Siddha, and Homeopathy systems up to the Sum Insured limit, provided the treatment is taken in an AYUSH Hospital.",
-"Yes, for Plan A, the daily room rent is capped at 1% of the Sum Insured, and ICU charges are capped at 2% of the Sum Insured. These limits do not apply if the treatment is for a listed procedure in a Preferred Provider Network (PPN)."
-]
-}}
 
-## The Above Is Reference How Can Give Output Wordings Back To The Question is Given For References
 
-Return your response in this exact JSON format:
-{{
-"answers": [
-"Answer to question 1",
-"Answer to question 2",
-"Answer to question 3",
-...
-]
-}}
 
-Ensure each answer is comprehensive and directly addresses the corresponding question. If information is not found in the context for any question, respond with "Not Found" for that question."""
+Your task: Provide accurate, refined answers based on the document context above. Use the tone and structure shown. Be concise but thorough. Only include what is supported in the context. Use "Not Found" if the answer is missing.
+"""
 
+        prompt_time = time.time() - prompt_start
+        print(f"Prompt preparation took: {prompt_time:.2f} seconds")
+        print(f"Total prompt length: {len(prompt)} characters")
+
+        # Time model initialization and API call
+        api_start = time.time()
         model = genai.GenerativeModel('gemini-2.0-flash-exp')
         response = model.generate_content(prompt)
+        api_time = time.time() - api_start
+        print(f"Gemini API call took: {api_time:.2f} seconds")
+
+        # Time response processing
+        process_start = time.time()
         response_text = response.text.strip()
+        print(f"Raw response length: {len(response_text)} characters")
 
         # Try to parse the response as JSON
         try:
@@ -94,12 +96,25 @@ Ensure each answer is comprehensive and directly addresses the corresponding que
             response_text = response_text.replace("```", "").strip()
 
         parsed_response = json.loads(response_text)
+        process_time = time.time() - process_start
+        print(f"Response processing took: {process_time:.2f} seconds")
+
+        total_time = time.time() - start_time
+        print(f"Total LLM processing took: {total_time:.2f} seconds")
+
         return parsed_response
     except json.JSONDecodeError:
         # If JSON parsing fails, return a structured response
+        process_time = time.time() - process_start
+        print(f"Response processing took: {process_time:.2f} seconds (JSON parsing failed)")
         print(f"Failed to parse JSON response: {response_text}")
+
+        total_time = time.time() - start_time
+        print(f"Total LLM processing took: {total_time:.2f} seconds")
+
         return {"answers": ["Error parsing response"] * len(questions)}
 
     except Exception as e:
-
+        total_time = time.time() - start_time
+        print(f"Error in query_gemini after {total_time:.2f} seconds: {str(e)}")
         return {"answers": [f"Error generating response: {str(e)}"] * len(questions)}
main.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
import os
|
2 |
import warnings
|
3 |
import logging
|
|
|
|
|
4 |
|
5 |
# Suppress TensorFlow warnings
|
6 |
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
@@ -16,7 +18,7 @@ from fastapi import FastAPI, Request, HTTPException, Depends, Header
|
|
16 |
from fastapi.middleware.cors import CORSMiddleware
|
17 |
from pydantic import BaseModel
|
18 |
from parser import parse_pdf_from_url, parse_pdf_from_file
|
19 |
-
from embedder import build_faiss_index
|
20 |
from retriever import retrieve_chunks
|
21 |
from llm import query_gemini
|
22 |
import uvicorn
|
@@ -32,6 +34,14 @@ app.add_middleware(
|
|
32 |
allow_headers=["*"],
|
33 |
)
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
@app.get("/")
|
36 |
async def root():
|
37 |
return {"message": "HackRx Insurance Policy Assistant API is running!"}
|
@@ -61,24 +71,52 @@ def verify_token(authorization: str = Header(None)):
|
|
61 |
|
62 |
@app.post("/api/v1/hackrx/run")
|
63 |
async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
|
|
|
|
|
|
|
64 |
try:
|
65 |
print(f"Processing {len(request.questions)} questions...")
|
66 |
|
|
|
|
|
67 |
text_chunks = parse_pdf_from_url(request.documents)
|
|
|
|
|
|
|
68 |
print(f"Extracted {len(text_chunks)} text chunks from PDF")
|
69 |
|
|
|
|
|
70 |
index, texts = build_faiss_index(text_chunks)
|
|
|
|
|
|
|
71 |
|
72 |
-
#
|
|
|
73 |
all_chunks = set()
|
74 |
-
for question in request.questions:
|
|
|
75 |
top_chunks = retrieve_chunks(index, texts, question)
|
|
|
|
|
76 |
all_chunks.update(top_chunks)
|
77 |
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
print(f"Processing all {len(request.questions)} questions in batch...")
|
80 |
response = query_gemini(request.questions, list(all_chunks))
|
|
|
|
|
|
|
81 |
|
|
|
|
|
82 |
# Extract answers from the JSON response
|
83 |
if isinstance(response, dict) and "answers" in response:
|
84 |
answers = response["answers"]
|
@@ -94,35 +132,83 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
|
|
94 |
answers.append("Not Found")
|
95 |
answers = answers[:len(request.questions)]
|
96 |
|
|
|
|
|
|
|
97 |
print(f"Generated {len(answers)} answers")
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
except Exception as e:
|
101 |
-
|
|
|
102 |
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
103 |
|
104 |
@app.post("/api/v1/hackrx/local")
|
105 |
async def run_local_query(request: LocalQueryRequest):
|
|
|
|
|
|
|
106 |
try:
|
107 |
print(f"Processing local document: {request.document_path}")
|
108 |
print(f"Processing {len(request.questions)} questions...")
|
109 |
|
110 |
-
#
|
|
|
111 |
text_chunks = parse_pdf_from_file(request.document_path)
|
|
|
|
|
|
|
112 |
print(f"Extracted {len(text_chunks)} text chunks from local PDF")
|
113 |
|
|
|
|
|
114 |
index, texts = build_faiss_index(text_chunks)
|
|
|
|
|
|
|
115 |
|
116 |
-
#
|
|
|
117 |
all_chunks = set()
|
118 |
-
for question in request.questions:
|
|
|
119 |
top_chunks = retrieve_chunks(index, texts, question)
|
|
|
|
|
120 |
all_chunks.update(top_chunks)
|
121 |
|
122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
print(f"Processing all {len(request.questions)} questions in batch...")
|
124 |
response = query_gemini(request.questions, list(all_chunks))
|
|
|
|
|
|
|
125 |
|
|
|
|
|
126 |
# Extract answers from the JSON response
|
127 |
if isinstance(response, dict) and "answers" in response:
|
128 |
answers = response["answers"]
|
@@ -138,11 +224,32 @@ async def run_local_query(request: LocalQueryRequest):
|
|
138 |
answers.append("Not Found")
|
139 |
answers = answers[:len(request.questions)]
|
140 |
|
|
|
|
|
|
|
141 |
print(f"Generated {len(answers)} answers")
|
142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
|
144 |
except Exception as e:
|
145 |
-
|
|
|
146 |
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
147 |
|
148 |
if __name__ == "__main__":
|
|
|
  1 |   import os
  2 |   import warnings
  3 |   import logging
  4 | + import time
  5 | + from datetime import datetime
  6 |
  7 |   # Suppress TensorFlow warnings
  8 |   os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

 18 |   from fastapi.middleware.cors import CORSMiddleware
 19 |   from pydantic import BaseModel
 20 |   from parser import parse_pdf_from_url, parse_pdf_from_file
 21 | + from embedder import build_faiss_index, preload_model
 22 |   from retriever import retrieve_chunks
 23 |   from llm import query_gemini
 24 |   import uvicorn

 34 |       allow_headers=["*"],
 35 |   )
 36 |
 37 | + # Preload the model at startup
 38 | + @app.on_event("startup")
 39 | + async def startup_event():
 40 | +     print("Starting up HackRx Insurance Policy Assistant...")
 41 | +     print("Preloading sentence transformer model...")
 42 | +     preload_model()
 43 | +     print("Model preloading completed. API is ready to serve requests!")
 44 | +
 45 |   @app.get("/")
 46 |   async def root():
 47 |       return {"message": "HackRx Insurance Policy Assistant API is running!"}

 71 |
 72 |   @app.post("/api/v1/hackrx/run")
 73 |   async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
 74 | +     start_time = time.time()
 75 | +     timing_data = {}
 76 | +
 77 |       try:
 78 |           print(f"Processing {len(request.questions)} questions...")
 79 |
 80 | +         # Time PDF parsing
 81 | +         pdf_start = time.time()
 82 |           text_chunks = parse_pdf_from_url(request.documents)
 83 | +         pdf_time = time.time() - pdf_start
 84 | +         timing_data['pdf_parsing'] = round(pdf_time, 2)
 85 | +         print(f"PDF Parsing took: {pdf_time:.2f} seconds")
 86 |           print(f"Extracted {len(text_chunks)} text chunks from PDF")
 87 |
 88 | +         # Time FAISS index building
 89 | +         index_start = time.time()
 90 |           index, texts = build_faiss_index(text_chunks)
 91 | +         index_time = time.time() - index_start
 92 | +         timing_data['faiss_index_building'] = round(index_time, 2)
 93 | +         print(f"FAISS Index Building took: {index_time:.2f} seconds")
 94 |
 95 | +         # Time chunk retrieval for all questions
 96 | +         retrieval_start = time.time()
 97 |           all_chunks = set()
 98 | +         for i, question in enumerate(request.questions):
 99 | +             question_start = time.time()
100 |               top_chunks = retrieve_chunks(index, texts, question)
101 | +             question_time = time.time() - question_start
102 | +             print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
103 |               all_chunks.update(top_chunks)
104 |
105 | +         retrieval_time = time.time() - retrieval_start
106 | +         timing_data['chunk_retrieval'] = round(retrieval_time, 2)
107 | +         print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
108 | +         print(f"Retrieved {len(all_chunks)} unique chunks")
109 | +
110 | +         # Time LLM processing
111 | +         llm_start = time.time()
112 |           print(f"Processing all {len(request.questions)} questions in batch...")
113 |           response = query_gemini(request.questions, list(all_chunks))
114 | +         llm_time = time.time() - llm_start
115 | +         timing_data['llm_processing'] = round(llm_time, 2)
116 | +         print(f"LLM Processing took: {llm_time:.2f} seconds")
117 |
118 | +         # Time response processing
119 | +         response_start = time.time()
120 |           # Extract answers from the JSON response
121 |           if isinstance(response, dict) and "answers" in response:
122 |               answers = response["answers"]

132 |               answers.append("Not Found")
133 |           answers = answers[:len(request.questions)]
134 |
135 | +         response_time = time.time() - response_start
136 | +         timing_data['response_processing'] = round(response_time, 2)
137 | +         print(f"Response Processing took: {response_time:.2f} seconds")
138 |           print(f"Generated {len(answers)} answers")
139 | +
140 | +         # Calculate total time
141 | +         total_time = time.time() - start_time
142 | +         timing_data['total_time'] = round(total_time, 2)
143 | +         timing_data['timestamp'] = datetime.now().isoformat()
144 | +
145 | +         print(f"\n=== TIMING BREAKDOWN ===")
146 | +         print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
147 | +         print(f"FAISS Index Building: {timing_data['faiss_index_building']}s")
148 | +         print(f"Chunk Retrieval: {timing_data['chunk_retrieval']}s")
149 | +         print(f"LLM Processing: {timing_data['llm_processing']}s")
150 | +         print(f"Response Processing: {timing_data['response_processing']}s")
151 | +         print(f"TOTAL TIME: {timing_data['total_time']}s")
152 | +         print(f"=======================\n")
153 | +
154 | +         return {
155 | +             "answers": answers
156 | +         }
157 |
158 |       except Exception as e:
159 | +         total_time = time.time() - start_time
160 | +         print(f"Error after {total_time:.2f} seconds: {str(e)}")
161 |           raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
162 |
163 |   @app.post("/api/v1/hackrx/local")
164 |   async def run_local_query(request: LocalQueryRequest):
165 | +     start_time = time.time()
166 | +     timing_data = {}
167 | +
168 |       try:
169 |           print(f"Processing local document: {request.document_path}")
170 |           print(f"Processing {len(request.questions)} questions...")
171 |
172 | +         # Time local PDF parsing
173 | +         pdf_start = time.time()
174 |           text_chunks = parse_pdf_from_file(request.document_path)
175 | +         pdf_time = time.time() - pdf_start
176 | +         timing_data['pdf_parsing'] = round(pdf_time, 2)
177 | +         print(f"Local PDF Parsing took: {pdf_time:.2f} seconds")
178 |           print(f"Extracted {len(text_chunks)} text chunks from local PDF")
179 |
180 | +         # Time FAISS index building
181 | +         index_start = time.time()
182 |           index, texts = build_faiss_index(text_chunks)
183 | +         index_time = time.time() - index_start
184 | +         timing_data['faiss_index_building'] = round(index_time, 2)
185 | +         print(f"FAISS Index Building took: {index_time:.2f} seconds")
186 |
187 | +         # Time chunk retrieval for all questions
188 | +         retrieval_start = time.time()
189 |           all_chunks = set()
190 | +         for i, question in enumerate(request.questions):
191 | +             question_start = time.time()
192 |               top_chunks = retrieve_chunks(index, texts, question)
193 | +             question_time = time.time() - question_start
194 | +             print(f"Question {i+1} retrieval took: {question_time:.2f} seconds")
195 |               all_chunks.update(top_chunks)
196 |
197 | +         retrieval_time = time.time() - retrieval_start
198 | +         timing_data['chunk_retrieval'] = round(retrieval_time, 2)
199 | +         print(f"Total Chunk Retrieval took: {retrieval_time:.2f} seconds")
200 | +         print(f"Retrieved {len(all_chunks)} unique chunks")
201 | +
202 | +         # Time LLM processing
203 | +         llm_start = time.time()
204 |           print(f"Processing all {len(request.questions)} questions in batch...")
205 |           response = query_gemini(request.questions, list(all_chunks))
206 | +         llm_time = time.time() - llm_start
207 | +         timing_data['llm_processing'] = round(llm_time, 2)
208 | +         print(f"LLM Processing took: {llm_time:.2f} seconds")
209 |
210 | +         # Time response processing
211 | +         response_start = time.time()
212 |           # Extract answers from the JSON response
213 |           if isinstance(response, dict) and "answers" in response:
214 |               answers = response["answers"]

224 |               answers.append("Not Found")
225 |           answers = answers[:len(request.questions)]
226 |
227 | +         response_time = time.time() - response_start
228 | +         timing_data['response_processing'] = round(response_time, 2)
229 | +         print(f"Response Processing took: {response_time:.2f} seconds")
230 |           print(f"Generated {len(answers)} answers")
231 | +
232 | +         # Calculate total time
233 | +         total_time = time.time() - start_time
234 | +         timing_data['total_time'] = round(total_time, 2)
235 | +         timing_data['timestamp'] = datetime.now().isoformat()
236 | +
237 | +         print(f"\n=== TIMING BREAKDOWN ===")
238 | +         print(f"PDF Parsing: {timing_data['pdf_parsing']}s")
239 | +         print(f"FAISS Index Building: {timing_data['faiss_index_building']}s")
240 | +         print(f"Chunk Retrieval: {timing_data['chunk_retrieval']}s")
241 | +         print(f"LLM Processing: {timing_data['llm_processing']}s")
242 | +         print(f"Response Processing: {timing_data['response_processing']}s")
243 | +         print(f"TOTAL TIME: {timing_data['total_time']}s")
244 | +         print(f"=======================\n")
245 | +
246 | +         return {
247 | +             "answers": answers
248 | +         }
249 |
250 |       except Exception as e:
251 | +         total_time = time.time() - start_time
252 | +         print(f"Error after {total_time:.2f} seconds: {str(e)}")
253 |           raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
254 |
255 |   if __name__ == "__main__":
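Note: with these changes the endpoint still returns only the answers list; the timing breakdown is printed to the logs, not included in the response. For a quick manual check of the instrumented /api/v1/hackrx/run route, a client call along the following lines should work. The base URL, token, document URL, and questions are placeholders, and it assumes verify_token accepts a standard "Authorization: Bearer ..." header.

    # Illustrative client call (placeholder values, not part of this commit)
    import requests

    BASE_URL = "http://localhost:8000"      # wherever the server / Space is running
    TOKEN = "<team-token>"                  # whatever verify_token expects

    payload = {
        "documents": "https://example.com/sample-policy.pdf",
        "questions": [
            "What is the waiting period for pre-existing diseases?",
            "Does this policy cover maternity expenses?",
        ],
    }

    resp = requests.post(
        f"{BASE_URL}/api/v1/hackrx/run",
        json=payload,
        headers={"Authorization": f"Bearer {TOKEN}"},
    )
    resp.raise_for_status()
    print(resp.json()["answers"])           # one answer per question, padded with "Not Found"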
parser.py
CHANGED
@@ -1,19 +1,37 @@

  1 |   import fitz  # PyMuPDF
  2 |   import requests
  3 |   from io import BytesIO
  4 | + import time
  5 |
  6 |   def parse_pdf_from_url(url):
  7 | +     start_time = time.time()
  8 | +     print(f"Starting PDF download and parsing from URL...")
  9 | +
 10 | +     download_start = time.time()
 11 |       res = requests.get(url)
 12 | +     download_time = time.time() - download_start
 13 | +     print(f"PDF Download took: {download_time:.2f} seconds")
 14 | +
 15 | +     parse_start = time.time()
 16 |       doc = fitz.open(stream=BytesIO(res.content), filetype="pdf")
 17 |       chunks = []
 18 |       for page in doc:
 19 |           text = page.get_text()
 20 |           if text.strip():
 21 |               chunks.append(text)
 22 | +     doc.close()
 23 | +     parse_time = time.time() - parse_start
 24 | +     print(f"PDF Text Extraction took: {parse_time:.2f} seconds")
 25 | +
 26 | +     total_time = time.time() - start_time
 27 | +     print(f"Total PDF parsing from URL took: {total_time:.2f} seconds")
 28 |       return chunks
 29 |
 30 |   def parse_pdf_from_file(file_path):
 31 |       """Parse a local PDF file and extract text chunks"""
 32 | +     start_time = time.time()
 33 | +     print(f"Starting PDF parsing from local file: {file_path}")
 34 | +
 35 |       try:
 36 |           doc = fitz.open(file_path)
 37 |           chunks = []

@@ -22,6 +40,11 @@ def parse_pdf_from_file(file_path):

 40 |           if text.strip():
 41 |               chunks.append(text)
 42 |           doc.close()
 43 | +
 44 | +         total_time = time.time() - start_time
 45 | +         print(f"Total PDF parsing from file took: {total_time:.2f} seconds")
 46 |           return chunks
 47 |       except Exception as e:
 48 | +         total_time = time.time() - start_time
 49 | +         print(f"Error parsing PDF file after {total_time:.2f} seconds: {str(e)}")
 50 |           raise Exception(f"Error parsing PDF file {file_path}: {str(e)}")
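Note: the same start/stop/print timing pattern is now repeated across app.py, main.py, parser.py, and retriever.py. If that boilerplate becomes noisy, one possible follow-up (not part of this commit) is a small context manager that handles the timing, logging, and timing_data bookkeeping in one place:

    # Sketch of a reusable timing helper (hypothetical refactor, not in this commit)
    import time
    from contextlib import contextmanager

    @contextmanager
    def timed(label, sink=None, key=None):
        """Time a block, print how long it took, and optionally record it in a dict."""
        start = time.time()
        try:
            yield
        finally:
            elapsed = time.time() - start
            print(f"{label} took: {elapsed:.2f} seconds")
            if sink is not None and key is not None:
                sink[key] = round(elapsed, 2)

    # Usage would mirror the inline code above, e.g.:
    # with timed("PDF Parsing", timing_data, "pdf_parsing"):
    #     text_chunks = parse_pdf_from_url(request.documents)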
retriever.py
CHANGED
@@ -1,9 +1,34 @@

  1 |   from sentence_transformers import SentenceTransformer
  2 |   import numpy as np
  3 |
  4 | - model = SentenceTransformer(...)
  5 | -
  6 |   def retrieve_chunks(index, texts, query, k=5):
  7 |       query_vec = model.encode([query])
  8 |       distances, indices = index.search(np.array(query_vec), k)
  9 | -     return [texts[i] for i in indices[0]]

  1 |   from sentence_transformers import SentenceTransformer
  2 |   import numpy as np
  3 | + import time
  4 | + from embedder import get_model
  5 |
  6 | + # Use the preloaded model from embedder instead of creating a new instance
  7 |   def retrieve_chunks(index, texts, query, k=5):
  8 | +     start_time = time.time()
  9 | +     print(f"Retrieving chunks for query: '{query[:50]}...'")
 10 | +
 11 | +     # Time query embedding
 12 | +     embed_start = time.time()
 13 | +     model = get_model()  # Use the preloaded model
 14 |       query_vec = model.encode([query])
 15 | +     embed_time = time.time() - embed_start
 16 | +     print(f"Query embedding took: {embed_time:.3f} seconds")
 17 | +
 18 | +     # Time FAISS search
 19 | +     search_start = time.time()
 20 |       distances, indices = index.search(np.array(query_vec), k)
 21 | +     search_time = time.time() - search_start
 22 | +     print(f"FAISS search took: {search_time:.3f} seconds")
 23 | +
 24 | +     # Time result processing
 25 | +     process_start = time.time()
 26 | +     results = [texts[i] for i in indices[0]]
 27 | +     process_time = time.time() - process_start
 28 | +     print(f"Result processing took: {process_time:.3f} seconds")
 29 | +
 30 | +     total_time = time.time() - start_time
 31 | +     print(f"Total chunk retrieval took: {total_time:.3f} seconds")
 32 | +     print(f"Retrieved {len(results)} chunks")
 33 | +
 34 | +     return results
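Note: retrieve_chunks now depends on get_model() from embedder.py, and the startup hook above calls preload_model(); the embedder.py part of this commit is not shown on this page. As a rough sketch of what those helpers presumably do (a module-level cached SentenceTransformer; the exact model id is inferred from the all-MiniLM-L6-v2 snapshot cached in this commit and is an assumption here):

    # Hypothetical sketch of the embedder.py helpers, not the actual diff
    from sentence_transformers import SentenceTransformer

    _model = None  # module-level cache so the model is loaded once per process

    def preload_model():
        """Eagerly load the sentence transformer (called from the FastAPI startup event)."""
        global _model
        if _model is None:
            _model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")  # assumed model id
        return _model

    def get_model():
        """Return the shared model, loading it lazily if preload_model() was never called."""
        return preload_model()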