Rivalcoder committed · afd28fa
Parent(s): 9fc012d
Add Good Prompt

Files changed:
- embedder.py +3 -10
- llm.py +14 -11
- retriever.py +9 -10
embedder.py
CHANGED
@@ -3,22 +3,19 @@ import numpy as np
 import os
 from sentence_transformers import SentenceTransformer
 
-# Use a local cache for transformer downloads
 cache_dir = os.path.join(os.getcwd(), ".cache")
 os.makedirs(cache_dir, exist_ok=True)
 os.environ['HF_HOME'] = cache_dir
 os.environ['TRANSFORMERS_CACHE'] = cache_dir
 
-# Lazy-loaded model
 _model = None
 
-def preload_model(model_name="
+def preload_model(model_name="paraphrase-MiniLM-L3-v2"):
     global _model
     if _model is not None:
         return _model
 
-    print("Preloading sentence transformer model...")
-
+    print(f"Preloading sentence transformer model: {model_name}...")
     try:
         _model = SentenceTransformer(model_name, cache_folder=cache_dir)
     except Exception as e:
@@ -35,18 +32,14 @@ def get_model():
 
 def build_faiss_index(chunks, batch_size=128, show_progress_bar=False):
     model = get_model()
-
-    # Encode using batching for speed
     embeddings = model.encode(
         chunks,
         batch_size=batch_size,
         show_progress_bar=show_progress_bar,
         convert_to_numpy=True,
-        normalize_embeddings=True
+        normalize_embeddings=True
     )
-
     dim = embeddings.shape[1]
     index = faiss.IndexFlatL2(dim)
     index.add(embeddings)
-
     return index, chunks
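A note on the normalize_embeddings=True setting kept above: once every vector has unit length, squared L2 distance equals 2 - 2 * cosine similarity, so the IndexFlatL2 index effectively ranks chunks by cosine similarity. A minimal NumPy check of that identity (the 384-dimensional size is illustrative, not taken from the commit):

import numpy as np

rng = np.random.default_rng(0)
a, b = rng.normal(size=(2, 384))
a /= np.linalg.norm(a)  # same unit-length normalization encode() applies
b /= np.linalg.norm(b)

# For unit vectors: ||a - b||^2 = 2 - 2 * cos(a, b),
# so L2 ranking over normalized embeddings is cosine ranking.
assert np.isclose(np.sum((a - b) ** 2), 2 - 2 * (a @ b))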
llm.py
CHANGED
@@ -2,6 +2,7 @@ import google.generativeai as genai
 import os
 import json
 from dotenv import load_dotenv
+
 load_dotenv()
 
 api_key = os.getenv("GOOGLE_API_KEY")
@@ -16,30 +17,36 @@ def query_gemini(questions, contexts):
     context = "\n\n".join(contexts)
     questions_text = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])
     prompt = f"""
-You are an expert insurance assistant generating formal yet user-facing answers to policy questions. Your goal is to write professional, structured answers that reflect the language of policy documents, but are still human-readable and easy to understand.
+You are an expert insurance assistant generating formal yet user-facing answers to policy questions and other general user questions. Your goal is to write professional, structured answers that reflect the language of policy documents, but are still human-readable and easy to understand.
 
 FORMAT & TONE GUIDELINES:
-- Write in
-- Use
+- Write in professional third-person language (no "you", no "we").
+- Use clear sentence structure with proper punctuation and spacing.
 - Do NOT write in legalese or robotic passive constructions.
 - Include eligibility, limits, and waiting periods explicitly where relevant.
 - Keep it factual, neutral, and easy to follow.
--
--
+- First, try to answer each question using information from the provided context.
+- If the question is not covered by the context, give a correct general answer from your own knowledge.
+- Limit each answer to 2–3 sentences, and do not repeat unnecessary information.
+- If a question can be answered with a simple "Yes", "No", "Can apply", or "Cannot apply", begin the answer with that phrase, followed by a short supporting statement in a natural, human-like tone with correct information.
+- Avoid long, theory-heavy answers; keep answers short and reasonable.
 
 DO NOT:
 - Use words like "context", "document", or "text".
 - Output markdown, bullets, emojis, or markdown code blocks.
 - Say "helpful", "available", "allowed", "indemnified", "excluded", etc.
 - Use overly robotic passive constructions like "shall be indemnified".
+- Include phrases like "Based on the context" or "Nothing referred in the context"; answer the question directly.
 
 DO:
 - Write in clean, informative language.
 - Give complete answers in 2–3 sentences maximum.
-
+
+
+
 
 OUTPUT FORMAT (strict):
-Respond with **only** the following JSON, with no explanations, no comments, no markdown:
+Respond with only the following JSON, with no explanations, no comments, no markdown:
 
 {{
   "answers": [
@@ -57,10 +64,6 @@ Respond with **only** the following JSON, with no explanations, no comments, no markdown:
 
 Your task: For each question, provide a complete, professional, and clearly written answer in 2–3 sentences using a formal but readable tone.
 """
-
-
-
-
     model = genai.GenerativeModel('gemini-2.5-flash-lite')
     response = model.generate_content(prompt)
     response_text = response.text.strip()
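The prompt demands bare JSON, but the lines after response_text are not part of this diff. A hedged sketch of how the reply might be parsed defensively (parse_answers is a hypothetical helper, not something the commit defines):

import json

def parse_answers(response_text, n_questions):
    """Defensively parse the model's JSON reply; a sketch only, since
    the rest of query_gemini is not shown in this diff."""
    text = response_text.strip()
    # Strip markdown fences the model may add despite the instructions.
    if text.startswith("```"):
        text = text.strip("`")
        # Drop an optional "json" language tag left by the opening fence.
        if text.lower().startswith("json"):
            text = text[4:]
    try:
        answers = json.loads(text).get("answers", [])
    except json.JSONDecodeError:
        answers = []
    # Pad so callers always get exactly one answer per question.
    answers += ["No answer generated."] * (n_questions - len(answers))
    return answers[:n_questions]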
retriever.py
CHANGED
@@ -1,12 +1,11 @@
-from sentence_transformers import
-import numpy as np
-import time
+from sentence_transformers.util import cos_sim
 from embedder import get_model
+import numpy as np
+
+def retrieve_chunks(index, texts, question, top_k=15):
+    model = get_model()
+    q_embedding = model.encode([question], convert_to_numpy=True, normalize_embeddings=True)[0]
 
-
-
-
-    query_vec = model.encode([query])
-    distances, indices = index.search(np.array(query_vec), k)
-    results = [texts[i] for i in indices[0]]
-    return results
+    scores, indices = index.search(np.array([q_embedding]), top_k)
+    selected = [texts[i] for i in indices[0]]
+    return selected
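Taken together, the three files form a small retrieve-then-answer pipeline; note that the cos_sim import added above is unused in the lines shown. A usage sketch under stated assumptions: the chunks and question are invented for illustration, and GOOGLE_API_KEY must be set in .env for the final call:

from embedder import preload_model, build_faiss_index
from retriever import retrieve_chunks
from llm import query_gemini

# Invented sample chunks; any list of strings works.
chunks = [
    "Maternity cover begins after a waiting period of 24 months.",
    "Cataract surgery is covered after a waiting period of 2 years.",
]

preload_model()  # warm the sentence-transformer once at startup
index, texts = build_faiss_index(chunks)

question = "When does maternity cover start?"
top_chunks = retrieve_chunks(index, texts, question, top_k=2)
print(query_gemini([question], top_chunks))  # requires GOOGLE_API_KEY in .env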