File size: 1,320 Bytes
ec96972
 
e15840d
7acce36
ec96972
e15840d
 
 
 
 
 
 
afd28fa
e15840d
7acce36
 
 
afd28fa
7acce36
 
 
 
 
 
 
 
 
e15840d
ec96972
eb87b3b
7acce36
eb87b3b
7acce36
e15840d
7acce36
 
 
 
 
afd28fa
7acce36
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import faiss
import numpy as np
import os
from sentence_transformers import SentenceTransformer

# Keep all HuggingFace downloads inside a project-local cache directory so
# repeated runs reuse downloaded weights instead of re-fetching them.
cache_dir = os.path.join(os.getcwd(), ".cache")
os.makedirs(cache_dir, exist_ok=True)
# Both variables are set because different huggingface library versions
# consult different ones (TRANSFORMERS_CACHE is the legacy name, HF_HOME
# the current one). NOTE(review): must run before any model download occurs.
os.environ['HF_HOME'] = cache_dir
os.environ['TRANSFORMERS_CACHE'] = cache_dir

# Lazily-initialized module-wide singleton; populated by preload_model().
_model = None

def preload_model(model_name="paraphrase-MiniLM-L3-v2"):
    """Load the sentence-transformer model once and cache it module-wide.

    Subsequent calls return the already-loaded instance regardless of the
    ``model_name`` argument (first load wins).

    Args:
        model_name: HuggingFace model id, either bare
            ("paraphrase-MiniLM-L3-v2") or fully qualified
            ("sentence-transformers/paraphrase-MiniLM-L3-v2").

    Returns:
        The shared SentenceTransformer instance.

    Raises:
        Exception: whatever SentenceTransformer raises when the load (and,
            for bare names, the prefixed fallback) fails.
    """
    global _model
    if _model is not None:
        return _model

    print(f"Preloading sentence transformer model: {model_name}...")
    try:
        _model = SentenceTransformer(model_name, cache_folder=cache_dir)
    except Exception as e:
        # Bare names sometimes need the canonical org prefix. If the name is
        # already qualified ("org/model"), prepending would build an invalid
        # id like "sentence-transformers/org/model" — re-raise instead.
        if "/" in model_name:
            raise
        print(f"Primary model load failed: {e}")
        fallback_name = "sentence-transformers/" + model_name
        print(f"Trying fallback: {fallback_name}")
        _model = SentenceTransformer(fallback_name, cache_folder=cache_dir)

    print("✅ Model ready.")
    return _model

def get_model():
    """Return the shared SentenceTransformer, loading it on first call."""
    model = preload_model()
    return model

def build_faiss_index(chunks, batch_size=128, show_progress_bar=False):
    """Embed *chunks* and build an exact (flat) L2 FAISS index over them.

    Embeddings are L2-normalized before indexing, so L2 distance on this
    index ranks neighbors identically to cosine similarity.

    Args:
        chunks: non-empty sequence of strings to embed.
        batch_size: encoder batch size, forwarded to ``model.encode``.
        show_progress_bar: forwarded to ``model.encode``.

    Returns:
        (index, chunks): the populated ``faiss.IndexFlatL2`` and the
        original chunks (kept alongside for id -> text lookup by callers).

    Raises:
        ValueError: if ``chunks`` is empty — the embedding dimension cannot
            be inferred from an empty batch.
    """
    if len(chunks) == 0:
        # encode([]) yields an empty array with no second dimension; without
        # this guard the shape[1] below fails with an opaque IndexError.
        raise ValueError("chunks must be non-empty to build a FAISS index")

    model = get_model()
    embeddings = model.encode(
        chunks,
        batch_size=batch_size,
        show_progress_bar=show_progress_bar,
        convert_to_numpy=True,
        normalize_embeddings=True
    )
    # faiss requires float32, C-contiguous input; coerce defensively in case
    # the encoder returns float64 or a non-contiguous view.
    embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index, chunks