Spaces:
Running
Running
File size: 1,320 Bytes
ec96972 e15840d 7acce36 ec96972 e15840d afd28fa e15840d 7acce36 afd28fa 7acce36 e15840d ec96972 eb87b3b 7acce36 eb87b3b 7acce36 e15840d 7acce36 afd28fa 7acce36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import faiss
import numpy as np
import os
from sentence_transformers import SentenceTransformer
# Keep all Hugging Face / sentence-transformers downloads in a project-local
# .cache directory instead of the user's home directory.
cache_dir = os.path.join(os.getcwd(), ".cache")
os.makedirs(cache_dir, exist_ok=True)
# NOTE(review): these env vars are assigned AFTER sentence_transformers is
# imported above; some transformers versions read TRANSFORMERS_CACHE/HF_HOME
# at import time, so this may be too late to take effect — confirm, or move
# these lines above the imports. The explicit cache_folder= passed to
# SentenceTransformer below covers the model weights either way.
os.environ['HF_HOME'] = cache_dir
os.environ['TRANSFORMERS_CACHE'] = cache_dir
# Module-level singleton for the loaded SentenceTransformer; populated lazily
# by preload_model() so importing this module stays cheap.
_model = None
def preload_model(model_name="paraphrase-MiniLM-L3-v2"):
    """Load the sentence-transformer model once and cache it module-wide.

    Repeat calls return the already-loaded singleton. On first call the
    model is loaded (downloading into the local ``.cache`` directory if
    needed). If loading the bare model name fails, retries once with the
    canonical ``sentence-transformers/`` hub organization prefix.

    Args:
        model_name: Hugging Face model id, with or without the
            ``sentence-transformers/`` prefix.

    Returns:
        The loaded ``SentenceTransformer`` instance.

    Raises:
        Exception: whatever the underlying loader raises when both the
            primary and fallback names fail.
    """
    global _model
    if _model is not None:
        return _model
    print(f"Preloading sentence transformer model: {model_name}...")
    try:
        _model = SentenceTransformer(model_name, cache_folder=cache_dir)
    except Exception as e:
        print(f"Primary model load failed: {e}")
        # Bug fix: if the caller already supplied the org prefix, retrying
        # with "sentence-transformers/sentence-transformers/..." is
        # guaranteed to fail again with a misleading error — re-raise the
        # original failure instead.
        if model_name.startswith("sentence-transformers/"):
            raise
        fallback_name = "sentence-transformers/" + model_name
        print(f"Trying fallback: {fallback_name}")
        _model = SentenceTransformer(fallback_name, cache_folder=cache_dir)
    print("✅ Model ready.")
    return _model
def get_model():
    """Return the shared SentenceTransformer, loading it on first use."""
    model = preload_model()
    return model
def build_faiss_index(chunks, batch_size=128, show_progress_bar=False):
    """Embed text chunks and build an exact (flat) FAISS index over them.

    Args:
        chunks: Non-empty sequence of texts to embed; the i-th index entry
            corresponds to ``chunks[i]``.
        batch_size: Encoding batch size passed to ``model.encode``.
        show_progress_bar: Whether the encoder shows a progress bar.

    Returns:
        Tuple ``(index, chunks)`` — the populated ``faiss.IndexFlatL2`` and
        the original chunks, so callers can map search hits back to text.

    Raises:
        ValueError: if ``chunks`` is empty (the embedding matrix would have
            no rows and the dimensionality could not be determined).
    """
    # Bug fix: an empty input previously crashed later with an opaque
    # IndexError on embeddings.shape[1]; fail fast with a clear message.
    if not chunks:
        raise ValueError("chunks must be a non-empty sequence of texts")
    model = get_model()
    embeddings = model.encode(
        chunks,
        batch_size=batch_size,
        show_progress_bar=show_progress_bar,
        convert_to_numpy=True,
        normalize_embeddings=True
    )
    dim = embeddings.shape[1]
    # Embeddings are L2-normalized, so L2 distance ranks identically to
    # cosine similarity (||a-b||^2 = 2 - 2*cos). Kept as IndexFlatL2 so the
    # raw distance values callers may inspect are unchanged.
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index, chunks
|