Spaces:
Running
Running
File size: 2,762 Bytes
ec96972 e15840d eb87b3b ec96972 e15840d eb87b3b e15840d eb87b3b e15840d eb87b3b e15840d eb87b3b e15840d ec96972 eb87b3b ec96972 eb87b3b e15840d eb87b3b ec96972 eb87b3b ec96972 eb87b3b ec96972 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import faiss
from sentence_transformers import SentenceTransformer
import numpy as np
import os
import time
# Keep Hugging Face downloads in a ".cache" folder under the current working
# directory, creating it on first use.
cache_dir = os.path.join(os.getcwd(), ".cache")
os.makedirs(cache_dir, exist_ok=True)

# Point both Hugging Face cache environment variables at that folder.
# NOTE(review): these are assigned after `sentence_transformers` has already
# been imported; libraries that read them at import time may not pick them up.
# Model loading below passes `cache_folder=cache_dir` explicitly -- confirm
# whether the environment variables are still needed.
os.environ.update(HF_HOME=cache_dir, TRANSFORMERS_CACHE=cache_dir)

# Module-level slot for the shared model; stays None until preload_model()
# fills it in.
_model = None
def preload_model():
    """Load the sentence transformer into the module-level ``_model`` slot.

    If ``_model`` is already set it is returned unchanged. Otherwise the
    model "all-MiniLM-L6-v2" is loaded with ``cache_folder=cache_dir``; if
    that raises, the error is printed and a second attempt is made with the
    fully qualified name "sentence-transformers/all-MiniLM-L6-v2".

    Returns:
        The loaded (or previously loaded) model object.

    Raises:
        Exception: whatever the second load attempt raised, after it has
            been printed.
    """
    global _model

    # Nothing to do when a model is already cached.
    if _model is not None:
        return _model

    began = time.time()
    print("Preloading sentence transformer model...")
    try:
        _model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache_dir)
        print(f"Model preloading completed in {time.time() - began:.2f} seconds")
    except Exception as primary_error:
        print(f"Error loading model: {primary_error}")
        # Second attempt using the organisation-qualified model name.
        try:
            _model = SentenceTransformer(
                "sentence-transformers/all-MiniLM-L6-v2",
                cache_folder=cache_dir,
            )
            # Elapsed time is measured from the very first attempt.
            print(f"Fallback model preloading completed in {time.time() - began:.2f} seconds")
        except Exception as fallback_error:
            print(f"Error loading fallback model: {fallback_error}")
            raise
    return _model
def get_model():
    """Return the shared sentence transformer, loading it on demand.

    When ``_model`` has already been populated it is returned directly.
    Otherwise a warning is printed and the load is delegated to
    ``preload_model()``, whose result is returned.
    """
    if _model is not None:
        return _model

    # Reaching this point means nobody called preload_model() beforehand
    # (should not happen in production).
    print("Warning: Model not preloaded, loading now...")
    return preload_model()
def build_faiss_index(chunks):
    """Embed ``chunks`` and store the vectors in a flat L2 FAISS index.

    Each stage (model retrieval, embedding, index construction) is timed and
    the durations are printed.

    Args:
        chunks: A sized, non-empty collection handed to ``model.encode``
            (presumably a list of text strings -- verify against callers).

    Returns:
        A tuple ``(index, chunks)`` where ``index`` is a
        ``faiss.IndexFlatL2`` holding one vector per chunk and ``chunks`` is
        the input object, returned unchanged.

    Raises:
        ValueError: if ``chunks`` is empty. There would be no embeddings to
            take the vector dimension from.
    """
    start_time = time.time()
    print(f"Building FAISS index for {len(chunks)} chunks...")

    # Fail fast with a clear message: with no chunks the embedding matrix
    # has no second axis, so reading its dimension below would otherwise
    # raise an opaque IndexError after the model work was already done.
    # `len(...) == 0` is used instead of truthiness so array-like inputs
    # are handled too.
    if len(chunks) == 0:
        raise ValueError("Cannot build a FAISS index from an empty list of chunks")

    # Time model retrieval (should be instant when the model was preloaded)
    model_start = time.time()
    model = get_model()
    model_time = time.time() - model_start
    print(f"Model retrieval took: {model_time:.3f} seconds")

    # Time embedding generation
    embed_start = time.time()
    embeddings = model.encode(chunks)
    embed_time = time.time() - embed_start
    print(f"Embedding generation took: {embed_time:.2f} seconds")
    print(f"Generated embeddings shape: {embeddings.shape}")

    # Time FAISS index creation
    index_start = time.time()
    # FAISS works on C-contiguous float32 matrices; ascontiguousarray
    # converts only when needed instead of always copying like np.array().
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    dimension = vectors.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(vectors)
    index_time = time.time() - index_start
    print(f"FAISS index creation took: {index_time:.2f} seconds")

    total_time = time.time() - start_time
    print(f"Total FAISS index building took: {total_time:.2f} seconds")
    return index, chunks
|