import faiss
from sentence_transformers import SentenceTransformer
import numpy as np
import os
import time

# Set up cache directory in a writable location
cache_dir = os.path.join(os.getcwd(), ".cache")
os.makedirs(cache_dir, exist_ok=True)
os.environ['HF_HOME'] = cache_dir
os.environ['TRANSFORMERS_CACHE'] = cache_dir

# Initialize model as None - will be loaded lazily
_model = None


def preload_model():
    """Preload the sentence transformer model at startup."""
    global _model
    if _model is None:
        model_start = time.time()
        print("Preloading sentence transformer model...")
        try:
            _model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache_dir)
            model_time = time.time() - model_start
            print(f"Model preloading completed in {model_time:.2f} seconds")
        except Exception as e:
            print(f"Error loading model: {e}")
            # Fall back to the fully qualified model name if the short name fails
            try:
                _model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=cache_dir)
                model_time = time.time() - model_start
                print(f"Fallback model preloading completed in {model_time:.2f} seconds")
            except Exception as e2:
                print(f"Error loading fallback model: {e2}")
                raise
    return _model


def get_model():
    """Get the sentence transformer model, loading it lazily if needed."""
    global _model
    if _model is None:
        # If the model is not preloaded, load it now (should not happen in production)
        print("Warning: Model not preloaded, loading now...")
        return preload_model()
    return _model


def build_faiss_index(chunks):
    start_time = time.time()
    print(f"Building FAISS index for {len(chunks)} chunks...")

    # Time model retrieval (should be instant if the model was preloaded)
    model_start = time.time()
    model = get_model()
    model_time = time.time() - model_start
    print(f"Model retrieval took: {model_time:.3f} seconds")

    # Time embedding generation
    embed_start = time.time()
    embeddings = model.encode(chunks)
    embed_time = time.time() - embed_start
    print(f"Embedding generation took: {embed_time:.2f} seconds")
    print(f"Generated embeddings shape: {embeddings.shape}")

    # Time FAISS index creation
    index_start = time.time()
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(np.array(embeddings))
    index_time = time.time() - index_start
    print(f"FAISS index creation took: {index_time:.2f} seconds")

    total_time = time.time() - start_time
    print(f"Total FAISS index building took: {total_time:.2f} seconds")

    return index, chunks
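

# A minimal usage sketch (not part of the original module): the sample chunks and
# query below are hypothetical, and the search step assumes the standard FAISS
# IndexFlatL2.search(query_vectors, k) call together with the (index, chunks) pair
# returned by build_faiss_index above.
if __name__ == "__main__":
    preload_model()  # load the embedding model once at startup

    sample_chunks = [
        "FAISS builds vector indexes for similarity search.",
        "Sentence transformers map text to dense embeddings.",
        "IndexFlatL2 ranks neighbors by L2 distance.",
    ]
    index, stored_chunks = build_faiss_index(sample_chunks)

    # Encode a query and retrieve the 2 nearest chunks by L2 distance
    query_embedding = get_model().encode(["How are embeddings indexed?"])
    distances, indices = index.search(np.array(query_embedding, dtype=np.float32), 2)
    for rank, (dist, idx) in enumerate(zip(distances[0], indices[0]), start=1):
        print(f"{rank}. (distance={dist:.4f}) {stored_chunks[idx]}")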