File size: 2,762 Bytes
ec96972
 
 
e15840d
eb87b3b
ec96972
e15840d
 
 
 
 
 
 
 
 
eb87b3b
 
e15840d
 
eb87b3b
 
e15840d
 
eb87b3b
 
e15840d
 
 
 
 
eb87b3b
 
e15840d
 
 
 
ec96972
eb87b3b
 
 
 
 
 
 
 
 
ec96972
eb87b3b
 
 
 
 
e15840d
eb87b3b
 
 
 
 
ec96972
eb87b3b
 
 
 
 
 
ec96972
 
 
eb87b3b
 
 
 
 
 
ec96972
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import faiss
from sentence_transformers import SentenceTransformer
import numpy as np
import os
import time

# Set up cache directory in a writable location
# (Hugging Face defaults to ~/.cache, which may not be writable in
# containerized/serverless deployments — use ./.cache under the CWD instead).
cache_dir = os.path.join(os.getcwd(), ".cache")
os.makedirs(cache_dir, exist_ok=True)
# HF_HOME redirects the Hub cache; TRANSFORMERS_CACHE is the older variable
# honored by earlier transformers releases.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# versions — confirm it is still needed for the pinned dependency set.
os.environ['HF_HOME'] = cache_dir
os.environ['TRANSFORMERS_CACHE'] = cache_dir

# Initialize model as None - will be loaded lazily
# Module-level singleton holding the SentenceTransformer instance,
# populated by preload_model() / get_model().
_model = None

def preload_model():
    """Eagerly load the shared SentenceTransformer singleton.

    Populates the module-level ``_model`` (no-op when already loaded)
    and returns it. The short alias is tried first; if that raises, a
    second attempt is made with the fully-qualified repo name.
    NOTE(review): both names appear to resolve to the same Hub repo
    ("sentence-transformers/all-MiniLM-L6-v2") — confirm the fallback
    is intentional rather than redundant.

    Returns:
        The loaded SentenceTransformer instance.

    Raises:
        Exception: re-raised from the second attempt when both loads fail.
    """
    global _model
    if _model is not None:
        return _model

    started = time.time()
    print("Preloading sentence transformer model...")
    try:
        _model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache_dir)
        elapsed = time.time() - started
        print(f"Model preloading completed in {elapsed:.2f} seconds")
    except Exception as e:
        print(f"Error loading model: {e}")
        # Retry with the fully-qualified model name.
        try:
            _model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=cache_dir)
            elapsed = time.time() - started
            print(f"Fallback model preloading completed in {elapsed:.2f} seconds")
        except Exception as e2:
            print(f"Error loading fallback model: {e2}")
            raise
    return _model

def get_model():
    """Return the shared SentenceTransformer, loading it on demand.

    Fast path: the model was already loaded via preload_model() at
    startup. Slow path: lazily load it now, with a warning — this is
    not expected to happen in production.
    """
    global _model
    if _model is not None:
        return _model
    # Model was never preloaded at startup — load synchronously now.
    print("Warning: Model not preloaded, loading now...")
    return preload_model()

def build_faiss_index(chunks):
    """Embed *chunks* and build an exact (brute-force) L2 FAISS index.

    Args:
        chunks: non-empty sequence of text strings to embed.

    Returns:
        Tuple ``(index, chunks)`` where ``index`` is a
        ``faiss.IndexFlatL2`` over the chunk embeddings; ``chunks`` is
        returned unchanged so callers can map search hits back to text.

    Raises:
        ValueError: if ``chunks`` is empty — previously this crashed
            opaquely on ``embeddings.shape[1]``.
    """
    if not chunks:
        raise ValueError("build_faiss_index: 'chunks' must be non-empty")

    start_time = time.time()
    print(f"Building FAISS index for {len(chunks)} chunks...")

    # Time model retrieval (should be instant now)
    model_start = time.time()
    model = get_model()
    model_time = time.time() - model_start
    print(f"Model retrieval took: {model_time:.3f} seconds")

    # Time embedding generation
    embed_start = time.time()
    embeddings = model.encode(chunks)
    embed_time = time.time() - embed_start
    print(f"Embedding generation took: {embed_time:.2f} seconds")
    print(f"Generated embeddings shape: {embeddings.shape}")

    # Time FAISS index creation. FAISS only accepts contiguous float32
    # input; ascontiguousarray converts/copies only when necessary,
    # unlike the previous np.array() which always copied and did not
    # guarantee dtype.
    index_start = time.time()
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(np.ascontiguousarray(embeddings, dtype=np.float32))
    index_time = time.time() - index_start
    print(f"FAISS index creation took: {index_time:.2f} seconds")

    total_time = time.time() - start_time
    print(f"Total FAISS index building took: {total_time:.2f} seconds")

    return index, chunks