import os
import time

from dotenv import load_dotenv

load_dotenv()

# Cache directory for model downloads. HF_HOME / TRANSFORMERS_CACHE are read
# when transformers is first imported, so set them before importing
# sentence_transformers below.
cache_dir = os.path.join(os.getcwd(), ".cache")
os.makedirs(cache_dir, exist_ok=True)
os.environ["HF_HOME"] = cache_dir
os.environ["TRANSFORMERS_CACHE"] = cache_dir

from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_ENV = os.getenv("PINECONE_ENV")  # Not used in the new SDK; cloud + region below replace it
PINECONE_INDEX_NAME = "bajaj-rag-assistant"
PINECONE_CLOUD = "aws"         # or 'gcp'; must match your Pinecone project
PINECONE_REGION = "us-east-1"  # must match your Pinecone project

# Create the Pinecone client once, globally.
pc = Pinecone(api_key=PINECONE_API_KEY)

_model = None


def preload_model(model_name="paraphrase-MiniLM-L3-v2"):
    """Load the sentence-transformer once and memoize it in a module-level global."""
    global _model
    if _model is not None:
        return _model
    _model = SentenceTransformer(model_name, cache_folder=cache_dir)
    return _model


def get_model():
    return preload_model()


def build_pinecone_index(chunks, index_name=PINECONE_INDEX_NAME):
    """Embed text chunks and upsert them into a Pinecone serverless index."""
    model = get_model()
    embeddings = model.encode(
        chunks,
        batch_size=128,
        convert_to_numpy=True,
        normalize_embeddings=True,
    )

    # Create the index if it doesn't exist.
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=embeddings.shape[1],
            metric="cosine",
            spec=ServerlessSpec(cloud=PINECONE_CLOUD, region=PINECONE_REGION),
        )
        # Wait until the freshly created index is ready to accept upserts.
        while not pc.describe_index(index_name).status["ready"]:
            time.sleep(1)

    index = pc.Index(index_name)

    # Upsert embeddings into Pinecone, batched to stay under the
    # per-request size limits.
    vectors = [
        (f"id-{i}", emb.tolist(), {"text": chunk})
        for i, (emb, chunk) in enumerate(zip(embeddings, chunks))
    ]
    batch_size = 100
    for start in range(0, len(vectors), batch_size):
        index.upsert(vectors[start:start + batch_size])

    return index
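

# --- Usage sketch (illustrative, not part of the module above) --------------
# A minimal example of querying the index built by build_pinecone_index.
# The query_pinecone_index helper and the sample chunks are hypothetical
# additions for illustration; index.query(vector=..., top_k=...,
# include_metadata=True) is the standard Pinecone serverless SDK call.


def query_pinecone_index(query, index_name=PINECONE_INDEX_NAME, top_k=3):
    """Embed a query with the same model and return the top-k matching chunks."""
    model = get_model()
    query_vec = model.encode([query], normalize_embeddings=True)[0].tolist()
    index = pc.Index(index_name)
    results = index.query(vector=query_vec, top_k=top_k, include_metadata=True)
    return [(m.id, m.score, m.metadata["text"]) for m in results.matches]


if __name__ == "__main__":
    # Placeholder chunks; replace with real document chunks.
    sample_chunks = [
        "Premiums are payable monthly or annually.",
        "Claims must be filed within 30 days of the incident.",
    ]
    build_pinecone_index(sample_chunks)
    # Pinecone is eventually consistent, so a brief pause helps the demo
    # query see the vectors just upserted.
    time.sleep(5)
    for doc_id, score, text in query_pinecone_index("When do I file a claim?"):
        print(f"{doc_id}  score={score:.3f}  {text}")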