In [1]:

# +
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader

## dockerized streamlit app wants to read from os.getenv(), otherwise use st.secrets
import streamlit as st
import os
api_key = os.getenv("LITELLM_KEY")
if api_key is None:
    api_key = st.secrets["LITELLM_KEY"]
cirrus_key = os.getenv("CIRRUS_KEY")
if cirrus_key is None:
    cirrus_key = st.secrets["CIRRUS_KEY"]        


In [34]:
import os
import requests
import zipfile

def download_and_unzip(url, output_dir):
    """
    Downloads a ZIP file from a URL and unzips it to a specified directory.
    
    Args:
        url (str): The URL of the ZIP file.
        output_dir (str): The directory where the ZIP file will be unzipped.
    """
    # Download the ZIP file
    response = requests.get(url)
    zip_file_path = os.path.basename(url)

    # Save the ZIP file to the current directory
    with open(zip_file_path, 'wb') as f:
        f.write(response.content)

    # Unzip the ZIP file
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(output_dir)

    # Remove the ZIP file
    os.remove(zip_file_path)

# Example usage:
url = "https://minio.carlboettiger.info/public-data/hwc.zip"
output_dir = "hwc"

# Create the output directory if it doesn't exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

download_and_unzip(url, output_dir)

In [13]:
import pathlib

def pdf_loader(path):
    all_documents = []
    docs_dir = pathlib.Path(path)
    for file in docs_dir.iterdir():
        loader = PyPDFLoader(file)
        documents = loader.load()
        all_documents.extend(documents)
    return all_documents

docs = pdf_loader('/home/rstudio/data/hwc/')


In [14]:

# Set up the language model
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model = "llama3-sdsc",
                 api_key = api_key, 
                 base_url = "https://llm.nrp-nautilus.io",  
                 temperature=0)

# Set up the embedding model
from langchain_openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings(model = "embed-mistral", api_key = api_key, base_url = "https://llm.nrp-nautilus.io")


In [23]:
## Cirrus instead:
embedding = OpenAIEmbeddings(
                 model = "cirrus",
                 api_key =  cirrus_key, 
                 base_url = "https://llm.cirrus.carlboettiger.info/v1",
)



text = "A test"

vectorstore = InMemoryVectorStore.from_texts(
    [text],
    embedding=embedding,
)


In [24]:


# Build a retrival agent
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)



In [25]:
# slow part here, runs on remote GPU
from langchain_core.vectorstores import InMemoryVectorStore
vectorstore = InMemoryVectorStore.from_documents(documents=splits, embedding=embedding)


In [26]:
retriever = vectorstore.as_retriever()

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


In [27]:
prompt = "I live in Tanzania and am having issues with lions breaking into my boma and preying on cattle. What interventions might work best for me?"
results = rag_chain.invoke({"input": prompt})
results

{'input': 'I live in Tanzania and am having issues with lions breaking into my boma and preying on cattle. What interventions might work best for me?',
 'context': [Document(id='898b2a4c-40ac-482a-8da6-33d50a8a1daa', metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2022-06-23T12:24:35+08:00', 'author': 'Lydia N. Tiller, Ernest Oniba, Godfrey Opira, Ewan J. Brennan, Lucy E. King, Victor Ndombi, Derick Wanjala and Marion R. Robertson', 'keywords': 'African elephants; human-elephant conflict; crop raiding; olfactory mitigation;elephant repellent', 'moddate': '2022-06-23T06:36:53+02:00', 'subject': 'Human–elephant conflict is increasing across many parts of Asia and Africa. Mitigating elephant crop raiding has become a major focus of conservation intervention, however, many existing methods for tackling this problem are expensive and difficult to execute. Thus, there is a need for more affordable, farm-based methods. Testing these methods is key to

In [28]:
prompt = "What are the most cost-effective prevention methods for elephants raiding my crops?"

results = rag_chain.invoke({"input": prompt})
results

{'input': 'What are the most cost-effective prevention methods for elephants raiding my crops?',
 'context': [Document(id='6b17529c-1918-481c-ba42-819c173a3917', metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2022-06-23T12:24:35+08:00', 'author': 'Lydia N. Tiller, Ernest Oniba, Godfrey Opira, Ewan J. Brennan, Lucy E. King, Victor Ndombi, Derick Wanjala and Marion R. Robertson', 'keywords': 'African elephants; human-elephant conflict; crop raiding; olfactory mitigation;elephant repellent', 'moddate': '2022-06-23T06:36:53+02:00', 'subject': 'Human–elephant conflict is increasing across many parts of Asia and Africa. Mitigating elephant crop raiding has become a major focus of conservation intervention, however, many existing methods for tackling this problem are expensive and difficult to execute. Thus, there is a need for more affordable, farm-based methods. Testing these methods is key to ensuring their effectiveness and feasibility. In this 

In [29]:
rag_chain.invoke({"input": 
                  "I have a small herd of goats and cattle and I am worried about jaguars preying on them. What preventative measures can I take?"
                 })

{'input': 'I have a small herd of goats and cattle and I am worried about jaguars preying on them. What preventative measures can I take?',
 'context': [Document(id='959100a0-dd92-4fc2-8e19-6b32770dc0c3', metadata={'producer': 'Acrobat Distiller 11.0 (Windows); modified using iText 4.2.0 by 1T3XT', 'creator': 'Arbortext Advanced Print Publisher 9.1.520/W Unicode', 'creationdate': '2023-06-16T15:56:20+05:30', 'keywords': '', 'moddate': '2025-05-27T12:15:01-07:00', 'subject': 'Conservat Sci and Prac 2023.5:e12948', 'wps-proclevel': '3', 'wps-journaldoi': '10.1111/(ISSN)2578-4854', 'title': 'Using behavioral studies to adapt management decisions and reduce negative interactions between humans and baboons in Cape Town, South Africa', 'wps-articledoi': '10.1111/csp2.12948', 'source': '/home/rstudio/data/hwc/Fehlmann et al. 2022.pdf', 'total_pages': 16, 'page': 11, 'page_label': '12'}, page_content="of chasing the baboons away. Most GPS fixes in the vine-\nyards are close to the main sleepin

In [30]:
rag_chain.invoke({"input": "I am trying to prevent coyotes from eating the calves of my free-range cattle. What may work best?"})

{'input': 'I am trying to prevent coyotes from eating the calves of my free-range cattle. What may work best?',
 'context': [Document(id='65c01063-cb6a-469a-b03d-019861cf1ffd', metadata={'producer': 'PDFlib PLOP 2.0.0p6 (SunOS)/Acrobat Distiller 6.0 (Windows); modified using iText 4.2.0 by 1T3XT', 'creator': '3B2 Total Publishing System 8.07c/W', 'creationdate': '2006-04-19T19:41:41-05:00', 'moddate': '2025-05-27T12:08:26-07:00', 'subject': 'Wildlife Society Bulletin 2006.34:191-200', 'wps-proclevel': '2', 'wps-journaldoi': '10.1111/wsb4.2006.34.issue-1', 'title': 'From the Field: Fences and Deer‐Damage Management: A Review of Designs and Efficacy', 'wps-articledoi': '10.2193/0091-7648(2006)34[191:FADMAR]2.0.CO;2', 'source': '/home/rstudio/data/hwc/VerCauteren et al. 2006.pdf', 'total_pages': 10, 'page': 3, 'page_label': '4'}, page_content='harm to an animal impacting it and facilitates installation on\nuneven terrain (Bryant et al. 1993). Wire-mesh fence is available\nwith wider spaci

In [31]:
rag_chain.invoke({"input": "We have major issues with deer raiding our large agricultural fields. Is there anything I can try to prevent this that won’t break the bank?"})

{'input': 'We have major issues with deer raiding our large agricultural fields. Is there anything I can try to prevent this that won’t break the bank?',
 'context': [Document(id='63bb63bd-43db-48a1-a246-a0dcd346830f', metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2022-06-23T12:24:35+08:00', 'author': 'Lydia N. Tiller, Ernest Oniba, Godfrey Opira, Ewan J. Brennan, Lucy E. King, Victor Ndombi, Derick Wanjala and Marion R. Robertson', 'keywords': 'African elephants; human-elephant conflict; crop raiding; olfactory mitigation;elephant repellent', 'moddate': '2022-06-23T06:36:53+02:00', 'subject': 'Human–elephant conflict is increasing across many parts of Asia and Africa. Mitigating elephant crop raiding has become a major focus of conservation intervention, however, many existing methods for tackling this problem are expensive and difficult to execute. Thus, there is a need for more affordable, farm-based methods. Testing these methods is key 

In [32]:
rag_chain.invoke({"input": "We live in a suburban area and bears sometimes come into our town to eat from our fruit trees and trash. What are the best ways for us to prevent this as a community? We don’t want to have to get rid of our fruit trees…"})

{'input': 'We live in a suburban area and bears sometimes come into our town to eat from our fruit trees and trash. What are the best ways for us to prevent this as a community? We don’t want to have to get rid of our fruit trees…',
 'context': [Document(id='d17e3a26-e842-4e14-94d0-68e8bf12c897', metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2022-06-23T12:24:35+08:00', 'author': 'Lydia N. Tiller, Ernest Oniba, Godfrey Opira, Ewan J. Brennan, Lucy E. King, Victor Ndombi, Derick Wanjala and Marion R. Robertson', 'keywords': 'African elephants; human-elephant conflict; crop raiding; olfactory mitigation;elephant repellent', 'moddate': '2022-06-23T06:36:53+02:00', 'subject': 'Human–elephant conflict is increasing across many parts of Asia and Africa. Mitigating elephant crop raiding has become a major focus of conservation intervention, however, many existing methods for tackling this problem are expensive and difficult to execute. Thus, there i

In [33]:
prompt = "What cattle husbandry strategies might be helpful to prevent conflict if we live in wolf country?"

rag_chain.invoke({"input": prompt})

{'input': 'What cattle husbandry strategies might be helpful to prevent conflict if we live in wolf country?',
 'context': [Document(id='8e5f9f92-0821-4c43-bb26-33f927c55cb5', metadata={'producer': 'Adobe PDF Library 15.0; modified using iText® 5.3.5 ©2000-2012 1T3XT BVBA (SPRINGER SBM; licensed version)', 'creator': 'Springer', 'creationdate': '2020-09-14T15:09:33+05:30', 'crossmarkdomains[1]': 'springer.com', 'moddate': '2020-09-14T15:58:07+02:00', 'crossmarkmajorversiondate': '2010-04-23', 'subject': 'Scientific Reports, https://doi.org/10.1038/s41598-020-72343-6', 'author': 'Igor Khorozyan', 'title': 'Variation and conservation implications of the effectiveness of anti-bear interventions', 'crossmarkdomainexclusive': 'true', 'robots': 'noindex', 'doi': '10.1038/s41598-020-72343-6', 'crossmarkdomains[2]': 'springerlink.com', 'source': '/home/rstudio/data/hwc/Khorozyan and Waltert 2020.pdf', 'total_pages': 9, 'page': 2, 'page_label': '3'}, page_content='Guarding animals 3 1\nMixed te

In [17]:
import os
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams
import gc
import torch

# Option 1: FAISS (Facebook AI Similarity Search) - Most memory efficient
def create_faiss_vectorstore(splits, embedding, persist_directory="./faiss_db", batch_size=100):
    """
    Create FAISS vector store with batched processing to minimize GPU RAM usage
    """
    os.makedirs(persist_directory, exist_ok=True)
    
    # Process documents in batches to avoid GPU memory overflow
    vectorstore = None
    
    for i in range(0, len(splits), batch_size):
        batch = splits[i:i + batch_size]
        print(f"Processing batch {i//batch_size + 1}/{(len(splits) + batch_size - 1)//batch_size}")
        
        if vectorstore is None:
            # Create initial vectorstore with first batch
            vectorstore = FAISS.from_documents(
                documents=batch,
                embedding=embedding
            )
        else:
            # Add subsequent batches to existing vectorstore
            batch_vectorstore = FAISS.from_documents(
                documents=batch,
                embedding=embedding
            )
            vectorstore.merge_from(batch_vectorstore)
            
            # Clean up temporary vectorstore
            del batch_vectorstore
        
        # Force garbage collection and clear GPU cache if using CUDA
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    
    # Save to disk
    vectorstore.save_local(persist_directory)
    print(f"Vector store saved to {persist_directory}")
    
    return vectorstore

def load_faiss_vectorstore(embedding, persist_directory="./faiss_db"):
    """Load existing FAISS vector store from disk"""
    return FAISS.load_local(
        persist_directory,
        embedding,
        allow_dangerous_deserialization=True  # Only if you trust the source
    )

# Option 2: Chroma - Persistent SQLite-based storage
def create_chroma_vectorstore(splits, embedding, persist_directory="./chroma_db", batch_size=100):
    """
    Create Chroma vector store with batched processing
    """
    # Initialize Chroma with persistence
    vectorstore = Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding
    )
    
    # Add documents in batches
    for i in range(0, len(splits), batch_size):
        batch = splits[i:i + batch_size]
        print(f"Processing batch {i//batch_size + 1}/{(len(splits) + batch_size - 1)//batch_size}")
        
        vectorstore.add_documents(batch)
        
        # Force garbage collection and clear GPU cache
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    
    # Persist to disk
    vectorstore.persist()
    print(f"Vector store persisted to {persist_directory}")
    
    return vectorstore

def load_chroma_vectorstore(embedding, persist_directory="./chroma_db"):
    """Load existing Chroma vector store from disk"""
    return Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding
    )

# Option 3: Qdrant - High-performance vector database
def create_qdrant_vectorstore(splits, embedding, collection_name="documents", 
                            path="./qdrant_db", batch_size=100):
    """
    Create Qdrant vector store with local file-based storage
    """
    # Initialize local Qdrant client
    client = QdrantClient(path=path)
    
    # Get embedding dimension (embed a sample text)
    sample_embedding = embedding.embed_query("sample text")
    embedding_dim = len(sample_embedding)
    
    # Create collection if it doesn't exist
    try:
        client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE)
        )
    except Exception as e:
        print(f"Collection might already exist: {e}")
    
    # Create vectorstore
    vectorstore = Qdrant(
        client=client,
        collection_name=collection_name,
        embeddings=embedding
    )
    
    # Add documents in batches
    for i in range(0, len(splits), batch_size):
        batch = splits[i:i + batch_size]
        print(f"Processing batch {i//batch_size + 1}/{(len(splits) + batch_size - 1)//batch_size}")
        
        vectorstore.add_documents(batch)
        
        # Force garbage collection and clear GPU cache
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    
    print(f"Vector store created in {path}")
    return vectorstore

def load_qdrant_vectorstore(embedding, collection_name="documents", path="./qdrant_db"):
    """Load existing Qdrant vector store from disk"""
    client = QdrantClient(path=path)
    return Qdrant(
        client=client,
        collection_name=collection_name,
        embeddings=embedding
    )


ModuleNotFoundError: No module named 'torch'

In [None]:

# Usage examples:

# Replace your original code with one of these options:

# Option 1: FAISS (Recommended for most use cases)
vectorstore = create_faiss_vectorstore(
    splits=splits, 
    embedding=embedding, 
    persist_directory="./my_faiss_db",
    batch_size=50  # Adjust based on your GPU memory
)

# To load later:
# vectorstore = load_faiss_vectorstore(embedding, "./my_faiss_db")

# Option 2: Chroma (Good for development and moderate scale)
# vectorstore = create_chroma_vectorstore(
#     splits=splits,
#     embedding=embedding,
#     persist_directory="./my_chroma_db",
#     batch_size=50
# )

# Option 3: Qdrant (Best for production and very large scale)
# vectorstore = create_qdrant_vectorstore(
#     splits=splits,
#     embedding=embedding,
#     collection_name="my_documents",
#     path="./my_qdrant_db",
#     batch_size=50
# )

# Memory optimization settings
def optimize_gpu_memory():
    """Additional GPU memory optimization"""
    if torch.cuda.is_available():
        # Set memory fraction if needed
        torch.cuda.set_per_process_memory_fraction(0.8)  # Use 80% of GPU memory
        
        # Enable memory mapping for large tensors
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

# Call before processing if you have GPU memory issues
# optimize_gpu_memory()