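"""Streamlit RAG demo for PDF question answering.

Uploads PDFs, splits them into chunks, stores Amazon Titan embeddings
(via AWS Bedrock) in a local Chroma database, and answers questions with
Anthropic Claude through a LangChain RetrievalQA chain.
"""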
import os
import tempfile
import streamlit as st
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.chat_models import BedrockChat
from langchain.chains import RetrievalQA
import boto3
# Load AWS credentials from .env if available
load_dotenv()
# Setup AWS Bedrock runtime
bedrock_runtime = boto3.client("bedrock-runtime", region_name="us-east-1")
# UI setup
st.set_page_config(page_title="PDF chatbot", layout="wide")
st.title("RAG Demo - PDF Q&A")
st.markdown("""
1. **Upload Your Documents**: You can upload multiple PDF files for processing.
2. **Ask a Question**: Then ask any question based on the documents' content.
""")
CHROMA_PATH = os.path.join(os.getcwd(), "chroma_db")

def main():
    st.header("Ask a question")

    # Initialize vector store with Amazon Titan Embeddings
    embeddings = BedrockEmbeddings(
        client=bedrock_runtime,
        model_id="amazon.titan-embed-text-v1"
    )
    vectorstore = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=embeddings
    )

    # Sidebar: Upload & Process PDFs
    with st.sidebar:
        st.title("Menu:")
        uploaded_files = st.file_uploader(
            "Upload PDF files and click Submit",
            accept_multiple_files=True,
            key="pdf_uploader"
        )
        if st.button("Submit & Process", key="process_button") and uploaded_files:
            with st.spinner("Processing..."):
                for uploaded_file in uploaded_files:
                    try:
                        # Write the upload to a temporary file so PyPDFLoader can read it from disk
                        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                            tmp_file.write(uploaded_file.getvalue())
                            tmp_path = tmp_file.name

                        # Load pages and record 1-based page numbers in metadata
                        loader = PyPDFLoader(tmp_path)
                        pages = loader.load()
                        for page_number, page in enumerate(pages, start=1):
                            page.metadata["page_number"] = page_number

                        # Split pages into overlapping chunks for retrieval
                        text_splitter = RecursiveCharacterTextSplitter(
                            chunk_size=1000,
                            chunk_overlap=200,
                            separators=["\n\n", "\n", " ", ""]
                        )
                        chunks = text_splitter.split_documents(pages)

                        # Remove the temp file, then index and persist the chunks
                        os.unlink(tmp_path)
                        vectorstore.add_documents(chunks)
                        vectorstore.persist()
                    except Exception as e:
                        st.error(f"Error processing {uploaded_file.name}: {str(e)}")
                        continue
            st.success("Vector store updated with uploaded documents.")
    # Main QA interface
    user_question = st.text_input("Ask a Question from the PDF Files", key="user_question")
    if user_question:
        retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
        llm = BedrockChat(
            client=bedrock_runtime,
            model_id="anthropic.claude-v2",  # or anthropic.claude-v2:1
            model_kwargs={"temperature": 0.0}
        )
        chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
        with st.spinner("Generating answer..."):
            answer = chain.invoke({"query": user_question})
        st.write("**Reply:**", answer["result"])


if __name__ == "__main__":
    main()
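# Assumed setup (not specified in this file): dependencies roughly match the
# imports above, e.g. `pip install streamlit python-dotenv boto3 langchain
# langchain-community chromadb pypdf`; AWS credentials with Bedrock access are
# read from .env or the environment; start the app with `streamlit run app.py`.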