import streamlit as st
import PyPDF2
import os
import requests
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
import tempfile

# Load environment variables
load_dotenv()
GROQ_API_TOKEN = os.getenv("GROQ_API_TOKEN")


# Function to extract text from PDF
def extract_text_from_pdf(file):
    # Write the upload to a temporary file so PyPDF2 can read it from disk
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(file.getvalue())
        temp_file_path = temp_file.name
    try:
        with open(temp_file_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            text = ""
            for page in pdf_reader.pages:
                # extract_text() may return None for pages with no text layer
                text += page.extract_text() or ""
    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        text = ""
    finally:
        os.unlink(temp_file_path)
    return text


# Function to extract text from TXT
def extract_text_from_txt(file):
    return file.getvalue().decode("utf-8")


# Function to query GROQ API
def query_groq(prompt, context, temperature, max_tokens):
    headers = {
        "Authorization": f"Bearer {GROQ_API_TOKEN}",
        "Content-Type": "application/json"
    }
    data = {
        "model": "mixtral-8x7b-32768",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant. Answer questions based only on the provided context."},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {prompt}"}
        ],
        "temperature": temperature,
        "max_tokens": max_tokens
    }
    try:
        response = requests.post("https://api.groq.com/openai/v1/chat/completions",
                                 headers=headers, json=data)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.RequestException as e:
        st.error(f"Error querying GROQ API: {str(e)}")
        return None


# Function to create vector store
def create_vector_store(text):
    # Split into overlapping chunks so retrieval can return focused passages
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_text(text)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(chunks, embeddings)
    return vector_store
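# A rough sketch of how the helpers above compose outside of Streamlit
# (hypothetical question and file path, not executed by this app):
#
#   store = create_vector_store(open("notes.txt").read())
#   docs = store.similarity_search("What is the main topic?", k=3)
#   ctx = "\n".join(d.page_content for d in docs)
#   answer = query_groq("What is the main topic?", ctx, 0.5, 1000)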
# Streamlit UI
st.set_page_config(layout="wide")

# Custom CSS for scrollable chat container. The original stylesheet content is
# missing here; the rules below are a minimal assumed reconstruction, just
# enough to make a .chat-container element scroll.
st.markdown("""
<style>
.chat-container {
    max-height: 60vh;
    overflow-y: auto;
}
</style>
""", unsafe_allow_html=True)

st.title("Enhanced Document Query System")

# Create two columns for the split-screen layout
left_column, right_column = st.columns(2)

# Left column: Document upload and processing
with left_column:
    st.header("Document Upload")
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "txt"])
    doc_type = st.selectbox("Select document type", ["PDF", "TXT"])

    # Model parameters
    st.subheader("Model Parameters")
    temperature = st.slider("Temperature", 0.0, 1.0, 0.5, 0.1)
    max_tokens = st.slider("Max Tokens", 100, 2000, 1000, 100)

    if uploaded_file is not None:
        # Extract text based on document type
        if doc_type == "PDF":
            doc_text = extract_text_from_pdf(uploaded_file)
        else:
            doc_text = extract_text_from_txt(uploaded_file)

        if doc_text:
            st.success("File uploaded and processed successfully!")
            # Create vector store
            vector_store = create_vector_store(doc_text)
            st.session_state.vector_store = vector_store
        else:
            st.error("Failed to extract text from the document. Please try again.")

    # Clear chat history button
    if st.button("Clear Chat History"):
        st.session_state.messages = []
        st.rerun()
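# Note: Streamlit reruns this script top to bottom on every interaction, so
# anything that must survive a rerun (the FAISS store, the chat transcript)
# is kept in st.session_state rather than in plain local variables.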
# Right column: Chat interface
with right_column:
    st.header("Chat Interface")

    # Chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Scrollable chat container; the div markup is a reconstruction, with the
    # class name assumed to match the CSS above
    chat_container = st.container()
    with chat_container:
        st.markdown('<div class="chat-container">', unsafe_allow_html=True)
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
        st.markdown('</div>', unsafe_allow_html=True)
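    # The query path below is a simple retrieval-augmented flow: embed the
    # question, pull the k=3 nearest chunks from FAISS, and stuff them
    # verbatim into the prompt that query_groq sends to the model.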
    # User query input
    user_query = st.chat_input("Enter your question about the document:")

    if user_query and 'vector_store' in st.session_state:
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": user_query})
        with chat_container:
            with st.chat_message("user"):
                st.markdown(user_query)

        # Retrieve relevant context
        relevant_docs = st.session_state.vector_store.similarity_search(user_query, k=3)
        context = "\n".join([doc.page_content for doc in relevant_docs])

        # Query GROQ API
        response = query_groq(user_query, context, temperature, max_tokens)

        if response:
            # Add assistant message to chat history
            st.session_state.messages.append({"role": "assistant", "content": response})
            with st.chat_message("assistant"):
                st.markdown(response)
    elif user_query:
        st.warning("Please upload and process a document first.")
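# To run (assuming this file is saved as app.py, with a .env file next to it
# containing a line like GROQ_API_TOKEN=<your key>):
#
#   streamlit run app.py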