Spaces:

1amr1ddl3
/

Enhanced-Document-Query-System

Sleeping

App Files Files Community

1amr1ddl3 commited on Sep 26, 2024

Commit

06ab73b

verified ·

1 Parent(s): b9365a8

Create app.py

Browse files

Files changed (1) hide show

app.py +181 -0

app.py ADDED Viewed

	@@ -0,0 +1,181 @@

+import streamlit as st
+import PyPDF2
+import os
+import requests
+import json
+from dotenv import load_dotenv
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+import tempfile
+# Load environment variables
+load_dotenv()
+GROQ_API_TOKEN = os.getenv("GROQ_API_TOKEN")
+# Function to extract text from PDF
+def extract_text_from_pdf(file):
+    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+        temp_file.write(file.getvalue())
+        temp_file_path = temp_file.name
+    try:
+        with open(temp_file_path, 'rb') as file:
+            pdf_reader = PyPDF2.PdfReader(file)
+            text = ""
+            for page in pdf_reader.pages:
+                text += page.extract_text()
+    except Exception as e:
+        st.error(f"Error processing PDF: {str(e)}")
+        text = ""
+    finally:
+        os.unlink(temp_file_path)
+    return text
+# Function to extract text from TXT
+def extract_text_from_txt(file):
+    return file.getvalue().decode("utf-8")
+# Function to query GROQ API
+def query_groq(prompt, context, temperature, max_tokens):
+    headers = {
+        "Authorization": f"Bearer {GROQ_API_TOKEN}",
+        "Content-Type": "application/json"
+    }
+    data = {
+        "model": "mixtral-8x7b-32768",
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant. Answer questions based only on the provided context."},
+            {"role": "user", "content": f"Context: {context}\n\nQuestion: {prompt}"}
+        ],
+        "temperature": temperature,
+        "max_tokens": max_tokens
+    }
+    try:
+        response = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=headers, json=data)
+        response.raise_for_status()
+        return response.json()["choices"][0]["message"]["content"]
+    except requests.exceptions.RequestException as e:
+        st.error(f"Error querying GROQ API: {str(e)}")
+        return None
+# Function to create vector store
+def create_vector_store(text):
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    chunks = text_splitter.split_text(text)
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    vector_store = FAISS.from_texts(chunks, embeddings)
+    return vector_store
+# Streamlit UI
+st.set_page_config(layout="wide")
+# Custom CSS for scrollable chat container
+st.markdown("""
+<style>
+    .chat-container {
+        height: 600px;
+        display: flex;
+        flex-direction: column;
+        border: 1px solid #ccc;
+        border-radius: 5px;
+    }
+    .chat-messages {
+        flex: 1;
+        overflow-y: auto;
+        padding: 10px;
+    }
+    .chat-input {
+        border-top: 1px solid #ccc;
+        padding: 10px;
+    }
+</style>
+""", unsafe_allow_html=True)
+st.title("Enhanced Document Query System")
+# Create two columns for the split-screen layout
+left_column, right_column = st.columns(2)
+# Left column: Document upload and processing
+with left_column:
+    st.header("Document Upload")
+    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "txt"])
+    doc_type = st.selectbox("Select document type", ["PDF", "TXT"])
+    # Model parameters
+    st.subheader("Model Parameters")
+    temperature = st.slider("Temperature", 0.0, 1.0, 0.5, 0.1)
+    max_tokens = st.slider("Max Tokens", 100, 2000, 1000, 100)
+    if uploaded_file is not None:
+        # Extract text based on document type
+        if doc_type == "PDF":
+            doc_text = extract_text_from_pdf(uploaded_file)
+        else:
+            doc_text = extract_text_from_txt(uploaded_file)
+        if doc_text:
+            st.success("File uploaded and processed successfully!")
+            # Create vector store
+            vector_store = create_vector_store(doc_text)
+            st.session_state.vector_store = vector_store
+        else:
+            st.error("Failed to extract text from the document. Please try again.")
+    # Clear chat history button
+    if st.button("Clear Chat History"):
+        st.session_state.messages = []
+        st.rerun()
+# Right column: Chat interface
+with right_column:
+    st.header("Chat Interface")
+    # Chat history
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+    # Scrollable chat container
+    chat_container = st.container()
+    with chat_container:
+        st.markdown('<div class="scrollable-chat">', unsafe_allow_html=True)
+        for message in st.session_state.messages:
+            with st.chat_message(message["role"]):
+                st.markdown(message["content"])
+        st.markdown('</div>', unsafe_allow_html=True)
+    # # Display chat history
+    # for message in st.session_state.messages:
+    #     with st.chat_message(message["role"]):
+    #         st.markdown(message["content"])
+    # User query input
+    user_query = st.chat_input("Enter your question about the document:")
+    if user_query and 'vector_store' in st.session_state:
+        # Add user message to chat history
+        st.session_state.messages.append({"role": "user", "content": user_query})
+        with chat_container:
+            with st.chat_message("user"):
+                st.markdown(user_query)
+        # Retrieve relevant context
+        relevant_docs = st.session_state.vector_store.similarity_search(user_query, k=3)
+        context = "\n".join([doc.page_content for doc in relevant_docs])
+        # Query GROQ API
+        response = query_groq(user_query, context, temperature, max_tokens)
+        if response:
+            # Add assistant message to chat history
+            st.session_state.messages.append({"role": "assistant", "content": response})
+            with st.chat_message("assistant"):
+                st.markdown(response)
+    elif user_query:
+        st.warning("Please upload and process a document first.")