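"""Streamlit RAG demo: upload a PDF or TXT document, index it with FAISS,
and answer questions about it via the Groq chat-completions API."""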
import streamlit as st
import PyPDF2
import os
import requests
import json
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
import tempfile

# Load environment variables
load_dotenv()
GROQ_API_TOKEN = os.getenv("GROQ_API_TOKEN")
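# Expects a .env file next to this script containing GROQ_API_TOKEN=<your key>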

# Function to extract text from PDF
def extract_text_from_pdf(file):
    # Spool the upload to a temporary file so PyPDF2 can read it from disk
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(file.getvalue())
        temp_file_path = temp_file.name

    try:
        with open(temp_file_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            text = ""
            for page in pdf_reader.pages:
                # extract_text() returns None for pages without a text layer
                text += page.extract_text() or ""
    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        text = ""
    finally:
        os.unlink(temp_file_path)
    return text

# Function to extract text from TXT
def extract_text_from_txt(file):
    # Decode as UTF-8, replacing undecodable bytes instead of crashing
    return file.getvalue().decode("utf-8", errors="replace")

# Function to query the Groq API
def query_groq(prompt, context, temperature, max_tokens):
    headers = {
        "Authorization": f"Bearer {GROQ_API_TOKEN}",
        "Content-Type": "application/json"
    }
    
    data = {
        "model": "mixtral-8x7b-32768",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant. Answer questions based only on the provided context."},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {prompt}"}
        ],
        "temperature": temperature,
        "max_tokens": max_tokens
    }
    
    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=60,  # don't let a stalled request hang the UI
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.RequestException as e:
        st.error(f"Error querying Groq API: {str(e)}")
        return None

# Function to create vector store
def create_vector_store(text):
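    # 1,000-character chunks with 200-character overlap, so text that straddles
    # a boundary still appears intact in at least one chunk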
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_text(text)
    
    # all-MiniLM-L6-v2 is a small, CPU-friendly sentence-embedding model
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # Build an in-memory FAISS index over the chunk embeddings
    vector_store = FAISS.from_texts(chunks, embeddings)
    
    return vector_store

# Streamlit UI
st.set_page_config(page_title="Enhanced Document Query System", layout="wide")

# Custom CSS for the scrollable chat area (class must match the <div> used below)
st.markdown("""
<style>
    .chat-messages {
        height: 600px;
        overflow-y: auto;
        border: 1px solid #ccc;
        border-radius: 5px;
        padding: 10px;
    }
</style>
""", unsafe_allow_html=True)

st.title("Enhanced Document Query System")

# Create two columns for the split-screen layout
left_column, right_column = st.columns(2)

# Left column: Document upload and processing
with left_column:
    st.header("Document Upload")
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "txt"])
    doc_type = st.selectbox("Select document type", ["PDF", "TXT"])

    # Model parameters
    st.subheader("Model Parameters")
    temperature = st.slider("Temperature", 0.0, 1.0, 0.5, 0.1)
    max_tokens = st.slider("Max Tokens", 100, 2000, 1000, 100)

    if uploaded_file is not None:
        # Extract text based on document type
        if doc_type == "PDF":
            doc_text = extract_text_from_pdf(uploaded_file)
        else:
            doc_text = extract_text_from_txt(uploaded_file)
        
        if doc_text:
            st.success("File uploaded and processed successfully!")

            # Streamlit re-executes the whole script on every interaction, so
            # only rebuild the (slow) embedding index when a new file arrives
            if st.session_state.get("indexed_file") != uploaded_file.name:
                st.session_state.vector_store = create_vector_store(doc_text)
                st.session_state.indexed_file = uploaded_file.name
        else:
            st.error("Failed to extract text from the document. Please try again.")

    # Clear chat history button
    if st.button("Clear Chat History"):
        st.session_state.messages = []
        st.rerun()

# Right column: Chat interface
with right_column:
    st.header("Chat Interface")

    # Chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Scrollable chat container. Note that raw <div> wrappers are not
    # guaranteed to enclose widgets rendered by later Streamlit calls;
    # st.container(height=600) is a sturdier alternative on recent Streamlit.
    chat_container = st.container()
    with chat_container:
        st.markdown('<div class="chat-messages">', unsafe_allow_html=True)
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
        st.markdown('</div>', unsafe_allow_html=True)

    # User query input
    user_query = st.chat_input("Enter your question about the document:")
    
    if user_query and 'vector_store' in st.session_state:
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": user_query})
        with chat_container:
            with st.chat_message("user"):
                st.markdown(user_query)

        # Retrieve relevant context
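        # k=3: fetch the three chunks most similar to the query; they are
        # joined below into the prompt context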
        relevant_docs = st.session_state.vector_store.similarity_search(user_query, k=3)
        context = "\n".join([doc.page_content for doc in relevant_docs])

        # Query GROQ API
        response = query_groq(user_query, context, temperature, max_tokens)
        
        if response:
            # Add the assistant message to the history and render it inside
            # the same container as the user message
            st.session_state.messages.append({"role": "assistant", "content": response})
            with chat_container:
                with st.chat_message("assistant"):
                    st.markdown(response)
    elif user_query:
        st.warning("Please upload and process a document first.")
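
# To try this locally (assuming the file is saved as app.py):
#   pip install streamlit PyPDF2 requests python-dotenv langchain \
#       langchain-community faiss-cpu sentence-transformers
#   streamlit run app.py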