1amr1ddl3 commited on
Commit
06ab73b
·
verified ·
1 Parent(s): b9365a8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +181 -0
app.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import PyPDF2
3
+ import os
4
+ import requests
5
+ import json
6
+ from dotenv import load_dotenv
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain_community.embeddings import HuggingFaceEmbeddings
10
+ import tempfile
11
+
12
# Load environment variables from a local .env file (if present) so the
# Groq API key can be supplied without hard-coding it in source.
load_dotenv()
# Bearer token used by query_groq(); will be None if the variable is unset,
# in which case API calls below will fail with an auth error.
GROQ_API_TOKEN = os.getenv("GROQ_API_TOKEN")
15
+
16
# Function to extract text from PDF
def extract_text_from_pdf(file):
    """Extract the concatenated text of every page of an uploaded PDF.

    Args:
        file: A Streamlit UploadedFile (any object with .getvalue() -> bytes).

    Returns:
        The extracted text, or "" if the PDF could not be parsed (the error
        is surfaced in the UI via st.error).
    """
    # PyPDF2.PdfReader accepts a binary file-like object, so we can read the
    # upload in memory — no temp file on disk, nothing to clean up, and no
    # shadowing of the `file` parameter (the original re-bound `file` inside
    # a nested `with open(...)`).
    text = ""
    try:
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(file.getvalue()))
        for page in pdf_reader.pages:
            # extract_text() may return None for pages without a text layer;
            # the original would raise TypeError on `text += None`.
            text += page.extract_text() or ""
    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        text = ""
    return text
34
+
35
# Function to extract text from TXT
def extract_text_from_txt(file):
    """Decode an uploaded plain-text file as UTF-8 and return its contents."""
    raw_bytes = file.getvalue()
    return raw_bytes.decode("utf-8")
38
+
39
# Function to query GROQ API
def query_groq(prompt, context, temperature, max_tokens, timeout=60):
    """Send a RAG-style chat completion request to the Groq API.

    Args:
        prompt: The user's question.
        context: Retrieved document text the model must ground its answer in.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Response length cap forwarded to the API.
        timeout: Seconds to wait for the HTTP response (new, backward-compatible;
            requests has NO default timeout, so the original could hang the UI
            indefinitely on a stalled connection).

    Returns:
        The assistant's reply text, or None on any request/HTTP failure
        (the error is surfaced in the UI via st.error).
    """
    headers = {
        "Authorization": f"Bearer {GROQ_API_TOKEN}",
        "Content-Type": "application/json"
    }

    data = {
        "model": "mixtral-8x7b-32768",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant. Answer questions based only on the provided context."},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {prompt}"}
        ],
        "temperature": temperature,
        "max_tokens": max_tokens
    }

    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=timeout,
        )
        # Raise on 4xx/5xx so auth/model errors are reported, not parsed.
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.RequestException as e:
        st.error(f"Error querying GROQ API: {str(e)}")
        return None
63
+
64
# Function to create vector store
def create_vector_store(text):
    """Split *text* into overlapping chunks and index them in a FAISS store.

    Chunks are 1000 characters with a 200-character overlap, embedded with
    the all-MiniLM-L6-v2 sentence-transformer.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_texts(splitter.split_text(text), embedder)
73
+
74
# Streamlit UI
st.set_page_config(layout="wide")

# Custom CSS for scrollable chat container.
# BUG FIX: the chat area below uses class "scrollable-chat", which the
# original stylesheet never defined (it only defined .chat-container /
# .chat-messages / .chat-input, none of which were used) — so the scroll
# styling never applied. The missing rule is added here; the original
# rules are kept for compatibility.
st.markdown("""
    <style>
    .scrollable-chat {
        height: 600px;
        overflow-y: auto;
        padding: 10px;
        border: 1px solid #ccc;
        border-radius: 5px;
    }
    .chat-container {
        height: 600px;
        display: flex;
        flex-direction: column;
        border: 1px solid #ccc;
        border-radius: 5px;
    }
    .chat-messages {
        flex: 1;
        overflow-y: auto;
        padding: 10px;
    }
    .chat-input {
        border-top: 1px solid #ccc;
        padding: 10px;
    }
    </style>
""", unsafe_allow_html=True)

st.title("Enhanced Document Query System")

# Create two columns for the split-screen layout
left_column, right_column = st.columns(2)
103
+
104
# Left column: Document upload and processing
with left_column:
    st.header("Document Upload")
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "txt"])
    doc_type = st.selectbox("Select document type", ["PDF", "TXT"])

    # Model parameters
    st.subheader("Model Parameters")
    temperature = st.slider("Temperature", 0.0, 1.0, 0.5, 0.1)
    max_tokens = st.slider("Max Tokens", 100, 2000, 1000, 100)

    if uploaded_file is not None:
        # PERF FIX: Streamlit reruns this whole script on every interaction
        # (including every chat message). The original re-extracted the text
        # and re-embedded the entire document on each rerun. Only reprocess
        # when a different file is uploaded.
        file_key = (uploaded_file.name, uploaded_file.size)
        if st.session_state.get("processed_file") != file_key:
            # Extract text based on the user-selected document type.
            if doc_type == "PDF":
                doc_text = extract_text_from_pdf(uploaded_file)
            else:
                doc_text = extract_text_from_txt(uploaded_file)

            if doc_text:
                # Build the retrieval index and remember which file it covers.
                st.session_state.vector_store = create_vector_store(doc_text)
                st.session_state.processed_file = file_key
                st.success("File uploaded and processed successfully!")
            else:
                st.error("Failed to extract text from the document. Please try again.")

    # Clear chat history button
    if st.button("Clear Chat History"):
        st.session_state.messages = []
        st.rerun()
135
+
136
# Right column: Chat interface
with right_column:
    st.header("Chat Interface")

    # Initialize persistent chat history on the first run of the session.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Scrollable chat container holding the replayed history.
    # NOTE(review): each st.markdown call renders as its own element, so this
    # opening <div> does not actually wrap the chat messages emitted below —
    # confirm the scrollable styling behaves as intended in the deployed app.
    chat_container = st.container()
    with chat_container:
        st.markdown('<div class="scrollable-chat">', unsafe_allow_html=True)
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
        st.markdown('</div>', unsafe_allow_html=True)

    # User query input
    user_query = st.chat_input("Enter your question about the document:")

    if user_query and 'vector_store' in st.session_state:
        # Record and render the user's message.
        st.session_state.messages.append({"role": "user", "content": user_query})
        with chat_container:
            with st.chat_message("user"):
                st.markdown(user_query)

        # Retrieve the 3 most relevant chunks as grounding context.
        relevant_docs = st.session_state.vector_store.similarity_search(user_query, k=3)
        context = "\n".join([doc.page_content for doc in relevant_docs])

        # Query GROQ API with the retrieved context.
        response = query_groq(user_query, context, temperature, max_tokens)

        if response:
            st.session_state.messages.append({"role": "assistant", "content": response})
            # CONSISTENCY FIX: the original rendered the assistant reply
            # outside chat_container while the user message went inside;
            # render both in the same container so the conversation stays
            # in one place.
            with chat_container:
                with st.chat_message("assistant"):
                    st.markdown(response)
    elif user_query:
        # A query arrived before any document was indexed.
        st.warning("Please upload and process a document first.")