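"""Streamlit app: upload a PDF or TXT document, index it with FAISS, and chat
with it through the Groq chat completions API (retrieval-augmented answers)."""
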
import os
import tempfile

import requests
import streamlit as st
import PyPDF2
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# Load environment variables (expects GROQ_API_TOKEN in a local .env file).
load_dotenv()
GROQ_API_TOKEN = os.getenv("GROQ_API_TOKEN")


def extract_text_from_pdf(file):
    """Extract text from an uploaded PDF via a temporary file on disk."""
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(file.getvalue())
        temp_file_path = temp_file.name

    try:
        with open(temp_file_path, "rb") as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            text = ""
            for page in pdf_reader.pages:
                # extract_text() may return None for image-only pages.
                text += page.extract_text() or ""
    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        text = ""
    finally:
        os.unlink(temp_file_path)
    return text


def extract_text_from_txt(file):
    """Decode an uploaded plain-text file as UTF-8."""
    return file.getvalue().decode("utf-8")


def query_groq(prompt, context, temperature, max_tokens):
    """Ask the Groq chat completions API to answer from the retrieved context."""
    headers = {
        "Authorization": f"Bearer {GROQ_API_TOKEN}",
        "Content-Type": "application/json",
    }
    data = {
        # Model ID as written; substitute a current Groq model if this one is retired.
        "model": "mixtral-8x7b-32768",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant. Answer questions based only on the provided context."},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {prompt}"},
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=60,  # avoid hanging the Streamlit script on a stalled request
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.RequestException as e:
        st.error(f"Error querying GROQ API: {str(e)}")
        return None


def create_vector_store(text):
    """Chunk the document and index the chunks in an in-memory FAISS store."""
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_text(text)

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(chunks, embeddings)
    return vector_store
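
# Optional: st.cache_resource can wrap create_vector_store so the same document
# is not re-embedded on every Streamlit rerun, e.g.
#   create_vector_store = st.cache_resource(create_vector_store)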


st.set_page_config(layout="wide")
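
# Optional guard: surface a missing token immediately rather than as an
# authentication error on the first query.
if not GROQ_API_TOKEN:
    st.warning("GROQ_API_TOKEN is not set. Add it to your .env file before querying.")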

# Inline CSS for the chat panel.
st.markdown("""
<style>
.chat-container {
    height: 600px;
    display: flex;
    flex-direction: column;
    border: 1px solid #ccc;
    border-radius: 5px;
}
.chat-messages {
    flex: 1;
    overflow-y: auto;
    padding: 10px;
}
.chat-input {
    border-top: 1px solid #ccc;
    padding: 10px;
}
</style>
""", unsafe_allow_html=True)

st.title("Enhanced Document Query System")

# Two-pane layout: upload and settings on the left, chat on the right.
left_column, right_column = st.columns(2)

with left_column:
    st.header("Document Upload")
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "txt"])
    doc_type = st.selectbox("Select document type", ["PDF", "TXT"])

    st.subheader("Model Parameters")
    temperature = st.slider("Temperature", 0.0, 1.0, 0.5, 0.1)
    max_tokens = st.slider("Max Tokens", 100, 2000, 1000, 100)

    if uploaded_file is not None:
        if doc_type == "PDF":
            doc_text = extract_text_from_pdf(uploaded_file)
        else:
            doc_text = extract_text_from_txt(uploaded_file)

        if doc_text:
            st.success("File uploaded and processed successfully!")
            # Index the document and keep the store across reruns.
            vector_store = create_vector_store(doc_text)
            st.session_state.vector_store = vector_store
        else:
            st.error("Failed to extract text from the document. Please try again.")

    if st.button("Clear Chat History"):
        st.session_state.messages = []
        st.rerun()

with right_column:
    st.header("Chat Interface")

    if "messages" not in st.session_state:
        st.session_state.messages = []

    chat_container = st.container()
    with chat_container:
        # Render the running history inside the styled, scrollable area.
        st.markdown('<div class="chat-messages">', unsafe_allow_html=True)
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
        st.markdown('</div>', unsafe_allow_html=True)

user_query = st.chat_input("Enter your question about the document:")

if user_query and "vector_store" in st.session_state:
    # Record and display the user's message.
    st.session_state.messages.append({"role": "user", "content": user_query})
    with chat_container:
        with st.chat_message("user"):
            st.markdown(user_query)

    # Retrieve the three most similar chunks to ground the answer.
    relevant_docs = st.session_state.vector_store.similarity_search(user_query, k=3)
    context = "\n".join([doc.page_content for doc in relevant_docs])

    response = query_groq(user_query, context, temperature, max_tokens)

    if response:
        st.session_state.messages.append({"role": "assistant", "content": response})
        with chat_container:
            with st.chat_message("assistant"):
                st.markdown(response)
elif user_query:
    st.warning("Please upload and process a document first.")