Spaces:

CosmoAI
/

Web_QnA

Runtime error

App Files Files Community

CosmoAI committed on Oct 5, 2023

Commit

31794c3

1 Parent(s): f42fa3c

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -23

app.py CHANGED Viewed

@@ -1,24 +1,102 @@
 import streamlit as st
-from streamlit_option_menu import option_menu
-embed = """<iframe
-	src="https://cosmoai-cosmos.hf.space"
-	frameborder="0"
-	width="850"
-	height="450"
-></iframe>
-"""
-st.markdown(embed, unsafe_allow_html=True)
-# Create a nested streamlit-option-menu
-optons = [
-    "Option 1",
-    option_menu(
-        "Sub Menu",
-        options=["Option 2", "Option 3"]
-    ),
-]
-# Display the streamlit-option-menu
-option_menu("Main Menu", options = optons)

 import streamlit as st
+from dotenv import load_dotenv
+from PyPDF2 import PdfReader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+# from langchain.chat_models import ChatOpenAI
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from htmlTemplates import css, bot_template, user_template
+from langchain.llms import HuggingFaceHub
+import os
+# from transformers import T5Tokenizer, T5ForConditionalGeneration
+# from langchain.callbacks import get_openai_callback
+# Load .env before reading the token: main() only calls load_dotenv() later,
+# which is too late for this import-time lookup.
+load_dotenv()
+# Raises KeyError at import time if HUGGINGFACE_HUB_TOKEN is not set.
+hub_token = os.environ["HUGGINGFACE_HUB_TOKEN"]
+def split_pdfs(pdf_docs, pages_per_chunk=10):
+  """Groups the pages of each PDF document into chunks of bounded size.
+
+  Every document starts its own chunk, so a chunk never mixes pages from two
+  documents. Empty chunks are never returned: the previous version left a
+  trailing empty list behind whenever a document's page count was an exact
+  multiple of the chunk size, and for documents without any pages.
+
+  Args:
+    pdf_docs: An iterable of PDF sources; each item is passed to PdfReader.
+      None or an empty iterable yields an empty result.
+    pages_per_chunk: Maximum number of pages per chunk. Defaults to 10.
+  Returns:
+    A list of non-empty lists of page objects, in document and page order.
+  Raises:
+    ValueError: If pages_per_chunk is less than 1.
+  """
+  if pages_per_chunk < 1:
+    raise ValueError("pages_per_chunk must be at least 1")
+  pdf_chunks = []
+  for pdf_doc in pdf_docs or ():
+    # Materialize the pages once so they can be sliced into fixed-size groups.
+    pdf_pages = list(PdfReader(pdf_doc).pages)
+    # range() produces no start index for an empty document, so no empty
+    # chunk is ever appended.
+    for start in range(0, len(pdf_pages), pages_per_chunk):
+      pdf_chunks.append(pdf_pages[start:start + pages_per_chunk])
+  return pdf_chunks
+def generate_response(pdf_chunks, llm_model, question=""):
+  """Summarizes each PDF chunk, then asks the model to answer from the summaries.
+
+  Args:
+    pdf_chunks: A list of page chunks; one summary is requested per chunk.
+    llm_model: An object whose generate() method is called with the keyword
+      arguments prompt and max_new_tokens.
+    question: The user's question, appended after "Question:" in the final
+      prompt. Defaults to an empty string, which leaves the prompt exactly
+      as it was before this parameter existed (i.e. with no question text).
+  Returns:
+    Whatever llm_model.generate returns for the final prompt.
+  """
+  # NOTE(review): get_pdf_text and get_text_chunks are not defined in the part
+  # of the file visible here - confirm they exist and accept these arguments.
+  # NOTE(review): LangChain LLMs document generate(prompts=[...]); confirm the
+  # model passed in really accepts prompt= and max_new_tokens=.
+  # One summary per chunk, kept in chunk order.
+  pdf_summaries = [
+    llm_model.generate(
+      prompt=f"Summarize the following text:\n{get_pdf_text(pdf_chunk)}",
+      max_new_tokens=100
+    )
+    for pdf_chunk in pdf_chunks
+  ]
+  final_prompt = f"Answer the following question using the following summaries:\n{get_text_chunks(pdf_summaries)}\n\nQuestion:"
+  if question:
+    # Without this the model is asked to answer a question it never sees.
+    final_prompt = f"{final_prompt} {question}"
+  return llm_model.generate(
+    prompt=final_prompt,
+    max_new_tokens=200
+  )
+def main():
+  load_dotenv()
+  st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
+  st.write(css, unsafe_allow_html=True)
+  # Load the LLM model.
+  llm_model = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", huggingfacehub_api_token=hub_token)
+  if "conversation" not in st.session_state:
+    st.session_state.conversation = None
+  if "chat_history" not in st.session_state:
+    st.session_state.chat_history = None
+  st.header("Chat with multiple PDFs :books:")
+  user_question = st.text_input("Ask a question about your documents:")
+  # If the user asked a question, generate a response.
+  if user_question:
+    # Split the PDF documents into smaller chunks.
+    pdf_chunks = split_pdfs(st.session_state.pdf_docs)
+    # Generate a response to the query.
+    response = generate_response(pdf_chunks, llm_model)
+    st.write(response)