heyal committed on
Commit
fa411ea
·
1 Parent(s): 62cbeec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -12,6 +12,8 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter ,CharacterTex
12
  from langchain.chains import RetrievalQA
13
  from langchain.document_loaders import TextLoader ,PyPDFLoader ,DirectoryLoader
14
  from langchain.document_loaders import GoogleDriveLoader
 
 
15
 
16
  def create_vecotrstore(embedding , texts, db_name = 'chromadb' ) -> None:
17
  "Extract vector embeddings from text and store to persistance directory and return vector object."
@@ -28,8 +30,9 @@ def create_vecotrstore(embedding , texts, db_name = 'chromadb' ) -> None:
28
  def load_chunk(data_dir):
29
 
30
  #loader = DirectoryLoader(data_dir , glob="./*.pdf", loader_cls=PyPDFLoader)
31
- loader = GoogleDriveLoader(folder_id = data_dir, glob="./*.pdf", loader_cls=PyPDFLoader, credentials_path='googlecreds.json')
32
- documents = loader.load()
 
33
  print(f"{len(documents)} documents are loaded.")
34
 
35
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
 
12
  from langchain.chains import RetrievalQA
13
  from langchain.document_loaders import TextLoader ,PyPDFLoader ,DirectoryLoader
14
  from langchain.document_loaders import GoogleDriveLoader
15
+ from datasets import load_dataset
16
+ dataset = load_dataset("heyal/carbon_data")
17
 
18
  def create_vecotrstore(embedding , texts, db_name = 'chromadb' ) -> None:
19
  "Extract vector embeddings from text and store to persistance directory and return vector object."
 
30
  def load_chunk(data_dir):
31
 
32
  #loader = DirectoryLoader(data_dir , glob="./*.pdf", loader_cls=PyPDFLoader)
33
+ #loader = GoogleDriveLoader(folder_id = data_dir, glob="./*.pdf", loader_cls=PyPDFLoader, credentials_path='googlecreds.json')
34
+ #documents = loader.load()
35
+ documents = dataset
36
  print(f"{len(documents)} documents are loaded.")
37
 
38
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,