Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,8 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter ,CharacterTex
|
|
12 |
from langchain.chains import RetrievalQA
|
13 |
from langchain.document_loaders import TextLoader ,PyPDFLoader ,DirectoryLoader
|
14 |
from langchain.document_loaders import GoogleDriveLoader
|
|
|
|
|
15 |
|
16 |
def create_vecotrstore(embedding , texts, db_name = 'chromadb' ) -> None:
|
17 |
"Extract vector embeddings from text, store them in the persistence directory, and return the vector object."
|
@@ -28,8 +30,9 @@ def create_vecotrstore(embedding , texts, db_name = 'chromadb' ) -> None:
|
|
28 |
def load_chunk(data_dir):
|
29 |
|
30 |
#loader = DirectoryLoader(data_dir , glob="./*.pdf", loader_cls=PyPDFLoader)
|
31 |
-
loader = GoogleDriveLoader(folder_id = data_dir, glob="./*.pdf", loader_cls=PyPDFLoader, credentials_path='googlecreds.json')
|
32 |
-
documents = loader.load()
|
|
|
33 |
print(f"{len(documents)} documents are loaded.")
|
34 |
|
35 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
|
|
|
12 |
from langchain.chains import RetrievalQA
|
13 |
from langchain.document_loaders import TextLoader ,PyPDFLoader ,DirectoryLoader
|
14 |
from langchain.document_loaders import GoogleDriveLoader
|
15 |
+
from datasets import load_dataset
|
16 |
+
dataset = load_dataset("heyal/carbon_data")
|
17 |
|
18 |
def create_vecotrstore(embedding , texts, db_name = 'chromadb' ) -> None:
|
19 |
"Extract vector embeddings from text, store them in the persistence directory, and return the vector object."
|
|
|
30 |
def load_chunk(data_dir):
|
31 |
|
32 |
#loader = DirectoryLoader(data_dir , glob="./*.pdf", loader_cls=PyPDFLoader)
|
33 |
+
#loader = GoogleDriveLoader(folder_id = data_dir, glob="./*.pdf", loader_cls=PyPDFLoader, credentials_path='googlecreds.json')
|
34 |
+
#documents = loader.load()
|
35 |
+
documents = dataset
|
36 |
print(f"{len(documents)} documents are loaded.")
|
37 |
|
38 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
|