Spaces:

J266501
/

LabAid_v1

Sleeping

J266501 committed 27 days ago

Commit

43da375

1 Parent(s): dbd9be6

change file

Files changed (9) hide show

api_service.py CHANGED Viewed

@@ -24,7 +24,7 @@ TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
 if not TOGETHER_API_KEY:
     raise ValueError("TOGETHER_API_KEY environment variable not set. Please check your .env file.")
-VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "/tmp/vector_db_chroma")
 COLLECTION_NAME = "my_instrument_manual_chunks"
 LLM_MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"

 if not TOGETHER_API_KEY:
     raise ValueError("TOGETHER_API_KEY environment variable not set. Please check your .env file.")
+VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "data/vector_db_chroma")
 COLLECTION_NAME = "my_instrument_manual_chunks"
 LLM_MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"

data/vector_db_chroma/chroma.sqlite3 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2d0d855229b7e7711de0cecbee3f1b2214b1b4b9a84f9e6a47510907654a0ae
 size 7811072

 version https://git-lfs.github.com/spec/v1
+oid sha256:348208d4507d476ce65898670e6da2444745ea2e93a9db3a84aa6391838a7976
 size 7811072

data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/data_level0.bin RENAMED Viewed

File without changes

data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/header.bin RENAMED Viewed

File without changes

data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/length.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29382d806e774618a2c3512be096526ba7c53fe3fcfb120b10c4f353accbad9f
 size 40000

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9be858a747c8b75b3f6c0b8e9d8e232339742a22cecb6e5b5d653483206a73d
 size 40000

data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/link_lists.bin RENAMED Viewed

File without changes

database/processed_documents.db CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bdefd49f0d6b66e66b97cf2f699ae75fa5aa6963380031c31d7a61e4b3d6ce0
 size 999424

 version https://git-lfs.github.com/spec/v1
+oid sha256:bd059441f29cb975054524e0fd1eb53f00ebfbc5fc51277c449a4e8400f55494
 size 999424

main.py CHANGED Viewed

@@ -19,7 +19,7 @@ load_dotenv()
 # --- Configuration ---
 db_directory = "database"
 db_path = os.path.join(db_directory, "processed_documents.db")
-VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "/tmp/vector_db_chroma")
 collection_name = "my_instrument_manual_chunks"
 # Ensure TOGETHER_API_KEY is set

 # --- Configuration ---
 db_directory = "database"
 db_path = os.path.join(db_directory, "processed_documents.db")
+VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "data/vector_db_chroma")
 collection_name = "my_instrument_manual_chunks"
 # Ensure TOGETHER_API_KEY is set

setup_knowledge_base.py CHANGED Viewed

@@ -224,7 +224,7 @@ def get_chunks_from_db_for_embedding(db_path):
         if conn:
             conn.close()
-def load_chunks_to_vector_db(chunks_data, db_path="vector_db_chroma", collection_name="document_chunks", embeddings_model_name="togethercomputer/m2-bert-80M-32k-retrieval"):
     """
     Loads text chunks and their embeddings into a ChromaDB vector database.
     This function will now ADD chunks if they are new (based on their IDs).
@@ -285,7 +285,7 @@ if __name__ == "__main__":
     #pdf_input_directory = "input" # Ensure this directory exists and contains your PDFs
     db_directory = "database"
     db_path = os.path.join(db_directory, "processed_documents.db")
-    vector_db_dir = "vector_db_chroma"
     collection_name = "my_instrument_manual_chunks"
     embeddings_model_name = "togethercomputer/m2-bert-80M-32k-retrieval" # 確保與 main.py 中使用的一致

         if conn:
             conn.close()
+def load_chunks_to_vector_db(chunks_data, db_path="data/vector_db_chroma", collection_name="document_chunks", embeddings_model_name="togethercomputer/m2-bert-80M-32k-retrieval"):
     """
     Loads text chunks and their embeddings into a ChromaDB vector database.
     This function will now ADD chunks if they are new (based on their IDs).
     #pdf_input_directory = "input" # Ensure this directory exists and contains your PDFs
     db_directory = "database"
     db_path = os.path.join(db_directory, "processed_documents.db")
+    vector_db_dir = "data/vector_db_chroma"
     collection_name = "my_instrument_manual_chunks"
     embeddings_model_name = "togethercomputer/m2-bert-80M-32k-retrieval" # 確保與 main.py 中使用的一致