J266501 committed on
Commit
43da375
·
1 Parent(s): dbd9be6

change file

Browse files
api_service.py CHANGED
@@ -24,7 +24,7 @@ TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
24
  if not TOGETHER_API_KEY:
25
  raise ValueError("TOGETHER_API_KEY environment variable not set. Please check your .env file.")
26
 
27
- VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "/tmp/vector_db_chroma")
28
  COLLECTION_NAME = "my_instrument_manual_chunks"
29
 
30
  LLM_MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
 
24
  if not TOGETHER_API_KEY:
25
  raise ValueError("TOGETHER_API_KEY environment variable not set. Please check your .env file.")
26
 
27
+ VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "data/vector_db_chroma")
28
  COLLECTION_NAME = "my_instrument_manual_chunks"
29
 
30
  LLM_MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
data/vector_db_chroma/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2d0d855229b7e7711de0cecbee3f1b2214b1b4b9a84f9e6a47510907654a0ae
3
  size 7811072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:348208d4507d476ce65898670e6da2444745ea2e93a9db3a84aa6391838a7976
3
  size 7811072
data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/data_level0.bin RENAMED
File without changes
data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/header.bin RENAMED
File without changes
data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/length.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29382d806e774618a2c3512be096526ba7c53fe3fcfb120b10c4f353accbad9f
3
  size 40000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9be858a747c8b75b3f6c0b8e9d8e232339742a22cecb6e5b5d653483206a73d
3
  size 40000
data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/link_lists.bin RENAMED
File without changes
database/processed_documents.db CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bdefd49f0d6b66e66b97cf2f699ae75fa5aa6963380031c31d7a61e4b3d6ce0
3
  size 999424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd059441f29cb975054524e0fd1eb53f00ebfbc5fc51277c449a4e8400f55494
3
  size 999424
main.py CHANGED
@@ -19,7 +19,7 @@ load_dotenv()
19
  # --- Configuration ---
20
  db_directory = "database"
21
  db_path = os.path.join(db_directory, "processed_documents.db")
22
- VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "/tmp/vector_db_chroma")
23
  collection_name = "my_instrument_manual_chunks"
24
 
25
  # Ensure TOGETHER_API_KEY is set
 
19
  # --- Configuration ---
20
  db_directory = "database"
21
  db_path = os.path.join(db_directory, "processed_documents.db")
22
+ VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "data/vector_db_chroma")
23
  collection_name = "my_instrument_manual_chunks"
24
 
25
  # Ensure TOGETHER_API_KEY is set
setup_knowledge_base.py CHANGED
@@ -224,7 +224,7 @@ def get_chunks_from_db_for_embedding(db_path):
224
  if conn:
225
  conn.close()
226
 
227
- def load_chunks_to_vector_db(chunks_data, db_path="vector_db_chroma", collection_name="document_chunks", embeddings_model_name="togethercomputer/m2-bert-80M-32k-retrieval"):
228
  """
229
  Loads text chunks and their embeddings into a ChromaDB vector database.
230
  This function will now ADD chunks if they are new (based on their IDs).
@@ -285,7 +285,7 @@ if __name__ == "__main__":
285
  #pdf_input_directory = "input" # Ensure this directory exists and contains your PDFs
286
  db_directory = "database"
287
  db_path = os.path.join(db_directory, "processed_documents.db")
288
- vector_db_dir = "vector_db_chroma"
289
  collection_name = "my_instrument_manual_chunks"
290
  embeddings_model_name = "togethercomputer/m2-bert-80M-32k-retrieval" # 確保與 main.py 中使用的一致
291
 
 
224
  if conn:
225
  conn.close()
226
 
227
+ def load_chunks_to_vector_db(chunks_data, db_path="data/vector_db_chroma", collection_name="document_chunks", embeddings_model_name="togethercomputer/m2-bert-80M-32k-retrieval"):
228
  """
229
  Loads text chunks and their embeddings into a ChromaDB vector database.
230
  This function will now ADD chunks if they are new (based on their IDs).
 
285
  #pdf_input_directory = "input" # Ensure this directory exists and contains your PDFs
286
  db_directory = "database"
287
  db_path = os.path.join(db_directory, "processed_documents.db")
288
+ vector_db_dir = "data/vector_db_chroma"
289
  collection_name = "my_instrument_manual_chunks"
290
  embeddings_model_name = "togethercomputer/m2-bert-80M-32k-retrieval" # 確保與 main.py 中使用的一致
291