change file
Browse files- api_service.py +1 -1
- data/vector_db_chroma/chroma.sqlite3 +1 -1
- data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/data_level0.bin +0 -0
- data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/header.bin +0 -0
- data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/length.bin +1 -1
- data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/link_lists.bin +0 -0
- database/processed_documents.db +1 -1
- main.py +1 -1
- setup_knowledge_base.py +2 -2
api_service.py
CHANGED
@@ -24,7 +24,7 @@ TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
|
|
24 |
if not TOGETHER_API_KEY:
|
25 |
raise ValueError("TOGETHER_API_KEY environment variable not set. Please check your .env file.")
|
26 |
|
27 |
-
VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "/
|
28 |
COLLECTION_NAME = "my_instrument_manual_chunks"
|
29 |
|
30 |
LLM_MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
|
|
|
24 |
if not TOGETHER_API_KEY:
|
25 |
raise ValueError("TOGETHER_API_KEY environment variable not set. Please check your .env file.")
|
26 |
|
27 |
+
VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "data/vector_db_chroma")
|
28 |
COLLECTION_NAME = "my_instrument_manual_chunks"
|
29 |
|
30 |
LLM_MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
|
data/vector_db_chroma/chroma.sqlite3
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7811072
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:348208d4507d476ce65898670e6da2444745ea2e93a9db3a84aa6391838a7976
|
3 |
size 7811072
|
data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/data_level0.bin
RENAMED
File without changes
|
data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/header.bin
RENAMED
File without changes
|
data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/length.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 40000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9be858a747c8b75b3f6c0b8e9d8e232339742a22cecb6e5b5d653483206a73d
|
3 |
size 40000
|
data/vector_db_chroma/{e00074a2-0e3e-4a43-a595-44f28c720a1a → f93d1fc5-a823-4a36-8ed3-005459a3df60}/link_lists.bin
RENAMED
File without changes
|
database/processed_documents.db
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 999424
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd059441f29cb975054524e0fd1eb53f00ebfbc5fc51277c449a4e8400f55494
|
3 |
size 999424
|
main.py
CHANGED
@@ -19,7 +19,7 @@ load_dotenv()
|
|
19 |
# --- Configuration ---
|
20 |
db_directory = "database"
|
21 |
db_path = os.path.join(db_directory, "processed_documents.db")
|
22 |
-
VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "/
|
23 |
collection_name = "my_instrument_manual_chunks"
|
24 |
|
25 |
# Ensure TOGETHER_API_KEY is set
|
|
|
19 |
# --- Configuration ---
|
20 |
db_directory = "database"
|
21 |
db_path = os.path.join(db_directory, "processed_documents.db")
|
22 |
+
VECTOR_DB_DIR = os.getenv("VECTOR_DB_DIR", "data/vector_db_chroma")
|
23 |
collection_name = "my_instrument_manual_chunks"
|
24 |
|
25 |
# Ensure TOGETHER_API_KEY is set
|
setup_knowledge_base.py
CHANGED
@@ -224,7 +224,7 @@ def get_chunks_from_db_for_embedding(db_path):
|
|
224 |
if conn:
|
225 |
conn.close()
|
226 |
|
227 |
-
def load_chunks_to_vector_db(chunks_data, db_path="vector_db_chroma", collection_name="document_chunks", embeddings_model_name="togethercomputer/m2-bert-80M-32k-retrieval"):
|
228 |
"""
|
229 |
Loads text chunks and their embeddings into a ChromaDB vector database.
|
230 |
This function will now ADD chunks if they are new (based on their IDs).
|
@@ -285,7 +285,7 @@ if __name__ == "__main__":
|
|
285 |
#pdf_input_directory = "input" # Ensure this directory exists and contains your PDFs
|
286 |
db_directory = "database"
|
287 |
db_path = os.path.join(db_directory, "processed_documents.db")
|
288 |
-
vector_db_dir = "vector_db_chroma"
|
289 |
collection_name = "my_instrument_manual_chunks"
|
290 |
embeddings_model_name = "togethercomputer/m2-bert-80M-32k-retrieval" # 確保與 main.py 中使用的一致
|
291 |
|
|
|
224 |
if conn:
|
225 |
conn.close()
|
226 |
|
227 |
+
def load_chunks_to_vector_db(chunks_data, db_path="data/vector_db_chroma", collection_name="document_chunks", embeddings_model_name="togethercomputer/m2-bert-80M-32k-retrieval"):
|
228 |
"""
|
229 |
Loads text chunks and their embeddings into a ChromaDB vector database.
|
230 |
This function will now ADD chunks if they are new (based on their IDs).
|
|
|
285 |
#pdf_input_directory = "input" # Ensure this directory exists and contains your PDFs
|
286 |
db_directory = "database"
|
287 |
db_path = os.path.join(db_directory, "processed_documents.db")
|
288 |
+
vector_db_dir = "data/vector_db_chroma"
|
289 |
collection_name = "my_instrument_manual_chunks"
|
290 |
embeddings_model_name = "togethercomputer/m2-bert-80M-32k-retrieval" # 確保與 main.py 中使用的一致
|
291 |
|