# Source: Hugging Face Spaces page capture — space status at scrape time: "Runtime error".
import os
import time

import chromadb
import openai
import pandas as pd
from llama_index import (
    Document,
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import OpenAI
from llama_index.vector_stores.chroma import ChromaVectorStore
from tqdm import tqdm
from trulens_eval import Tru

import utils
from utils import get_prebuilt_trulens_recorder

# Configure the OpenAI client at import time; all downstream llama_index /
# trulens calls depend on this module-level key being set.
openai.api_key = utils.get_openai_api_key()
def main():
    """Run a TruLens evaluation pass over the fine-tuned RAG query engine.

    Re-opens the persisted Chroma vector store, builds a query engine backed
    by gpt-3.5-turbo and the local fine-tuned embedding model, sends every
    evaluation question through the engine under a TruLens recorder, and
    writes the feedback records plus wall-clock timing to ./results/.

    Side effects: reads ./database/mock_qna_source.csv and
    ./raw_documents/eval_questions.txt; writes ./results/records.csv and
    ./results/time_cost.txt; appends to ./models/trulens_eval.sqlite.
    """
    start_time = time.time()

    llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
    fine_tuned_path = "local:./models/fine-tuned-embeddings"

    # Re-open the Chroma collection persisted by a prior indexing run.
    db = chromadb.PersistentClient(path="./models/chroma_db")
    chroma_collection = db.get_or_create_collection("quickstart")

    # Assign chroma as the vector_store to the context.
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    service_context = ServiceContext.from_defaults(llm=llm, embed_model=fine_tuned_path)

    print("Loading embeddings from vector store..")
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store,
        storage_context=storage_context,
        service_context=service_context,
    )
    query_engine = index.as_query_engine()

    # Evaluation set = non-null questions from the curated QnA CSV plus the
    # free-form questions file (one question per blank-line-separated chunk).
    mock_qna_source = pd.read_csv("./database/mock_qna_source.csv")
    mock_qna_source = mock_qna_source[mock_qna_source["question"].notnull()]
    print("mock_qna_source.shape", mock_qna_source.shape)

    with open("./raw_documents/eval_questions.txt", "r") as fp:
        questions_content = fp.read()
    questions_content_ls = questions_content.split("\n\n")

    eval_questions = mock_qna_source["question"].tolist() + questions_content_ls

    # Smoke-test the engine on a single question before the full run.
    response = query_engine.query(eval_questions[0])
    print(str(response))

    tru = Tru(database_file="./models/trulens_eval.sqlite")
    tru_recorder = get_prebuilt_trulens_recorder(
        query_engine, app_id="Direct Query Engine"
    )

    print("Sending each question to llm..")
    # The recorder context captures every query; the responses themselves
    # are not needed here, only the recorded feedback.
    with tru_recorder:
        for question in tqdm(eval_questions):
            query_engine.query(question)

    records, _feedback = tru.get_records_and_feedback(app_ids=[])
    os.makedirs("./results", exist_ok=True)
    records.to_csv("./results/records.csv", index=False)

    print(tru.db.engine.url.render_as_string(hide_password=False))

    time_spent_mins = (time.time() - start_time) / 60
    with open("./results/time_cost.txt", "w") as fp:
        fp.write(f"Takes {int(time_spent_mins)} mins to create llm evaluation.")


if __name__ == "__main__":
    main()