Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import logging | |
| import os | |
| import random | |
| from datetime import datetime | |
| from functools import lru_cache | |
| from typing import Sequence | |
| from zoneinfo import ZoneInfo | |
| import langsmith | |
| from langchain_core.documents import Document | |
| from langchain_community.document_transformers import LongContextReorder | |
| from langchain.retrievers.document_compressors import FlashrankRerank | |
# Configure the root logger at import time so that only records at ERROR
# level or above are emitted by default.
logging.basicConfig(level=logging.ERROR)
class DocumentFormatter:
    """Callable that renders documents as one numbered text block.

    Every document contributes a ``- <prefix> <n>:`` header (``n`` counts
    from 1), a blank line, a tab, and then the document's ``page_content``.
    Consecutive entries are separated by a line holding ``---``.
    """

    def __init__(self, prefix: str):
        """Remember the label written before each document number."""
        self.prefix = prefix

    def __call__(self, docs: list[Document]) -> str:
        """Return the formatted text for *docs*; an empty list gives ``""``."""
        numbered = enumerate(docs, start=1)
        return "\n---\n".join(
            f"- {self.prefix} {position}:\n\n\t" + doc.page_content
            for position, doc in numbered
        )
def get_datetime() -> str:
    """Return the current America/Vancouver time as a display string.

    The layout is ``<weekday name>, <year>-<abbreviated month>-<day>
    <hour>:<minute>:<second>``, produced by ``strftime``.
    """
    vancouver_tz = ZoneInfo("America/Vancouver")
    local_now = datetime.now(tz=vancouver_tz)
    return local_now.strftime("%A, %Y-%b-%d %H:%M:%S")
def reorder_documents(docs: list[Document]) -> Sequence[Document]:
    """Run *docs* through a fresh ``LongContextReorder`` and return its output.

    A new transformer instance is created on every call; the ordering itself
    is decided entirely by ``LongContextReorder.transform_documents``.
    """
    reorderer = LongContextReorder()
    return reorderer.transform_documents(docs)
def randomize_documents(documents: list[Document]) -> list[Document]:
    """Return the documents in a random order without touching the input.

    Args:
        documents: Documents to shuffle. The list is only read.

    Returns:
        A new list holding the same document objects in shuffled order.
    """
    # Shuffle a shallow copy: shuffling in place would silently reorder a
    # list the caller may still hold (e.g. a retrieval result reused later).
    shuffled = list(documents)
    random.shuffle(shuffled)
    return shuffled
def create_langsmith_client():
    """Set the LangSmith tracing environment variables and return a client.

    The API key is checked before anything is written to ``os.environ``, so
    a failed call leaves the process environment unchanged.

    Returns:
        The object produced by ``langsmith.Client()`` (called with no
        arguments).

    Raises:
        EnvironmentError: If ``LANGCHAIN_API_KEY`` is unset or empty.
    """
    # Validate first: writing the tracing variables and then raising would
    # leave tracing switched on for the rest of the process without a key.
    if not os.getenv("LANGCHAIN_API_KEY"):
        raise EnvironmentError("Missing environment variable: LANGCHAIN_API_KEY")

    os.environ.update(
        {
            "LANGCHAIN_TRACING_V2": "true",
            "LANGCHAIN_PROJECT": "talltree-ai-assistant",
            "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
        }
    )
    return langsmith.Client()
def get_reranker(
    top_n: int = 3, model: str = "ms-marco-MiniLM-L-12-v2"
) -> FlashrankRerank:
    """Construct a ``FlashrankRerank`` from the given settings.

    Args:
        top_n: Passed through as the reranker's ``top_n`` argument.
        model: Passed through as the reranker's ``model`` argument.

    Returns:
        The newly created ``FlashrankRerank`` instance.
    """
    reranker = FlashrankRerank(model=model, top_n=top_n)
    return reranker