|
"""Shared functions for state management.""" |
|
|
|
import hashlib |
|
import uuid |
|
from typing import Any, Literal, Optional, Union |
|
|
|
from langgraph.graph import MessagesState |
|
from langchain_core.documents import Document |
|
|
|
class State(MessagesState):
    """Graph state: everything from ``MessagesState`` plus two optional strings."""

    # Presumably the name of the next step to route to; may be None.
    # NOTE(review): confirm the meaning against the graph definition.
    next: Optional[str]

    # Free-form feedback text supplied by the user, or None when absent.
    user_feedback: Optional[str]
|
|
|
def _generate_uuid(page_content: str) -> str:
    """Return a deterministic UUID string derived from ``page_content``.

    The UTF-8 encoded text is hashed with MD5 and the resulting 128-bit
    digest is formatted as a UUID, so identical content always yields the
    same identifier. MD5 is used purely as a content fingerprint here, not
    as a security measure.

    Args:
        page_content: The document text to fingerprint.

    Returns:
        The canonical hyphenated UUID string for the content.
    """
    # usedforsecurity=False keeps this working on FIPS-restricted Python
    # builds, where plain MD5 is refused; the digest value is unchanged.
    digest = hashlib.md5(
        page_content.encode("utf-8"), usedforsecurity=False
    ).hexdigest()
    return str(uuid.UUID(hex=digest))
|
|
|
def reduce_docs(
    existing: Optional[list[Document]],
    new: Union[
        list[Document],
        list[dict[str, Any]],
        list[str],
        str,
        Literal["delete"],
    ],
) -> list[Document]:
    """Merge ``new`` input into the ``existing`` documents.

    Behaviour by input type:

    * The literal string ``"delete"`` discards everything and returns ``[]``.
    * Any other single string is wrapped in a ``Document`` and appended.
    * A list is processed item by item. Strings and dictionaries are turned
      into ``Document`` objects; ``Document`` instances are used as given.
      Dictionaries and ``Document`` instances whose ``"uuid"`` is already
      present (in ``existing`` or earlier in ``new``) are skipped.
    * Any other input leaves the existing documents unchanged.

    Every document added by this call carries a ``"uuid"`` entry in its
    metadata; when none is supplied it is derived from the page content via
    ``_generate_uuid``.

    Args:
        existing: The documents currently in the state, if any.
        new: The new input to process.

    Returns:
        A new list holding the existing documents followed by the newly
        added ones. Neither ``existing`` nor the incoming ``Document``
        objects are mutated.
    """
    if new == "delete":
        return []

    # Work on a copy so the caller's list is never modified.
    merged = list(existing) if existing else []

    if isinstance(new, str):
        # A lone string is always appended; no duplicate check is performed.
        merged.append(
            Document(page_content=new, metadata={"uuid": _generate_uuid(new)})
        )
        return merged

    if not isinstance(new, list):
        return merged

    seen_ids = {doc.metadata.get("uuid") for doc in merged}
    for item in new:
        if isinstance(item, str):
            item_id = _generate_uuid(item)
            # NOTE(review): unlike the dict/Document branches, strings are
            # appended even when their uuid is already known. Behaviour is
            # preserved as-is; confirm whether deduplication is wanted here.
            merged.append(Document(page_content=item, metadata={"uuid": item_id}))
            seen_ids.add(item_id)

        elif isinstance(item, dict):
            # Tolerate an explicit ``"metadata": None`` as well as a missing
            # key; previously ``None`` raised AttributeError on ``.get``.
            metadata = item.get("metadata") or {}
            item_id = metadata.get("uuid") or _generate_uuid(
                item.get("page_content", "")
            )
            if item_id in seen_ids:
                continue
            merged.append(
                Document(**{**item, "metadata": {**metadata, "uuid": item_id}})
            )
            seen_ids.add(item_id)

        elif isinstance(item, Document):
            item_id = item.metadata.get("uuid", "")
            needs_uuid = not item_id
            if needs_uuid:
                item_id = _generate_uuid(item.page_content)
            if item_id in seen_ids:
                continue
            doc = item
            if needs_uuid:
                # Deep-copy before stamping the uuid so the caller's
                # Document is left untouched; done only for kept items.
                doc = item.copy(deep=True)
                doc.metadata["uuid"] = item_id
            merged.append(doc)
            seen_ids.add(item_id)

    return merged
|
|