# brainsqueeze's picture
# Smarter document context retrieval
# f86d7f2 verified
from typing import Dict, Any
from langchain_core.documents import Document
from ask_candid.retrieval.sources.schema import ElasticSourceConfig, ElasticHitsResult
from ask_candid.retrieval.sources.utils import get_context
# Source configuration for the Candid Learning index: the index name to query
# and the document fields supplied as ``text_fields``.
CandidLearningConfig = ElasticSourceConfig(
    index_name="search-semantic-candid-learning_ve1",
    text_fields=(
        "content",
        "title",
        "training_topics",
        "staff_recommendations",
    ),
)
def process_learning_hit(hit: ElasticHitsResult) -> Document:
    """Convert a Candid Learning search hit into a LangChain ``Document``.

    The page content is the title, staff recommendations, training topics and
    the text returned by ``get_context`` for the ``content`` field, joined by
    blank lines in that order. Missing optional fields default to ``""``.

    Args:
        hit: Search hit whose ``source`` mapping holds the indexed fields.

    Returns:
        A ``Document`` carrying the combined text plus the metadata keys
        ``title``, ``source``, ``source_id`` and ``url``.

    Raises:
        KeyError: If the hit's source has no ``post_id`` entry.
    """
    source = hit.source
    title = source.get("title", "")
    content_with_context_txt = get_context("content", hit, context_length=12)
    training_topics = source.get("training_topics", "")
    staff_recommendations = source.get("staff_recommendations", "")

    page_content = "\n\n".join(
        [title, staff_recommendations, training_topics, content_with_context_txt]
    )
    return Document(
        page_content=page_content,
        metadata={
            # Reuse the defaulted title: indexing source["title"] here would
            # raise KeyError for a hit the text assembly above already accepts.
            "title": title,
            "source": "Candid Learning",
            # The post id stays mandatory; a hit without one is rejected.
            "source_id": source["post_id"],
            "url": source.get("url", ""),
        },
    )
def build_card_html(doc: Dict[str, Any], height_px: int = 200, show_chunks=False) -> str:
    """Render an HTML card linking to a Candid Learning resource.

    Args:
        doc: Mapping that must contain ``url`` and ``title``.
        height_px: Height in pixels applied to both wrapping ``div`` elements.
        show_chunks: Accepted for interface compatibility; the card markup
            does not use it.

    Returns:
        The HTML snippet for the card.

    Raises:
        KeyError: If ``doc`` lacks ``url`` or ``title``.
    """
    # The previous title/excerpt truncation bookkeeping was removed: its result
    # never reached the markup, and it raised TypeError on non-sized values.
    # NOTE(review): title and url are interpolated unescaped -- confirm they
    # are trusted or already HTML-safe before exposing user-supplied data.
    url = f"{doc['url']}"
    return f"""
<div style='height: {height_px}px; padding: 5px;'>
<div style='height: {height_px}px; border: 1px solid #febe10;'>
<span style='padding-left: 10px; display: inline-block; width: 100%;'>
<div>
<span>
<b>Candid Learning resource:</b>
<a href='{url}' target='_blank' style='text-decoration: none;'>
{doc['title']}
</a>
</span>
<br>
</div>
</span>
</div>
</div>
"""