Spaces:
Running
Running
from typing import Dict, Any | |
from langchain_core.documents import Document | |
from ask_candid.retrieval.sources.schema import ElasticSourceConfig, ElasticHitsResult | |
from ask_candid.retrieval.sources.utils import get_context | |
# Source configuration for IssueLab: the Elasticsearch index name plus the
# document fields handed to ElasticSourceConfig as `text_fields`.
# NOTE(review): presumably these are the fields queried at retrieval time;
# confirm against ElasticSourceConfig.
IssueLabConfig = ElasticSourceConfig(
    index_name="search-semantic-issuelab-elser_ve2",
    text_fields=(
        "description",
        "content",
        "combined_issuelab_findings",
        "combined_item_description",
    ),
)
def process_issuelab_hit(hit: ElasticHitsResult) -> Document:
    """Convert an IssueLab search hit into a ``Document``.

    The page content is the concatenation (separated by blank lines) of the
    combined item description, the combined findings, the description and the
    context text that ``get_context`` builds for the ``content`` field.

    Args:
        hit: Search hit whose ``source`` mapping holds the indexed fields.

    Returns:
        A ``Document`` whose metadata carries ``title``, ``source``
        (always ``"IssueLab"``), ``source_id`` and ``url``.

    Raises:
        KeyError: If ``title`` or ``resource_id`` is missing from the source.
    """
    source = hit.source

    def _text(field: str) -> str:
        # `.get(field, "")` only covers a *missing* key; a key stored with a
        # null value would come back as None and break the join below.
        return source.get(field) or ""

    combined_item_description = _text("combined_item_description")  # title inside
    combined_issuelab_findings = _text("combined_issuelab_findings")
    description = _text("description")

    # Only the long `content` field is run through get_context; the short
    # fields above are used as-is.
    chunks_with_context_txt = get_context("content", hit, context_length=12)

    page_content = "\n\n".join([
        combined_item_description,
        combined_issuelab_findings,
        description,
        chunks_with_context_txt,
    ])
    return Document(
        page_content=page_content,
        metadata={
            "title": source["title"],
            "source": "IssueLab",
            "source_id": source["resource_id"],
            "url": source.get("permalink", ""),
        },
    )
def issuelab_card_html(doc: Dict[str, Any], height_px: int = 200, show_chunks: bool = False) -> str:
    """Render an IssueLab result as an HTML card.

    Args:
        doc: Result mapping. Must contain ``cover_graphic_small``,
            ``resource_id``, ``issuelab_url``, ``title``, ``description`` and
            ``combined_item_description``. ``inner_hits`` is read only when
            ``show_chunks`` is true.
        height_px: Card height in pixels; the cover image container is
            rendered 10px shorter than this value.
        show_chunks: When true, append the matched chunks found under
            ``inner_hits`` for fields whose name contains ``"content"``.

    Returns:
        The card markup as a string.

    Raises:
        KeyError: If one of the required keys is missing from ``doc``.
    """
    # NOTE(review): values from `doc` are interpolated into the markup without
    # HTML escaping. Confirm whether the indexed fields are trusted / may
    # intentionally contain markup before adding escaping here.
    chunks_html = ""
    if show_chunks:
        chunk_blocks = []
        # A missing or null `inner_hits` simply means there is nothing to show.
        for inner in (doc.get("inner_hits") or {}).values():
            for nested_hit in inner["hits"]["hits"]:
                for field_name, field_values in nested_hit["fields"].items():
                    # we don't want other chunks
                    if "content" in field_name:
                        chunk_blocks.append(f"<div><p>{field_values[0]['chunk'][0]}</p></div>")
        # Skip the heading entirely when no chunk matched, instead of
        # rendering a heading with nothing beneath it.
        if chunk_blocks:
            chunks_html = "<span><b>Relevant parts of the content:</b></span>" + "<br>".join(chunk_blocks)

    html = f"""
    <div style='height: auto; padding: 5px;'>
        <div style='border: 1px solid #febe10;'>
            <span style='display: inline-block; height: {height_px - 10}px; padding: 5px; vertical-align: top;'>
                <img
                    src='{doc['cover_graphic_small']}'
                    style='max-height: 100%; overflow: hidden; border-radius: 3%;'
                >
            </span>
            <span style='padding: 10px; display: inline-block; width: 70%;'>
                <div>
                    <span><b>Issuelab ID:</b> {doc['resource_id']}</span>
                    <br>
                    <span>
                        <a href='{doc['issuelab_url']}' target='_blank' style='text-decoration: none;'>
                            {doc['title']}
                        </a>
                    </span>
                    <br>
                    <span><b>Description:</b> {doc['description']}</span>
                    <br>
                    <div>{doc['combined_item_description']}</div>
                    <br>
                    <div>{chunks_html}</div>
                </div>
            </span>
        </div>
    </div>
    """
    return html