Spaces:
Running
Running
File size: 3,081 Bytes
a0e37e2 cc80c3d f86d7f2 cc80c3d a0e37e2 f86d7f2 a0e37e2 f86d7f2 a0e37e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
from typing import Dict, Any
from langchain_core.documents import Document
from ask_candid.retrieval.sources.schema import ElasticSourceConfig, ElasticHitsResult
from ask_candid.retrieval.sources.utils import get_context
# Source configuration for IssueLab: the Elasticsearch index to query and the
# document fields declared as text fields for that index.
IssueLabConfig = ElasticSourceConfig(
    index_name="search-semantic-issuelab-elser_ve2",
    text_fields=(
        "description",
        "content",
        "combined_issuelab_findings",
        "combined_item_description",
    ),
)
def process_issuelab_hit(hit: ElasticHitsResult) -> Document:
    """Turn one IssueLab search hit into a ``Document``.

    The page content is four pieces of text joined by blank lines, in this
    order: ``combined_item_description``, ``combined_issuelab_findings``,
    ``description`` and the result of ``get_context`` for the ``content``
    field. The three short fields fall back to an empty string when absent.

    Args:
        hit: Search hit whose ``source`` mapping holds the IssueLab record.

    Returns:
        A ``Document`` carrying the joined text plus ``title``, ``source``,
        ``source_id`` and ``url`` metadata.

    Raises:
        KeyError: If ``title`` or ``resource_id`` is missing from the hit
            source (``permalink`` is optional and defaults to ``""``).
    """
    record = hit.source

    # Short fields are used verbatim; "combined_item_description" already
    # carries the title.
    short_field_names = (
        "combined_item_description",
        "combined_issuelab_findings",
        "description",
    )
    sections = [record.get(field_name, "") for field_name in short_field_names]

    # Only the long "content" field goes through get_context
    # (presumably it returns the matched chunks with surrounding context;
    # confirm against the helper).
    sections.append(get_context("content", hit, context_length=12))

    body = "\n\n".join(sections)
    details = {
        "title": record["title"],
        "source": "IssueLab",
        "source_id": record["resource_id"],
        "url": record.get("permalink", ""),
    }
    return Document(page_content=body, metadata=details)
def issuelab_card_html(
    doc: Dict[str, Any],
    height_px: int = 200,
    show_chunks: bool = False,
) -> str:
    """Build the HTML card for one IssueLab search result.

    Every value taken from ``doc`` is HTML-escaped before it is placed in the
    markup, so ``<``, ``&`` or quote characters in indexed text or URLs can
    neither break the single-quoted attributes nor inject tags.

    Args:
        doc: Result record. Reads ``cover_graphic_small``, ``resource_id``,
            ``issuelab_url``, ``title``, ``description`` and
            ``combined_item_description``; ``inner_hits`` is read only when
            ``show_chunks`` is true.
        height_px: Nominal card height in pixels; the cover-image column is
            given ``height_px - 10`` px.
        show_chunks: When true, append the first chunk of every inner-hit
            field whose name contains ``"content"``.

    Returns:
        The card markup as a string.

    Raises:
        KeyError: If one of the keys listed above is missing from ``doc``.
    """
    # Function-scope import so this block does not depend on the module's
    # import section (a local named ``html`` would also shadow the module).
    from html import escape

    def _esc(value: Any) -> str:
        """Stringify ``value`` and escape it for HTML text or attributes."""
        return escape(str(value), quote=True)

    chunks_html = ""
    if show_chunks:
        chunk_blocks = []
        for inner in doc["inner_hits"].values():
            for inner_hit in inner["hits"]["hits"]:
                for field_name, field_value in inner_hit["fields"].items():
                    # Only chunks of the "content" field are shown.
                    if "content" in field_name:
                        chunk_text = _esc(field_value[0]["chunk"][0])
                        chunk_blocks.append(f"<div><p>{chunk_text}</p></div>")
        chunks_html = (
            "<span><b>Relevant parts of the content:</b></span>"
            + "<br>".join(chunk_blocks)
        )

    cover_src = _esc(doc["cover_graphic_small"])
    resource_id = _esc(doc["resource_id"])
    link_href = _esc(doc["issuelab_url"])
    title = _esc(doc["title"])
    description = _esc(doc["description"])
    item_description = _esc(doc["combined_item_description"])

    # The markup is assembled line by line so the emitted text does not
    # depend on how this function is indented; the empty first and last
    # entries reproduce the leading and trailing newline of the card.
    card_lines = (
        "",
        "<div style='height: auto; padding: 5px;'>",
        "<div style='border: 1px solid #febe10;'>",
        f"<span style='display: inline-block; height: {height_px - 10}px; padding: 5px; vertical-align: top;'>",
        "<img",
        f"src='{cover_src}'",
        "style='max-height: 100%; overflow: hidden; border-radius: 3%;'",
        ">",
        "</span>",
        "<span style='padding: 10px; display: inline-block; width: 70%;'>",
        "<div>",
        f"<span><b>Issuelab ID:</b> {resource_id}</span>",
        "<br>",
        "<span>",
        f"<a href='{link_href}' target='_blank' style='text-decoration: none;'>",
        title,
        "</a>",
        "</span>",
        "<br>",
        f"<span><b>Description:</b> {description}</span>",
        "<br>",
        f"<div>{item_description}</div>",
        "<br>",
        f"<div>{chunks_html}</div>",
        "</div>",
        "</span>",
        "</div>",
        "</div>",
        "",
    )
    return "\n".join(card_lines)
|