File size: 3,081 Bytes
a0e37e2
cc80c3d
f86d7f2
 
 
 
cc80c3d
 
 
 
 
a0e37e2
 
f86d7f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0e37e2
 
 
 
f86d7f2
a0e37e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from typing import Dict, Any

from langchain_core.documents import Document

from ask_candid.retrieval.sources.schema import ElasticSourceConfig, ElasticHitsResult
from ask_candid.retrieval.sources.utils import get_context

# Elasticsearch source configuration for the IssueLab semantic (ELSER v2) index.
# `text_fields` are the fields this source searches over; they match the fields
# read back out in `process_issuelab_hit` below.
IssueLabConfig = ElasticSourceConfig(
    index_name="search-semantic-issuelab-elser_ve2",
    text_fields=("description", "content", "combined_issuelab_findings", "combined_item_description")
)


def process_issuelab_hit(hit: ElasticHitsResult) -> Document:
    """Convert an IssueLab Elasticsearch hit into a LangChain ``Document``.

    Short descriptive fields are taken verbatim from the hit source, while the
    long ``content`` field is reduced to its most relevant chunks (with
    surrounding context) via ``get_context``.
    """
    source = hit.source
    # Only the long "content" text needs chunk-context extraction; the other
    # fields are short enough to include whole.
    content_context = get_context("content", hit, context_length=12)
    page_parts = [
        source.get("combined_item_description", ""),  # includes the title
        source.get("combined_issuelab_findings", ""),
        source.get("description", ""),
        content_context,
    ]
    return Document(
        page_content='\n\n'.join(page_parts),
        metadata={
            "title": source["title"],
            "source": "IssueLab",
            "source_id": source["resource_id"],
            "url": source.get("permalink", ""),
        },
    )


def issuelab_card_html(doc: Dict[str, Any], height_px: int = 200, show_chunks: bool = False) -> str:
    """Render an HTML card for an IssueLab search result.

    Args:
        doc: Elasticsearch document source. Must contain
            ``cover_graphic_small``, ``resource_id``, ``issuelab_url``,
            ``title``, ``description`` and ``combined_item_description``.
            When ``show_chunks`` is True it must also contain ``inner_hits``
            with the semantic-chunk structure (``hits.hits[].fields``).
        height_px: Total card height in pixels; the cover-image span is
            rendered 10px shorter to leave room for padding.
        show_chunks: When True, append the relevant "content" chunks pulled
            from the inner hits below the description.

    Returns:
        An HTML snippet for the card.
    """
    chunks_html = ""
    if show_chunks:
        # Keep only chunks originating from "content" fields; inner hits may
        # also carry chunks for other embedded fields we don't want to show.
        cleaned_text = [
            f"<div><p>{field_values[0]['chunk'][0]}</p></div>"
            for inner_hit in doc["inner_hits"].values()
            for hit in inner_hit["hits"]["hits"]
            for field_name, field_values in hit["fields"].items()
            if "content" in field_name
        ]
        chunks_html = "<span><b>Relevant parts of the content:</b></span>" + "<br>".join(cleaned_text)

    html = f"""
    <div style='height: auto; padding: 5px;'>
        <div style='border: 1px solid #febe10;'>
            <span style='display: inline-block; height: {height_px - 10}px; padding: 5px; vertical-align: top;'>
                <img
                    src='{doc['cover_graphic_small']}'
                    style='max-height: 100%; overflow: hidden; border-radius: 3%;'
                >
            </span>

            <span style='padding: 10px; display: inline-block; width: 70%;'>
                <div>
                    <span><b>Issuelab ID:</b> {doc['resource_id']}</span>
                    <br>
                    <span>
                        <a href='{doc['issuelab_url']}' target='_blank' style='text-decoration: none;'>
                            {doc['title']}
                        </a>
                    </span>
                    <br>

                    <span><b>Description:</b> {doc['description']}</span>
                    <br>
                    <div>{doc['combined_item_description']}</div>
                    <br>
                    <div>{chunks_html}</div>
                    
                </div>
            </span>
        </div>
    </div>
    """
    return html