import ast

# Core libraries
from langchain_core.runnables import RunnableConfig

from langchain_core.messages import (
    AIMessage,
)
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts.chat import ChatPromptTemplate

from langchain import hub

from langgraph.graph import (
    END, 
    StateGraph,
)

from ea4all.src.ea4all_gra.configuration import AgentConfiguration
from ea4all.src.ea4all_gra.data import (
    CapabilityGap,
    GradeAnswer,
    GradeDocuments,  
    LandscapeAsIs,  
)

from ea4all.src.shared.utils import (
    get_llm_client,
    extract_structured_output,
    extract_topic_from_business_input,
    set_max_new_tokens,
    get_predicted_num_tokens_from_prompt,
)

from ea4all.src.shared.prompts import (
    LLAMA31_CHAT_PROMPT_FORMAT,
    LLAMA31_PROMPT_FORMAT,
)

from ea4all.src.shared import vectorstore

from ea4all.src.ea4all_gra.togaf_task2.state import Task2State

from ea4all.src.ea4all_apm.graph import get_retrieval_chain

# Retrieval grader: scores whether the retrieved IT landscape addresses the business query
def retrieval_grader(model):
    # LLM with function call
    structured_llm_grader = model.with_structured_output(GradeDocuments)

    # Prompt
    system = """You are an enterprise architect grader assessing the relevance of applications to address a business query.
        It does not need to be a stringent test; the objective is to filter out erroneous retrievals.
        If the application contains any keyword or semantic meaning related to the business query, grade it as relevant.
        Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question."""

    grade_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system),
            ("ai", "Retrieved applications: \n\n {landscape_asis} \n\n Business Query: {business_query}"),
        ]
    )

    grader = grade_prompt | structured_llm_grader

    return grader
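
# Usage sketch (illustrative only; assumes `model` is a chat client from get_llm_client
# that supports structured output, and that GradeDocuments exposes a binary_score field,
# mirroring the GradeAnswer usage further below):
#   grader = retrieval_grader(model)
#   grade = grader.invoke({"landscape_asis": "...", "business_query": "..."})
#   grade.binary_score  # 'yes' or 'no'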

# Business capability needs vs. landscape as-is gap analysis
def gap_grader(model):
   
    gap_prompt = hub.pull("learn-it-all-do-it-all/ea4all_togaf_capability_gap")

    # Set up a parser: 
    parser = PydanticOutputParser(pydantic_object=CapabilityGap) 
    gap_prompt = gap_prompt.partial(
        format_instructions=parser.get_format_instructions(),
    )

    capability_gap_grader = gap_prompt | model | parser

    return capability_gap_grader
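
# Usage sketch (illustrative; the input keys mirror the values built in
# grade_landscape_asis_v_capability_gap below):
#   gap = gap_grader(model).invoke({"application": "...", "capability": "..."})
#   gap.capability_status  # parsed CapabilityGap items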

## Question Re-writer
def question_rewriter(model):
    # Rewriter prompt
    rewrite_prompt = hub.pull("learn-it-all-do-it-all/ea4all_question_rewriter")
    rewrite_prompt = rewrite_prompt.partial(ai_output=LLAMA31_CHAT_PROMPT_FORMAT)

    rewriter = rewrite_prompt | model

    return rewriter
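
# Usage sketch (illustrative; the input keys match the invocation in transform_query below):
#   response = question_rewriter(model).invoke({"user_question": "...", "target": "vectorstore"})
#   response.content  # the re-phrased question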

## Answer grader: scores whether the RAG + LLM answer addresses the business query
def answer_grader():
    # Prompt
    answer_prompt = hub.pull('learn-it-all-do-it-all/ea4all_togaf_answer_grade')

    # Set up a parser: 
    parser = PydanticOutputParser(pydantic_object=GradeAnswer) 
    answer_prompt = answer_prompt.partial(
        format_instructions=parser.get_format_instructions(),
        ai_output = LLAMA31_PROMPT_FORMAT
    )

    return answer_prompt
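
# Note: unlike the other graders, answer_grader returns only the prompt; the caller
# (grade_generation_v_documents_and_question below) composes the chain, e.g.:
#   grader_chain = answer_grader() | model
#   score = grader_chain.invoke({"business_query": "...", "application": [...]})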

## Hallucination grader: scores whether the generated answer hallucinates relative to the retrieved landscape
def hallucination_grader(asis, identified):
    # Prompt revised to work with Llama-3 - issue with multi-word application names.
    # Changes: prompting (role, rules and restrictions, explicit instructions),
    # wording from "word" to "application(s)", and a switch to one-by-one assessment
    # using a simple text search.
    grader_false = []
    for d in identified:
        if d.lower() not in asis.lower():
            grader_false.append(d)

    return grader_false
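
# Worked example (pure substring check, no LLM involved):
#   hallucination_grader("CRM; SAP ERP; Data Lake", ["SAP ERP", "Billing"])
#   -> ["Billing"]  (not found in the as-is landscape, flagged as a hallucination)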

##Action-1 RAG retrieval - Assess-AsIs-Landscape
async def retrieve(state:Task2State, config: RunnableConfig):
    """
    Retrieve applications

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, landscape_asis, that contains the retrieved applications
    """

    configuration = AgentConfiguration.from_runnable_config(config)

    print("---RETRIEVE---")
    business_query = state['business_query']

    if not state.get('landscape_asis'):
        intent = ""
        if state['messages']:
            intent = ','.join(ast.literal_eval(str(state['messages'][-1].content))['intent']).lower().replace("'", "")

        business_query = f"""What existing applications can be re-used {intent}?"""

    # Retrieval: build the retriever and run the chain inside the context manager
    # so the retriever is not used after its scope is closed
    rag_input = 5
    with vectorstore.make_retriever(config) as retriever:
        retrieval = await get_retrieval_chain(rag_input, "ea4all_agent", business_query, retriever, config)

        landscape_asis = await retrieval.ainvoke(
            {"standalone_question": business_query},
            config={"recursion_limit": configuration.ea4all_recursion_limit})

    ## return Document page_content
    content = ';'.join(asis.page_content.strip() for asis in landscape_asis)

    name = state['next']

    return {
        "messages": [AIMessage(content=content, name=name)],
        "landscape_asis": landscape_asis,
        "business_query": business_query
    }

##Action-2 Grade retrieval against the business query and filter out irrelevant applications
def grade_landscape_asis(state:Task2State, config: RunnableConfig):
    """
    Formats retrieved applications for relevance assessment against the business query.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates the identified_asis key with formatted application descriptions
    """

    print("---CHECK DOCUMENT RELEVANCE TO BUSINESS QUERY---")
    business_query = state.get('business_query')
    landscape_asis = state.get('landscape_asis')

    # Build a compact as-is description from each retrieved document's metadata
    filtered_docs = []
    if landscape_asis is not None:
        for d in landscape_asis:
            ##Pick relevant Metadata
            application = d.metadata['source']
            capability = d.metadata['capability']
            description = d.metadata['description']  
            business_fit = d.metadata['business fit']
            roadmap = d.metadata['roadmap']
            asis = f"Application:{application}; Capability:{capability}; Description:{description};Business fit: {business_fit}; Roadmap: {roadmap};"

            filtered_docs.append(asis)
                    
    return {
        "business_query": business_query,
        "landscape_asis": landscape_asis,
        "identified_asis": filtered_docs
    }

##Action-3 Are there relevant applications? If yes, generate; otherwise, transform_query
def decide_to_generate(state:Task2State, config: RunnableConfig):
    """
    Determines whether to generate an answer, or re-generate a question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """

    print("---ASSESS GRADED APPLICATIONS---")
    filtered_applications = state['identified_asis']

    if not filtered_applications:
        # All documents were filtered out during relevance checking,
        # so re-generate a new query
        print(
            "---DECISION: NO APPLICATION IS RELEVANT TO THE BUSINESS QUERY, TRANSFORM QUERY---"
        )
        return "transform_query"
    else:
        # We have relevant documents, so generate answer
        print("---DECISION: GENERATE---")
        return "generate"

##Action-4a Generate if relevant applications found
def generate(state:Task2State, config: RunnableConfig):
    """
    Generate answer

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, identified_asis, that contains the LLM generation
    """
    configuration = AgentConfiguration.from_runnable_config(config)
    model = get_llm_client(configuration.togaf_model, configuration.api_base_url)
    
    print("---GENERATE---")
    landscape_asis = state['landscape_asis']

    values =  { 
        "business_query": state['business_query'],
        "applications": state['identified_asis']
        }

    parser = PydanticOutputParser(pydantic_object=LandscapeAsIs)

    hub_prompt = hub.pull('learn-it-all-do-it-all/ea4all_togaf_landscape_business_query')
    hub_prompt = hub_prompt.partial(
        format_instructions=parser.get_format_instructions(),
    )

    model.max_tokens = set_max_new_tokens(get_predicted_num_tokens_from_prompt(model,hub_prompt,values))

    task_2_generate = hub_prompt | model | parser
    generated_asis = task_2_generate.invoke(input=values, config={"recursion_limit":configuration.recursion_limit})

    name = state['next']
    
    return {
        "messages": [AIMessage(content=str(generated_asis.identified_asis), name=name)],
        "landscape_asis": landscape_asis, 
        "business_query": state['business_query'],
        "identified_asis": generated_asis.identified_asis
    }

##Action-4b Re-write query otherwise
def transform_query(state:Task2State, config: RunnableConfig):
    """
    Transform the query to produce a better question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates question key with a re-phrased question
    """
    # Get any user-provided configs - LLM model in use
    configuration = AgentConfiguration.from_runnable_config(config)
    model = get_llm_client(configuration.togaf_model, configuration.api_base_url)

    print("---TRANSFORM QUERY---")
    business_query = state['business_query']

    # Re-write question
    response = question_rewriter(model).invoke(
        {"user_question": business_query, "target": "vectorstore"},
    )
    
    generated_question = extract_topic_from_business_input(response.content)
    better_question = generated_question['rephrased']

    if better_question is None:
        better_question = business_query

    name = state['next']

    return {
        "messages": [AIMessage(content=better_question, name=name)],
        "business_query": better_question
    }

##Action-5 Grade final response
def grade_generation_v_documents_and_question(state:Task2State, config: RunnableConfig):
    """
    Determines whether the generation is grounded in the landscape_asis and answers business query.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """

    # Get any user-provided configs - LLM model in use
    configuration = AgentConfiguration.from_runnable_config(config)
    model = get_llm_client(configuration.togaf_model, configuration.api_base_url)

    # Bail out when the graph is close to exhausting its step budget
    # (the default avoids a None comparison if the key is absent)
    if state.get('remaining_steps', 0) <= 2:
        return "no match"

    print("---CHECK HALLUCINATIONS---")
    business_query = state['business_query']
    landscape_asis = state['landscape_asis']
    identified_asis = state['identified_asis']
    generated_asis = [item.application for item in identified_asis] if identified_asis else []

    score = hallucination_grader(str(landscape_asis),generated_asis)

    if not score:
        print("---DECISION: IDENTIFIED APPLICATION(s) IS GROUNDED IN LANDSCAPE ASIS---")
        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")

        values =  {"business_query": business_query, "application": identified_asis}
        prompt = answer_grader()
        model.max_tokens = set_max_new_tokens(get_predicted_num_tokens_from_prompt(model,prompt,values))
        grader_chain = prompt | model
        score = grader_chain.invoke(values)        
        extracted_answer = extract_structured_output(score.content)
        if extracted_answer is not None: ##REVIEW PROMPT W/ LLAMA3.1-70B
            grade = extracted_answer['binary_score']
        else:
            grade = "no"

        if grade == "yes":
            print("---DECISION: APPLICATION ADDRESSES BUSINESS QUERY---")
            return "useful"
        else:
            print("---DECISION: APPLICATION DOES NOT ADDRESS BUSINESS QUERY---")
            return "not useful"
    else:
        print("---DECISION: IDENTIFIED ASIS IS NOT GROUNDED IN LANDSCAPE ASIS, RE-TRY---")
        print(f"---HALLUCINATIONS: {score}---")
        return "not supported"

##Action-6 Analyse gap between current state and the desired future state - identified capabilities
def grade_landscape_asis_v_capability_gap(state:Task2State, config: RunnableConfig):
    """
    Analyse any gap between existing applications and the identified business capabilities needed to address the business query.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates landscape_gap key with capability gap status
    """
    
    # Get any user-provided configs - LLM model in use
    configuration = AgentConfiguration.from_runnable_config(config)
    model = get_llm_client(configuration.togaf_model, configuration.api_base_url)

    print("---CHECK SUPPORT IDENTIFIED APP TO BUSINESS CAPABILITY---")
    
    parser = PydanticOutputParser(pydantic_object=CapabilityGap)

    hub_prompt = hub.pull('learn-it-all-do-it-all/ea4all_togaf_capability_gap')
    hub_prompt = hub_prompt.partial(
        format_instructions=parser.get_format_instructions(),
    )
    task_2_landscape_gap = hub_prompt | model | parser

    # Prepare grader inputs (note: the chain above duplicates gap_grader defined earlier)
    if state['identified_asis']:
        content = ';'.join(str(app) for app in state['identified_asis'])
    else:
        content = "No applications identified"

    if state['biz_capability']:
        capability = ', '.join(ast.literal_eval(str(state['biz_capability']))).replace("'", ", ")
    else:
        capability = "No business capabilities identified"
    
    values = {
        "application": content,
        "capability": capability
        }

    model.max_tokens = set_max_new_tokens(get_predicted_num_tokens_from_prompt(model,hub_prompt,values))

    extracted_gap = task_2_landscape_gap.invoke(input=values, config={"recursion_limit":configuration.recursion_limit})

    for item in extracted_gap.capability_status:
        print(f"---CAPABILITY: {item.capability}  SUPPORT: {item.support}---")

    return {
        "messages": [AIMessage(content=str(state['messages']), name=state['next'])],
        "landscape_gap": extracted_gap #landscape_gap.content
        }

##TASK-2 Graph
task2_builder = StateGraph(Task2State)

# Define the nodes
task2_builder.add_node("assess_landscape", retrieve)  # retrieve
task2_builder.add_node("grade_landscape_asis", grade_landscape_asis)  # grade documents
task2_builder.add_node("generate", generate)  # generate
task2_builder.add_node("transform_query", transform_query)  # transform_query
task2_builder.add_node("grade_landscape_gap", grade_landscape_asis_v_capability_gap) #analyse asis gap

# Build graph
task2_builder.set_entry_point("assess_landscape")

task2_builder.add_edge("assess_landscape", "grade_landscape_asis")
task2_builder.add_conditional_edges(
    "grade_landscape_asis",
    decide_to_generate,
    {
        "transform_query": "transform_query",
        "generate": "generate",
    },
)
task2_builder.add_edge("transform_query", "assess_landscape")
task2_builder.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": "grade_landscape_gap",
        "not useful": "transform_query",
        "no match": "grade_landscape_gap"
    },
)

task2_builder.add_edge("grade_landscape_gap", END)

# Compile
task2_graph = task2_builder.compile()
task2_graph.name = "togaf_assess_current_landscape"
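
# Invocation sketch (illustrative; the state keys come from Task2State as used above,
# and the initial values shown are assumptions):
#   result = await task2_graph.ainvoke(
#       {"business_query": "Which applications support online payments?",
#        "messages": [], "next": "task2"},
#       config=config,
#   )
#   result["landscape_gap"]  # CapabilityGap produced by grade_landscape_gap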