# Page-capture residue (not program code): "Spaces: Sleeping Sleeping"
from typing import Callable, Optional

import pandas as pd
import requests
from pydantic import Field, BaseModel
from omegaconf import OmegaConf
from vectara_agentic.agent import Agent
from vectara_agentic.tools import ToolsFactory, VectaraToolFactory
# Greeting text. Not referenced elsewhere in this section; presumably displayed
# by the UI layer that imports this module -- TODO confirm.
initial_prompt = "How can I help you today?"

# Custom summarizer prompt handed to the RAG tool as `vectara_prompt_text`.
# The `#foreach` / `#if` directives and the `$vectara...` / `$qResult`
# placeholders are template syntax that is left untouched here (presumably
# expanded by Vectara at query time). The text is kept flush-left so that no
# source indentation leaks into the prompt.
prompt = """
[
{"role": "system", "content": "You are a search bot that forms a coherent answer to a user query based on search results that are provided to you." },
{"role": "user", "content": " [INSTRUCTIONS]
If the search results are irrelevant to the question respond with *** I do not have enough information to answer this question.***
Search results may include tables in a markdown format. When answering a question using a table be careful about which rows and columns contain the answer and include all relevant information from the relevant rows and columns that the query is asking about.
Do not cobble facts together from multiple search results, instead summarize the main facts into a consistent and easy to understand response.
Do not base your response on information or knowledge that is not in the search results.
Make sure your response is answering the query asked. If the query is related to an entity (such as a person or place), make sure you use search results related to that entity.
For queries where only a short answer is required, you can give a brief response.
Consider that each search result is a partial segment from a bigger text, and may be incomplete.
Your output should always be in a single language - the $vectaraLangName language. Check spelling and grammar for the $vectaraLangName language.
Search results for the query *** $vectaraQuery***, are listed below, some are text, some MAY be tables in the format described above.
#foreach ($qResult in $vectaraQueryResultsDeduped)
[$esc.java($foreach.index + 1)]
#if($qResult.hasTable())
Table Title: $qResult.getTable().title() || Table Description: $qResult.getTable().description() || Table Data:
$qResult.getTable().markdown()
#else
$qResult.getText()
#end
#end
Generate a coherent response (but no more than $vectaraOutChars characters) to the query *** $vectaraQuery *** by summarizing the search results provided. Give a slight preference to search results that appear earlier in the list.
Include as many statistical numerical evidence from the search results in your response.
Only cite relevant search results in your answer following these specific instructions: $vectaraCitationInstructions
If the search results are irrelevant to the query, respond with ***I do not have enough information to answer this question.***. Respond always in the $vectaraLangName language, and only in that language."}
]
"""
def create_assistant_tools(cfg):
    """Build the list of tools made available to the agent.

    Creates a Vectara RAG tool named ``ask_publications`` (using the
    module-level ``prompt`` as the summarizer prompt text) and returns it
    appended to the standard tools provided by ``ToolsFactory``.

    Args:
        cfg: Configuration object; only ``cfg.api_key`` and
            ``cfg.corpus_key`` are read here.

    Returns:
        ``ToolsFactory().standard_tools()`` concatenated with
        ``[ask_publications]``.
    """

    class QueryPublicationsArgs(BaseModel):
        # Argument schema for the ask_publications tool: a single query string.
        query: str = Field(..., description="The user query, always in the form of a question", examples=["what are the risks reported?", "which drug was use on the and how big was the population?"])

    vec_factory = VectaraToolFactory(vectara_api_key=cfg.api_key,
                                     vectara_corpus_key=cfg.corpus_key)
    summarizer = 'vectara-summary-table-md-query-ext-jan-2025-gpt-4o'

    # The description text is kept flush-left so that no source indentation
    # ends up inside the string passed to the tool factory.
    ask_publications = vec_factory.create_rag_tool(
        tool_name = "ask_publications",
        tool_description = """
Responds to an user question about a particular result, based on the publications.
""",
        tool_args_schema = QueryPublicationsArgs,
        reranker = "multilingual_reranker_v1", rerank_k = 100,
        n_sentences_before = 3, n_sentences_after = 3, lambda_val = 0.005,
        summary_num_results = 10,
        vectara_summarizer = summarizer,
        include_citations = True,
        vectara_prompt_text=prompt,
        save_history = True
    )

    tools_factory = ToolsFactory()
    return (
        tools_factory.standard_tools() +
        [ask_publications]
    )
def initialize_agent(_cfg, agent_progress_callback: Optional[Callable] = None):
    """Create and return the clinical-trial publications agent.

    Builds an ``Agent`` whose tools come from ``create_assistant_tools(_cfg)``,
    with the topic "Drug trials publications" and the custom instructions
    defined below, then calls ``agent.report()`` before returning it.

    Args:
        _cfg: Configuration object, forwarded unchanged to
            ``create_assistant_tools``.
        agent_progress_callback: Optional callable forwarded unchanged to
            ``Agent``; defaults to ``None``.

    Returns:
        The constructed ``Agent`` instance.
    """
    # Instruction text is kept flush-left so that no source indentation leaks
    # into the string handed to the agent.
    menarini_bot_instructions = """
- You are a helpful clinical trial assistant, with expertise in clinical trial publications, in conversation with a user.
- You always respond to the user with supporting evidence based on the data, with p-values where available.
- Your responses follow a standard statistical format for results of clinical trials.
for example: instead of "Reduced 4-component MACE by 30%" use "Reduced 4-component MACE by 30% (HR: 0.70; p=0.002)"
- Your responses should always include the sample size for the clinical trial and the time to effect when available.
- Call the ask_publications tool as much as needed, with appropiate queries, until you have all the numerical data to respond properly to the user question.
"""

    agent = Agent(
        tools=create_assistant_tools(_cfg),
        topic="Drug trials publications",
        custom_instructions=menarini_bot_instructions,
        agent_progress_callback=agent_progress_callback,
    )
    # Presumably emits a summary of the agent's configuration -- TODO confirm
    # against the vectara_agentic documentation.
    agent.report()
    return agent