# NOTE: page-header residue from the hosting site, kept only as a comment:
# "Spaces: Running on CPU Upgrade". It is not part of the program.
import os

from pydantic import Field, BaseModel
from vectara_agentic.agent import Agent
from vectara_agentic.tools import VectaraToolFactory
from vectara_agentic.types import ModelProvider, AgentType
from vectara_agentic.agent_config import AgentConfig
# Greeting text constant; not referenced elsewhere in the visible part of this
# file (presumably consumed by the UI layer that imports this module).
initial_prompt = "How can I help you today?"
# Custom summarizer prompt passed as `vectara_prompt_text` to the
# 'ask_publications' RAG tool in create_assistant_tools().
# The `$vectara...` placeholders and the `#foreach` / `#if` directives are not
# expanded in this file; they are sent verbatim (presumably rendered by
# Vectara's prompt templating on the server side -- confirm against its docs).
# The text is intentionally flush-left so no indentation leaks into the prompt.
prompt = """
[
{"role": "system", "content": "
You are an AI assistant that forms a detailed and comprehensive answer to a user question based on search results that are provided to you.
You are an expert in clinical trial and statistical data analysis with extensive experience in analyzing and interpreting clinical research data.
When asked about baseline characteristics, include as many such characteristics as possible in your response. Be detailed and comprehensive.
For example, always include in baseline characteristics the sample size (number of patients), population demographics (male/female), age, race, and BMI.
Include statistical and numerical evidence to support and contextualize your response.
If the question is vague or ambiguous, ask for clarification.
Your response should include all relevant information and values from the search results. Do not omit anything relevant."
},
{"role": "user", "content": "
[INSTRUCTIONS]
- Generate a highly detailed and comprehensive response to the question *** $vectaraQuery *** using information and facts in the search results provided.
- If the search results are irrelevant to the question respond with *** I do not have enough information to answer this question.***
- Do not base your response on information or knowledge that is not in the search results.
- Make sure your response is answering the question asked. If the question is related to an entity (such as a person or place), make sure you use search results related to that entity.
- Consider that each search result is a partial segment from a bigger text, and may be incomplete.
- Your output should always be in a single language - the $vectaraLangName language. Check spelling and grammar for the $vectaraLangName language.
- Give a slight preference to search results that appear earlier in the list.
- Only cite relevant search results in your answer following these specific instructions: $vectaraCitationInstructions
Search results for the question *** $vectaraQuery***, are listed below, some are text, some MAY be tables in markdown format.
#foreach ($qResult in $vectaraQueryResultsDeduped)
[$esc.java($foreach.index + 1)]
#if($qResult.hasTable())
Table Title: $qResult.getTable().title() || Table Description: $qResult.getTable().description() || Table Data:
$qResult.getTable().markdown()
#else
$qResult.getText()
#end
#end
Respond always in the $vectaraLangName language, and only in that language."}
]
"""
# Alternative summarizer prompt using the same `$vectara...` placeholders and
# `#foreach` / `#if` directives as `prompt`.
# It is not referenced anywhere in the visible part of this file; the RAG tool
# is configured with `prompt`, not `prompt_new`.
# NOTE(review): a few instruction lines appear twice in the text below (the
# "Include statistical and numerical evidence..." and "Your response should
# include all relevant information..." lines). Left unchanged because this is
# runtime prompt text; confirm whether the repetition is intentional.
prompt_new = """
[
{"role": "system",
"content": "You are an AI assistant that forms a detailed and comprehensive answer to a user query based on search results that are provided to you." },
{"role": "user", "content": "
[INSTRUCTIONS]
You are an expert in clinical trial and statistical data analysis with extensive experience in analyzing and interpreting clinical research data.
If the search results are irrelevant to the question respond with *** I do not have enough information to answer this question.***
Do not mention or list the search results or references in your response. Never explicitly mention a specific search result.
Search results may include tables in a markdown format. When answering a question using a table be careful about which rows and columns contain the answer and include all relevant information from the relevant rows and columns that the query is asking about.
Do not cobble facts together from multiple search results, instead summarize the main facts into a consistent and easy to understand response.
Do not base your response on information or knowledge that is not in the search results.
Make sure your response is answering the query asked. If the query is related to an entity (such as a person or place), make sure you use search results related to that entity.
For queries where only a short answer is required, you can give a brief response.
Consider that each search result is a partial segment from a bigger text, and may be incomplete.
Never refer to the search results in your response.
Ignore any search results that do not contain information relevant to answering the query.
Your output should always be in a single language - the $vectaraLangName language. Check spelling and grammar for the $vectaraLangName language.
Search results for the query *** $vectaraQuery***, are listed below, some are text, some MAY be tables in the format described above.
#foreach ($qResult in $vectaraQueryResultsDeduped)
[$esc.java($foreach.index + 1)]
#if($qResult.hasTable())
Table Title: $qResult.getTable().title() || Table Description: $qResult.getTable().description() || Table Data:
$qResult.getTable().markdown()
#else
$qResult.getText()
#end
#end
Generate a coherent response (but no more than $vectaraOutChars characters) to the query *** $vectaraQuery *** by summarizing the search results provided.
Give a slight preference to search results that appear earlier in the list.
Include statistical and numerical evidence to support and contextualize your response.
Your response should include all relevant information and values from the search results. Do not omit anything relevant.
Prioritize a long, detailed, thorough and comprehensive response over a short one.
When asked about baseline characteristics, include as many such characteristics as possible in your response. Be detailed and comprehensive.
For example, always include in baseline characteristics the sample size (number of patients), population demographics (male/female), age, race, and BMI.
Include statistical and numerical evidence to support and contextualize your response.
If the question is vague or ambiguous, ask for clarification.
Your response should include all relevant information and values from the search results. Do not omit anything relevant.
Only cite relevant search results in your answer following these specific instructions: $vectaraCitationInstructions
If the search results are irrelevant to the query, respond with ***I do not have enough information to answer this question.***. Respond always in the $vectaraLangName language, and only in that language."}
]
"""
def create_assistant_tools(cfg):
    """Build the two Vectara-backed tools used by the assistant.

    Args:
        cfg: Configuration object; only ``cfg.api_key`` and ``cfg.corpus_key``
            are read here and forwarded to ``VectaraToolFactory``.

    Returns:
        A list ``[ask_publications, search_publications]``: a RAG tool that
        answers questions using the module-level ``prompt`` as the summarizer
        prompt, and a search tool that lists matching publications.
    """

    class QueryPublicationsArgs(BaseModel):
        # Extra argument exposed on the RAG tool (see `tool_args_schema`).
        name: str = Field(..., description="The name of the clinical trial")

    vec_factory = VectaraToolFactory(
        vectara_api_key=cfg.api_key,
        vectara_corpus_key=cfg.corpus_key,
    )

    summarizer = 'vectara-summary-table-md-query-ext-jan-2025-gpt-4o'

    # The tool descriptions are runtime text; they are kept flush-left exactly
    # as given so no source indentation ends up inside the strings.
    ask_publications = vec_factory.create_rag_tool(
        tool_name="ask_publications",
        tool_description="""
Responds to an user question about clinical trials, focusing on a specific information and data.
""",
        tool_args_schema=QueryPublicationsArgs,
        reranker="slingshot", rerank_k=100, rerank_cutoff=0.1,
        n_sentences_before=2, n_sentences_after=2, lambda_val=0.1,
        summary_num_results=15,
        max_tokens=4096, max_response_chars=8192,
        vectara_summarizer=summarizer,
        include_citations=True,
        vectara_prompt_text=prompt,
        save_history=True,
        verbose=False,
    )

    search_publications = vec_factory.create_search_tool(
        tool_name="search_publications",
        tool_description="""
Responds with a list of relevant publications that match the user query
Use a high value for top_k (3 times what you think is needed) to make sure to get all relevant results.
""",
        reranker="mmr", rerank_k=100, mmr_diversity_bias=0.5,
        n_sentences_before=2, n_sentences_after=2, lambda_val=0.3,
        save_history=True,
        verbose=False,
    )

    return [ask_publications, search_publications]
def initialize_agent(_cfg, agent_progress_callback=None):
    """Create and return the clinical-trials assistant agent.

    Builds a primary and a fallback ``AgentConfig`` from environment
    variables, constructs an ``Agent`` with the tools returned by
    ``create_assistant_tools(_cfg)``, calls ``agent.report()`` and returns the
    agent.

    Args:
        _cfg: Configuration object forwarded unchanged to
            ``create_assistant_tools``.
        agent_progress_callback: Optional callback forwarded unchanged to
            ``Agent``; defaults to ``None``.

    Returns:
        The constructed ``Agent`` instance.
    """
    # Runtime instruction text for the agent; kept flush-left exactly as given
    # so no source indentation ends up inside the string.
    menarini_bot_instructions = """
- You are an expert in clinical trial and statistical data analysis with extensive experience in designing, analyzing, and interpreting clinical research data.
- Your task is to answer user question, using the tools you have available.
- use the 'search_publications' tool to get a list of relevant trials or documents that match the user question, but always call it with summarize=False.
Use the metadata in the response to determine valid names of clinical trials.
- Call the 'ask_publications' tool to obtain relevant information needed to answer the user question.
If the 'ask_publications' tool responds that it does not have enough information to answer your query,
rephrase your query to be more specific and explicit, and call 'ask_publications' again to get the answer you need.
Retry in this manner up to 10 times.
- You can specify in your tool query the specific information you are looking for, such as "what is the sample size?" or "what is the percentage of patients with Diabetes".
- Your response to the user question should be technically rigorous, data-driven, and written for an audience familiar with advanced statistical terminology,
regulatory standards, and the nuances of clinical trial design.
- If a tool returns citations or references, include them in your response. Avoid including citations inside table cells.
- Form queries to tool as questions. For example instead of "baseline characteristics", use "what are the baseline characteristics?"
- When responding to a user question:
1) Use precise statistical terminology (e.g., randomization, blinding, intention-to-treat, type I/II error, p-values, confidence intervals, Bayesian methods, etc.)
and reference common methodologies or guidelines where applicable (e.g., CONSORT, FDA, EMA).
2) When reporting population statistics, always include sample size (number of patients) and other important population characteristics.
When reporting sample sizes, consider participants who were eligible for the study, those who were randomized, and those who completed the study.
Never use estimated characteristics, always use the actual values from the study.
3) Provide clear explanations of statistical concepts, including assumptions, potential biases, and limitations in the context of clinical trial data.
4) Ensure that your analysis is evidence-based and reflects current best practices in the field of clinical research and data analysis.
6) Provide sources and citations for data and statistical information included in your response, based on citations from the tools.
7) Be consistent and comprehensive in your responses, ensuring that all relevant information is included.
"""

    # Primary configuration. Each value comes from an environment variable,
    # falling back to OpenAI for agent type / providers and to an empty string
    # for model names (presumably meaning "library default" -- confirm).
    agent_config = AgentConfig(
        agent_type=os.getenv("VECTARA_AGENTIC_AGENT_TYPE", AgentType.OPENAI.value),
        main_llm_provider=os.getenv("VECTARA_AGENTIC_MAIN_LLM_PROVIDER", ModelProvider.OPENAI.value),
        main_llm_model_name=os.getenv("VECTARA_AGENTIC_MAIN_MODEL_NAME", ""),
        tool_llm_provider=os.getenv("VECTARA_AGENTIC_TOOL_LLM_PROVIDER", ModelProvider.OPENAI.value),
        tool_llm_model_name=os.getenv("VECTARA_AGENTIC_TOOL_MODEL_NAME", ""),
        observer=os.getenv("VECTARA_AGENTIC_OBSERVER_TYPE", "NO_OBSERVER"),
    )

    # Fallback configuration, read from the *_FALLBACK_* variables. The
    # observer setting shares the same environment variable as the primary.
    fallback_agent_config = AgentConfig(
        agent_type=os.getenv("VECTARA_AGENTIC_FALLBACK_AGENT_TYPE", AgentType.OPENAI.value),
        main_llm_provider=os.getenv("VECTARA_AGENTIC_FALLBACK_MAIN_LLM_PROVIDER", ModelProvider.OPENAI.value),
        main_llm_model_name=os.getenv("VECTARA_AGENTIC_FALLBACK_MAIN_MODEL_NAME", ""),
        tool_llm_provider=os.getenv("VECTARA_AGENTIC_FALLBACK_TOOL_LLM_PROVIDER", ModelProvider.OPENAI.value),
        tool_llm_model_name=os.getenv("VECTARA_AGENTIC_FALLBACK_TOOL_MODEL_NAME", ""),
        observer=os.getenv("VECTARA_AGENTIC_OBSERVER_TYPE", "NO_OBSERVER"),
    )

    agent = Agent(
        tools=create_assistant_tools(_cfg),
        topic="Drug trials publications",
        custom_instructions=menarini_bot_instructions,
        agent_progress_callback=agent_progress_callback,
        agent_config=agent_config,
        fallback_agent_config=fallback_agent_config,
    )
    agent.report()
    return agent