"""Tools and agent wrapper for a smolagents-based question-answering agent."""

import json
import os
from urllib.parse import urlencode

import requests
import yaml
from gradio_client import Client, handle_file
from huggingface_hub import InferenceClient
from smolagents import (
    ApiModel,
    CodeAgent,
    DuckDuckGoSearchTool,
    HfApiModel,
    LiteLLMModel,
    PythonInterpreterTool,
    VisitWebpageTool,
    WikipediaSearchTool,
)
from smolagents.tools import Tool

# Instructions prepended to every question passed to the agent.
# NOTE(review): the text is reproduced verbatim, including its typos
# ("int of float", "thay"), because it is sent to the model at runtime and
# changing it changes behavior.
_ANSWER_FORMAT_PROMPT = (
    "You are a general AI assistant. I will ask you a question. "
    "Report your thoughts, and finish your answer with the following "
    "template: final_answer(YOUR FINAL ANSWER). "
    "YOUR FINAL ANSWER should be a number (python int of float, not numpy) "
    "OR as few words as possible OR a comma separated list of numbers "
    "and/or strings. "
    "If you are asked for a number, don't use comma to write your number "
    "neither use units such as $ or percent sign unless specified otherwise. "
    "If you are asked for a string, don't use articles, neither "
    "abbreviations (e.g. for cities write Mount instead of Mt., Saint "
    "instead of St., etc), and write the digits in plain text unless "
    "specified otherwise, floats are shortened to one decimal if they are 0 "
    "so write the number as a string to avoid this (return '10.00' instead "
    "of 10.00). "
    "If you are asked for a comma separated list, apply the above rules "
    "depending of whether the element to be put in the list is a number or "
    "a string. "
    "Put spaces between elements in the list such as 'a, b, c'. "
    "Divide the task in smaller tasks so thay you do every step correctly. "
    "Check if the answer you are going to submit follows all the rules "
    "before submitting it. "
    "If you are not sure about the answer, ask for more information or "
    "clarification. "
    "If you are asked to write a code, write the code in a single code "
    "block, and make sure that it is correct and that it follows exactly "
    "what has been asked. "
    "If you are asked to write a function, write the function in a single "
    "code block, and make sure that it is correct and that it follows "
    "exactly what has been asked."
)


class ReadFileTool(Tool):
    """Tool that returns the text content of a local file."""

    name = "read_file"
    description = "Reads the content of a file."
    inputs = {
        "file_path": {"type": "string", "description": "The path to the file to read"}
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        # Delegate to the base class so any setup it performs is preserved,
        # then keep the explicit flag the original code set.
        super().__init__(*args, **kwargs)
        self.is_initialized = False

    def forward(self, file_path: str) -> str:
        """Read ``file_path`` as UTF-8 text and return its full content.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()


class TranscribeTool(Tool):
    """Tool that transcribes an audio file through a hosted Gradio app."""

    name = "transcribe"
    description = "Transcribes audio files to text."
    inputs = {
        "audio_file_path": {
            "type": "string",
            "description": "The path to the audio file to transcribe",
        }
    }
    output_type = "string"

    def forward(self, audio_file_path: str) -> str:
        """Send the audio file to the remote app and return its result.

        A new ``gradio_client.Client`` for ``viktor-hu/parakeet-asr-mcp-server``
        is created on every call and its ``/transcribe_to_text`` endpoint is
        invoked with the file.
        """
        client = Client("viktor-hu/parakeet-asr-mcp-server")
        # NOTE(review): the return value is passed through unchanged; it is
        # presumably the transcript text -- confirm against the endpoint.
        return client.predict(
            audio_file=handle_file(audio_file_path),
            api_name="/transcribe_to_text",
        )


class GetChessBestMoveTool(Tool):
    """Tool that asks the stockfish.online HTTP API to analyse a position."""

    name = "get_chess_best_move"
    description = "Gets the best move for a given chess position."
    inputs = {
        "fen_position": {
            "type": "string",
            "description": "The FEN string representing the chess position",
        }
    }
    output_type = "string"

    def forward(self, fen_position: str) -> str:
        """Query the engine API for ``fen_position`` and return its reply.

        The request uses a fixed search depth of 5 and a 60 second timeout.

        Returns:
            The API's JSON reply serialized as a string, so the value matches
            the declared ``output_type`` ("string") and the ``-> str``
            annotation (previously the parsed dict was returned).

        Raises:
            requests.HTTPError: If the API answers with an error status.
            requests.RequestException: On connection problems or timeout.
            ValueError: If the response body is not valid JSON.
        """
        url = "https://stockfish.online/api/s/v2.php"
        query = urlencode({"fen": fen_position, "depth": 5})
        response = requests.get(url, params=query, timeout=60)
        # Fail with a clear HTTP error instead of a confusing JSON decode
        # error when the service returns an error page.
        response.raise_for_status()
        return json.dumps(response.json())


class ImageAnalysisTool(Tool):
    """Tool that answers a question about an image via a hosted Gradio app."""

    name = "image_analysis"
    description = "Analyzes an image and answers questions about it."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path to the image file to analyze",
        },
        "query": {
            "type": "string",
            "description": "The question to ask about the image. For example, 'Perform OCR on the text in the image.'",
        },
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        # Delegate to the base class so any setup it performs is preserved,
        # then keep the explicit flag the original code set.
        super().__init__(*args, **kwargs)
        self.is_initialized = False

    def forward(self, image_path: str, query: str) -> str:
        """Send the image and question to the remote app and return its result.

        A new ``gradio_client.Client`` for ``prithivMLmods/DocScope-R1`` is
        created on every call and its ``/generate_image`` endpoint is invoked
        with fixed sampling settings.
        """
        client = Client("prithivMLmods/DocScope-R1")
        # NOTE(review): the return value is passed through unchanged; its
        # exact shape depends on the endpoint -- confirm it is plain text.
        return client.predict(
            model_name="Cosmos-Reason1-7B",
            text=query,
            image=handle_file(image_path),
            max_new_tokens=1024,
            temperature=0.6,
            top_p=0.9,
            top_k=50,
            repetition_penalty=1.2,
            api_name="/generate_image",
        )


class BasicAgent:
    """Callable wrapper around a ``CodeAgent`` equipped with the tools above."""

    def __init__(
        self, model_id: str = "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
    ) -> None:
        """Build the underlying agent.

        Args:
            model_id: Model identifier; it is prefixed with ``together_ai/``
                before being handed to ``LiteLLMModel``.

        Prompt templates are loaded from ``prompts.yaml``, resolved against
        the current working directory. The API key is read from the
        ``TOGETHER_API_KEY`` environment variable (``None`` when unset).
        """
        with open("prompts.yaml", "r", encoding="utf-8") as stream:
            prompt_templates = yaml.safe_load(stream)

        tools = [
            DuckDuckGoSearchTool(max_results=2),
            WikipediaSearchTool(),
            VisitWebpageTool(),
            PythonInterpreterTool(),
            ReadFileTool(),
            TranscribeTool(),
            ImageAnalysisTool(),
            GetChessBestMoveTool(),
        ]
        model = LiteLLMModel(
            model_id=f"together_ai/{model_id}",
            api_key=os.getenv("TOGETHER_API_KEY"),
            temperature=0.0,
        )
        self.agent = CodeAgent(
            tools=tools,
            model=model,
            prompt_templates=prompt_templates,
            additional_authorized_imports=["pandas", "numpy", "re", "requests", "bs4"],
        )
        self.prompt_templates = prompt_templates

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return whatever it produces.

        The fixed answer-format instructions are placed before the question,
        separated from it by a blank line.
        """
        return self.agent.run(_ANSWER_FORMAT_PROMPT + "\n\n" + question)