File size: 6,851 Bytes
ff0eb39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
from huggingface_hub import InferenceClient

# from langchain_together import Together
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    WikipediaSearchTool,
    VisitWebpageTool,
    HfApiModel,
    LiteLLMModel,
    ApiModel,
    PythonInterpreterTool,
)
from smolagents.tools import Tool
import yaml
import os
import requests
from urllib.parse import urlencode
from gradio_client import Client, handle_file


class ReadFileTool(Tool):
    """Agent tool that returns the text content of a file."""

    name = "read_file"
    description = "Reads the content of a file."
    inputs = {
        "file_path": {"type": "string", "description": "The path to the file to read"}
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        # Positional and keyword arguments are accepted but ignored; the only
        # state set here is the initialization flag.
        self.is_initialized = False

    def forward(self, file_path: str) -> str:
        """Open ``file_path`` as UTF-8 text and return everything it contains.

        Args:
            file_path: Path of the file to read.

        Returns:
            The full decoded content of the file.
        """
        with open(file_path, "r", encoding="utf-8") as handle:
            contents = handle.read()
        return contents


class TranscribeTool(Tool):
    """Agent tool that sends an audio file to a remote Gradio app for transcription."""

    name = "transcribe"
    description = "Transcribes audio files to text."
    inputs = {
        "audio_file_path": {
            "type": "string",
            "description": "The path to the audio file to transcribe",
        }
    }
    output_type = "string"

    def forward(self, audio_file_path: str) -> str:
        """Transcribe the audio stored at ``audio_file_path``.

        A new ``gradio_client.Client`` for "viktor-hu/parakeet-asr-mcp-server"
        is created on every call and its "/transcribe_to_text" endpoint is
        invoked with the file.

        Args:
            audio_file_path: Path to the audio file to transcribe.

        Returns:
            Whatever the remote endpoint returns (declared as a string).
        """
        asr_client = Client("viktor-hu/parakeet-asr-mcp-server")
        return asr_client.predict(
            audio_file=handle_file(audio_file_path),
            api_name="/transcribe_to_text",
        )


class GetChessBestMoveTool(Tool):
    """Agent tool that queries the stockfish.online HTTP API for a position."""

    name = "get_chess_best_move"
    description = "Gets the best move for a given chess position."
    inputs = {
        "fen_position": {
            "type": "string",
            "description": "The FEN string representing the chess position",
        }
    }
    output_type = "string"

    def forward(self, fen_position: str) -> str:
        """Request an engine analysis of ``fen_position`` and return it as text.

        Args:
            fen_position: The position to analyse, as a FEN string.

        Returns:
            The body of the API response as a string. The service replies
            with JSON, so this is the JSON document in text form, which
            matches the declared ``output_type`` of "string".

        Raises:
            requests.HTTPError: If the service answers with an HTTP error status.
            requests.RequestException: On connection problems or when the
                60 second timeout expires.
        """
        url = "https://stockfish.online/api/s/v2.php"
        # Passing a mapping lets requests do the URL-encoding of the FEN
        # (spaces, slashes) itself.
        params = {"fen": fen_position, "depth": 5}
        response = requests.get(url, params=params, timeout=60)
        # Fail loudly on 4xx/5xx instead of handing an error page to the agent.
        response.raise_for_status()
        # Return text, not the parsed dict, so the value agrees with
        # output_type and the ``-> str`` annotation.
        return response.text


class ImageAnalysisTool(Tool):
    """Agent tool that asks a remote Gradio-hosted vision model about an image."""

    name = "image_analysis"
    description = "Analyzes an image and answers questions about it."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path to the image file to analyze",
        },
        "query": {
            "type": "string",
            "description": "The question to ask about the image. For example, 'Perform OCR on the text in the image.'",
        },
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        # Positional and keyword arguments are accepted but ignored; the only
        # state set here is the initialization flag.
        self.is_initialized = False

    def forward(self, image_path: str, query: str) -> str:
        """Send the image and the question to the remote model.

        A new ``gradio_client.Client`` for "prithivMLmods/DocScope-R1" is
        created on every call and its "/generate_image" endpoint is invoked
        with fixed sampling settings.

        Args:
            image_path: Path to the image file to analyze.
            query: The question to ask about the image.

        Returns:
            Whatever the remote endpoint returns (declared as a string).
        """
        vision_client = Client("prithivMLmods/DocScope-R1")
        request_kwargs = {
            "model_name": "Cosmos-Reason1-7B",
            "text": query,
            "image": handle_file(image_path),
            "max_new_tokens": 1024,
            "temperature": 0.6,
            "top_p": 0.9,
            "top_k": 50,
            "repetition_penalty": 1.2,
            "api_name": "/generate_image",
        }
        return vision_client.predict(**request_kwargs)


class BasicAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` with a fixed toolset."""

    def __init__(
        self, model_id: str = "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
    ) -> None:
        """Load the prompt templates and assemble the underlying agent.

        Args:
            model_id: Model identifier; it is prefixed with "together_ai/"
                before being handed to ``LiteLLMModel``.

        The prompt templates are read from "prompts.yaml" (resolved against
        the current working directory) and the API key is taken from the
        ``TOGETHER_API_KEY`` environment variable.
        """
        with open("prompts.yaml", "r", encoding="utf-8") as prompt_file:
            templates = yaml.safe_load(prompt_file)

        # Search/browse tools, a Python interpreter, and the custom tools
        # defined in this module.
        toolbox = [
            DuckDuckGoSearchTool(max_results=2),
            WikipediaSearchTool(),
            VisitWebpageTool(),
            PythonInterpreterTool(),
            ReadFileTool(),
            TranscribeTool(),
            ImageAnalysisTool(),
            GetChessBestMoveTool(),
        ]

        llm = LiteLLMModel(
            model_id=f"together_ai/{model_id}",
            api_key=os.getenv("TOGETHER_API_KEY"),
            temperature=0.0,
        )

        self.agent = CodeAgent(
            tools=toolbox,
            model=llm,
            prompt_templates=templates,
            additional_authorized_imports=["pandas", "numpy", "re", "requests", "bs4"],
        )
        self.prompt_templates = templates

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its result.

        The question is appended, after a blank line, to a fixed block of
        answer-formatting instructions.

        Args:
            question: The task or question to solve.

        Returns:
            The value returned by ``CodeAgent.run`` for the combined prompt.
        """
        # Sent to the model verbatim; keep the wording exactly as is.
        answer_rules = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: final_answer(YOUR FINAL ANSWER). YOUR FINAL ANSWER should be a number (python int of float, not numpy) OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities write Mount instead of Mt., Saint instead of St., etc), and write the digits in plain text unless specified otherwise, floats are shortened to one decimal if they are 0 so write the number as a string to avoid this (return '10.00' instead of 10.00). If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. Put spaces between elements in the list such as 'a, b, c'. Divide the task in smaller tasks so thay you do every step correctly. Check if the answer you are going to submit follows all the rules before submitting it. If you are not sure about the answer, ask for more information or clarification. If you are asked to write a code, write the code in a single code block, and make sure that it is correct and that it follows exactly what has been asked. If you are asked to write a function, write the function in a single code block, and make sure that it is correct and that it follows exactly what has been asked."
        task = answer_rules + "\n\n" + question
        return self.agent.run(task)