blasisd committed on
Commit e12c221 · 1 Parent(s): 81917a3

Initial commit

Files changed (3)
  1. app.py +229 -31
  2. requirements.txt +17 -1
  3. tools.py +461 -0
app.py CHANGED
@@ -1,34 +1,201 @@
 import os
 import gradio as gr
+import litellm
 import requests
 import inspect
 import pandas as pd
 
+from doctest import debug
+from dotenv import load_dotenv
+from smolagents import (
+    CodeAgent,
+    # HfApiModel,
+    LiteLLMModel,
+    # OpenAIServerModel,
+    Tool,
+    FinalAnswerTool,
+)
+
+from tools import (
+    DuckDuckGoSearchTool,
+    FileDownloaderTool,
+    HtmlTableExtractorTool,
+    ImagesAnalyzerTool,
+    LoadTextFileTool,
+    LoadXlsxFileTool,
+    RelevantInfoRetrieverTool,
+    ReverseStringTool,
+    # SpeechToTextTool,
+    VideoAnalyzerTool,
+    VisitWebpageTool,
+    WebpageTablesContextRetrieverTool,
+    # YoutubeTranscriptTool,
+    WikipediaSearchTool,
+    YoutubeVideoDownloaderTool,
+)
+
+load_dotenv()
+
+
+HF_TOKEN = os.getenv("HF_U1ACAPP_TOKEN")
+
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
+
+LLM_API_BASE = os.getenv("LLM_API_BASE")
+LLM_API_KEY = os.getenv("LLM_API_KEY")
+LLM_MODEL_ID = os.getenv("LLM_MODEL_ID")
+
+# Tools to use
+reverse_string_tool = ReverseStringTool()
+# speech_to_text_tool = SpeechToTextTool()
+transcriber_tool = Tool.from_space(
+    space_id="hf-audio/whisper-large-v3-turbo",
+    name="transcriber",
+    description="Transcribe an audio file or YouTube video, either from a path or from a URL",
+)
+
+
+wikipedia_search_tool = WikipediaSearchTool()
+web_search_tool = DuckDuckGoSearchTool()
+visit_webpage_tool = VisitWebpageTool()
+relevant_info_tool = RelevantInfoRetrieverTool()
+youtube_video_downloader_tool = YoutubeVideoDownloaderTool()
+video_analyzer_tool = VideoAnalyzerTool()
+images_analyzer_tool = ImagesAnalyzerTool()
+file_downloader_tool = FileDownloaderTool()
+load_xls_file_tool = LoadXlsxFileTool()
+load_text_file_tool = LoadTextFileTool()
+webpage_tables_context_retriever_tool = WebpageTablesContextRetrieverTool()
+html_table_extractor_tool = HtmlTableExtractorTool()
+
+transcriber_tool.device = "cpu"
+
+final_answer_tool = FinalAnswerTool()
+final_answer_tool.description = """Returns the final answer, which adheres strictly to the following guidelines:
+- Includes ONLY explicitly requested content in the exact format specified
+- Never includes:
+    * Explanations, reasoning blocks, or step-by-step working
+    * Measurements, units, or abbreviations unless required by the task
+    * Any content not specified in the task
+- Matches requested formats precisely (e.g., CSV lists as "a, b, c")
+- Preserves all specified delimiters, brackets, or structures when requested
+- No Markdown, code blocks, or rich formatting unless explicitly asked
+- In comma-separated lists, makes sure that there is a space character after each comma
+- Provides ONLY the final output with:
+    * No introductory text
+    * No closing remarks
+    * No supplemental information
+"""
+
+
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
+
+        # model = OpenAIServerModel(
+        #     model_id="qwen/qwen2.5-vl-7b",
+        #     api_base="http://localhost:1234/v1",
+        #     api_key="not-needed",
+        #     max_tokens=8192,
+        # )
+
+        model = LiteLLMModel(
+            model_id=LLM_MODEL_ID,
+            api_base=LLM_API_BASE,
+            api_key=LLM_API_KEY,
+            num_ctx=8192,
+            # flatten_messages_as_text=False,
+        )
+
+        # model = HfApiModel(
+        #     max_tokens=4096,
+        #     temperature=0.5,
+        #     provider="novita",
+        #     model_id="Qwen/Qwen3-32B",
+        #     custom_role_conversions=None,
+        #     token=HF_TOKEN,
+        # )
+
+        self.agent = CodeAgent(
+            tools=[
+                file_downloader_tool,
+                reverse_string_tool,
+                wikipedia_search_tool,
+                # youtube_transcript_tool,
+                web_search_tool,
+                visit_webpage_tool,
+                youtube_video_downloader_tool,
+                transcriber_tool,
+                video_analyzer_tool,
+                images_analyzer_tool,
+                webpage_tables_context_retriever_tool,
+                html_table_extractor_tool,
+                load_xls_file_tool,
+                load_text_file_tool,
+                final_answer_tool,
+                # relevant_info_tool,
+            ],
+            model=model,
+            # executor_type="e2b",
+            additional_authorized_imports=[
+                "bs4",
+                "datetime",
+                "json",
+                "numpy",
+                "pandas",
+                "requests",
+                "lxml",
+                # "youtube_dl",
+            ],
+            add_base_tools=True,  # Add any additional base tools
+            planning_interval=3,  # Enable planning every 3 steps
+            # max_steps=12,
+        )
+
+    def __call__(
+        self, question: str, task_id: str = None, attached_file: bool = False
+    ) -> str:
+        """Calling the agent
+        :param question: the initial query
+        :type question: str
+        :param task_id: Required if attached_file is True; used to retrieve the file, defaults to None
+        :type task_id: str, optional
+        :param attached_file: If True, file content for task_id is appended to the question, defaults to False
+        :type attached_file: bool, optional
+        :raises ValueError: If attached_file is True but task_id is not provided.
+        :return: the agent's answer
+        :rtype: str
+        """
+
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
+        if attached_file and not task_id:
+            raise ValueError("task_id must be provided when attached_file is True")
+
+        additional_args = None
+
+        if attached_file:
+            file_url = f"{DEFAULT_API_URL}/files/{task_id}"
+            additional_args = {"file_url": file_url}
+
+        agent_answer = self.agent.run(question, additional_args=additional_args)
+        return agent_answer
+
 
-def run_and_submit_all( profile: gr.OAuthProfile | None):
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
 
     if profile:
-        username= f"{profile.username}"
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -55,16 +222,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-        print(f"Error decoding JSON response from questions endpoint: {e}")
-        print(f"Response text: {response.text[:500]}")
-        return f"Error decoding server response for questions: {e}", None
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
@@ -76,26 +243,54 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
        question_text = item.get("question")
+
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
-            submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+            file_attached = item.get("file_name", "") != ""
+            submitted_answer = agent(question_text, task_id, file_attached)
+            answers_payload.append(
+                {"task_id": task_id, "submitted_answer": submitted_answer}
+            )
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": submitted_answer,
+                }
+            )
        except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": f"AGENT ERROR: {e}",
+                }
+            )
 
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    # 4. Prepare Submission
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload,
+    }
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
 
+    try:
+        import json
+
+        with open("answers.json", "w", encoding="utf-8") as ans_fp:
+            json.dump(answers_payload, ans_fp)
+    except Exception as e:
+        print(f"Could not save answers to a file: {e}.")
+
     # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
@@ -162,20 +357,19 @@ with gr.Blocks() as demo:
 
     run_button = gr.Button("Run Evaluation & Submit All Answers")
 
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    status_output = gr.Textbox(
+        label="Run Status / Submission Result", lines=5, interactive=False
+    )
     # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
 
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +377,18 @@ if __name__ == "__main__":
     else:
         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
 
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+        print(
+            f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
+        )
     else:
-        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+        print(
+            "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
+        )
 
-    print("-"*(60 + len(" App Starting ")) + "\n")
+    print("-" * (60 + len(" App Starting ")) + "\n")
 
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)
+    demo.launch(debug=True, share=False)
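Note on the new __call__ path: when attached_file is true, the scoring API's file URL is handed to the agent through additional_args, and the agent can then fetch it with the file_downloader tool. A minimal smoke-test sketch of both paths, assuming the .env variables above (LLM_MODEL_ID, LLM_API_BASE, LLM_API_KEY) are set; the task id below is a placeholder, and importing app also runs its module-level setup (tool instantiation and the Gradio UI):

    # Hypothetical local smoke test; not part of the commit.
    from app import BasicAgent

    agent = BasicAgent()

    # No attachment: additional_args stays None.
    print(agent("Reverse the string 'olleh'."))

    # With attachment: task_id is mandatory, and the agent receives
    # {"file_url": f"{DEFAULT_API_URL}/files/<task-id>"} as additional_args.
    print(
        agent(
            "Sum the values in the attached spreadsheet.",
            task_id="<task-id>",  # placeholder, not a real task id
            attached_file=True,
        )
    )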
requirements.txt CHANGED
@@ -1,2 +1,18 @@
+bs4
 gradio
-requests
+gradio[oauth]
+python-dotenv
+requests
+smolagents
+smolagents[litellm, toolkit, transformers, e2b]
+openpyxl
+opencv-python
+protobuf
+sentencepiece
+soundfile
+torch
+transformers
+youtube-transcript-api
+yt-dlp
+langchain-community
+wikipedia-api
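python-dotenv is listed because app.py reads its model settings from a .env file rather than hard-coding them. A small pre-flight sketch (the variable names come from app.py; running it before launching the app is optional and purely illustrative):

    # Hypothetical pre-flight check: confirms the variables app.py reads exist.
    import os

    from dotenv import load_dotenv  # provided by python-dotenv above

    load_dotenv()
    required = ("LLM_MODEL_ID", "LLM_API_BASE", "LLM_API_KEY")
    missing = [name for name in required if not os.getenv(name)]
    if missing:
        raise SystemExit(f"Missing .env variables: {', '.join(missing)}")
    print("Environment complete; app.py can build its LiteLLMModel.")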
tools.py ADDED
@@ -0,0 +1,461 @@
+import os
+import tempfile
+
+from typing import Dict, List, Optional
+
+from bs4 import BeautifulSoup
+import yt_dlp
+import pandas as pd
+import requests
+import torch
+
+from langchain_community.document_loaders import YoutubeLoader
+from langchain_community.retrievers import BM25Retriever
+from langchain_community.tools import BearlyInterpreterTool
+from langchain.docstore.document import Document
+from smolagents import (
+    DuckDuckGoSearchTool,
+    SpeechToTextTool,
+    Tool,
+    VisitWebpageTool,
+    WikipediaSearchTool,
+)
+from transformers import AutoProcessor, AutoModelForImageTextToText
+
+
+class RelevantInfoRetrieverTool(Tool):
+    name = "relevant_info_retriever"
+    description = "Retrieves information relevant to the query."
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "The query for which to retrieve information.",
+        },
+        "docs": {
+            "type": "string",
+            "description": "The source documents from which to choose in order to retrieve relevant information",
+        },
+    }
+    output_type = "string"
+
+    def forward(self, query: str, docs: List[Document]):
+        self.retriever = BM25Retriever.from_documents(docs)
+        results = self.retriever.get_relevant_documents(query)
+        if results:
+            return "\n\n".join([doc.page_content for doc in results])
+        else:
+            return "No relevant information found."
+
+
+class YoutubeTranscriptTool(Tool):
+    name = "youtube_transcript"
+    description = "Fetches a YouTube video's transcript."
+    inputs = {
+        "youtube_url": {
+            "type": "string",
+            "description": "The youtube video url",
+        },
+        "source_langs": {
+            "type": "array",
+            "description": "A list of language codes in descending priority for the video transcript.",
+            "items": {"type": "string"},
+            "default": ["en"],
+            "required": False,
+            "nullable": True,
+        },
+        "target_lang": {
+            "type": "string",
+            "description": "The language to which the transcript will be translated.",
+            "default": "en",
+            "required": False,
+            "nullable": True,
+        },
+    }
+    output_type = "string"
+
+    def forward(
+        self,
+        youtube_url: str,
+        source_langs: Optional[List[str]] = ["en"],
+        target_lang: Optional[str] = "en",
+    ):
+        try:
+            loader = YoutubeLoader.from_youtube_url(
+                youtube_url,
+                add_video_info=True,
+                language=source_langs,
+                translation=target_lang,
+                # transcript_format=TranscriptFormat.CHUNKS,
+                # chunk_size_seconds=30,
+            )
+            transcript_docs = loader.load()
+            return transcript_docs
+
+        except Exception as e:
+            return f"Error fetching video's transcript: {e}"
+
+
+class ReverseStringTool(Tool):
+    name = "reverse_string"
+    description = "Reverses the input string."
+    inputs = {
+        "string": {
+            "type": "string",
+            "description": "The string that needs to be reversed.",
+        }
+    }
+    output_type = "string"
+
+    def forward(self, string: str):
+        try:
+            return string[-1::-1]
+        except Exception as e:
+            return f"Error reversing string: {e}"
+
+
+class SmolVLM2:
+    """The parent class for visual analyzer tools (using the SmolVLM2-500M-Video model)"""
+
+    def __init__(self):
+        """Initializations for the analyzer tool"""
+        model_path = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
+        device = "cpu"  # "cuda" if torch.cuda.is_available() else "cpu"
+        self.processor = AutoProcessor.from_pretrained(model_path)
+        self.model = AutoModelForImageTextToText.from_pretrained(
+            model_path,
+            torch_dtype=torch.bfloat16,
+            # _attn_implementation="flash_attention_2",
+        ).to(device)
+
+
+class ImagesAnalyzerTool(Tool, SmolVLM2):
+    name = "image_analyzer"
+    description = "Analyzes each input image according to the query"
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "The query according to which the image will be analyzed.",
+        },
+        "images_urls": {
+            "type": "array",
+            "description": "A list of strings containing the images' urls",
+            "items": {"type": "string"},
+        },
+    }
+    output_type = "string"
+
+    def __init__(self):
+        Tool.__init__(self)
+        SmolVLM2.__init__(self)
+
+    def forward(self, query: str, images_urls: List[str]):
+
+        try:
+
+            # Image message entities for the different images' urls
+            image_message_ents = [{"type": "image", "url": iu} for iu in images_urls]
+
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": query,
+                        },
+                    ]
+                    + image_message_ents,
+                },
+            ]
+
+            inputs = self.processor.apply_chat_template(
+                messages,
+                add_generation_prompt=True,
+                tokenize=True,
+                return_dict=True,
+                return_tensors="pt",
+            ).to(self.model.device, dtype=torch.bfloat16)
+
+            generated_ids = self.model.generate(
+                **inputs, do_sample=False, max_new_tokens=64
+            )
+            generated_texts = self.processor.batch_decode(
+                generated_ids,
+                skip_special_tokens=True,
+            )
+            return generated_texts[0]
+        except Exception as e:
+            return f"Error analyzing image(s): {e}"
+
+
+class VideoAnalyzerTool(Tool, SmolVLM2):
+    name = "video_analyzer"
+    description = "Analyzes the video at a specified path according to the query"
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "The query according to which the video will be analyzed.",
+        },
+        "video_path": {
+            "type": "string",
+            "description": "A string containing the video path",
+        },
+    }
+    output_type = "string"
+
+    def __init__(self):
+        Tool.__init__(self)
+        SmolVLM2.__init__(self)
+
+    def forward(self, query: str, video_path: str) -> str:
+        try:
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "video", "path": video_path},
+                        {"type": "text", "text": query},
+                    ],
+                },
+            ]
+
+            inputs = self.processor.apply_chat_template(
+                messages,
+                add_generation_prompt=True,
+                tokenize=True,
+                return_dict=True,
+                return_tensors="pt",
+            ).to(self.model.device, dtype=torch.bfloat16)
+
+            generated_ids = self.model.generate(
+                **inputs, do_sample=False, max_new_tokens=64
+            )
+            generated_texts = self.processor.batch_decode(
+                generated_ids,
+                skip_special_tokens=True,
+            )
+
+            return generated_texts[0]
+        except Exception as e:
+            return f"Error analyzing video: {e}"
+        finally:
+            # Cleanup if needed
+            if video_path and os.path.exists(video_path):
+                os.remove(video_path)
+
+
+class FileDownloaderTool(Tool):
+    name = "file_downloader"
+    description = "Downloads a file, returning the name of the temporarily saved file"
+    inputs = {
+        "file_url": {
+            "type": "string",
+            "description": "The url from which the file shall be downloaded.",
+        },
+    }
+    output_type = "string"
+
+    def forward(self, file_url: str) -> str:
+        response = requests.get(file_url, stream=True)
+        response.raise_for_status()
+        original_filename = (
+            response.headers.get("content-disposition", "")
+            .split("=", -1)[-1]
+            .strip('"')
+        )
+
+        # Even if original_filename is empty or there is no extension, ext will be ""
+        ext = os.path.splitext(original_filename)[-1]
+
+        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_file:
+            for chunk in response.iter_content(chunk_size=8192):
+                tmp_file.write(chunk)
+            return tmp_file.name
+
+
+class YoutubeVideoDownloaderTool(Tool):
+    name = "youtube_video_downloader"
+    description = "Downloads the video from the specified url and returns the path where the video was saved"
+    inputs = {
+        "video_url": {
+            "type": "string",
+            "description": "A string containing the video url",
+        },
+    }
+    output_type = "string"
+
+    def forward(self, video_url: str) -> str:
+        try:
+            saved_video_path = ""
+            temp_dir = tempfile.gettempdir()
+            ydl_opts = {
+                "outtmpl": f"{temp_dir}/%(title)s.%(ext)s",  # Absolute or relative path
+                "quiet": True,
+            }
+
+            # Download the youtube video as a file in the tmp directory
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                info = ydl.extract_info(video_url, download=True)
+                saved_video_path = ydl.prepare_filename(info)
+            return saved_video_path
+        except Exception as e:
+            return f"Error downloading video: {e}"
+
+
+class LoadXlsxFileTool(Tool):
+    name = "load_xlsx_file"
+    description = "This tool loads an xlsx file into pandas and returns it"
+    inputs = {"file_path": {"type": "string", "description": "File path"}}
+    output_type = "object"
+
+    def forward(self, file_path: str) -> object:
+        return pd.read_excel(file_path)
+
+
+class LoadTextFileTool(Tool):
+    name = "load_text_file"
+    description = "This tool loads any text file"
+    inputs = {"file_path": {"type": "string", "description": "File path"}}
+    output_type = "string"
+
+    def forward(self, file_path: str) -> str:
+        with open(file_path, "r", encoding="utf-8") as file:
+            return file.read()
+
+
+class WebpageTablesContextRetrieverTool(Tool):
+    name = "webpage_tables_context_retriever"
+    description = """Retrieves structural context for all tables on a webpage.
+    Returns table indexes with captions, headers, and surrounding text to help identify relevant tables.
+    Use this first to determine which table index to extract."""
+    inputs = {
+        "url": {"type": "string", "description": "The URL of the webpage to analyze"}
+    }
+    output_type = "object"
+
+    def forward(self, url: str) -> Dict:
+        """Retrieve context information for all tables on the page"""
+        try:
+            response = requests.get(url, timeout=15)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.text, "html.parser")
+
+            tables = soup.find_all("table")
+            if not tables:
+                return {
+                    "status": "success",
+                    "tables": [],
+                    "message": "No tables found on page",
+                    "url": url,
+                }
+
+            results = []
+            for i, table in enumerate(tables):
+                context = {
+                    "index": i,
+                    "id": table.get("id", ""),
+                    "class": " ".join(table.get("class", [])),
+                    "summary": table.get("summary", ""),
+                    "caption": self._get_table_caption(table),
+                    "preceding_header": self._get_preceding_header(table),
+                    "surrounding_text": self._get_surrounding_text(table),
+                }
+                results.append(context)
+
+            return {
+                "status": "success",
+                "tables": results,
+                "url": url,
+                "message": f"Found {len(results)} tables with context information",
+                "suggestion": "Use html_table_extractor with the most relevant index",
+            }
+
+        except Exception as e:
+            return {
+                "status": "error",
+                "url": url,
+                "message": f"Failed to retrieve table contexts: {str(e)}",
+            }
+
+    def _get_table_caption(self, table) -> str:
+        """Extract table caption text if available"""
+        caption = table.find("caption")
+        return caption.get_text(strip=True) if caption else ""
+
+    def _get_preceding_header(self, table) -> str:
+        """Find the nearest preceding heading"""
+        for tag in table.find_all_previous(["h1", "h2", "h3", "h4", "h5", "h6"]):
+            return tag.get_text(strip=True)
+        return ""
+
+    def _get_surrounding_text(self, table, chars=150) -> str:
+        """Get relevant text around the table"""
+        prev_text = " ".join(
+            t.strip()
+            for t in table.find_all_previous(string=True, limit=3)
+            if t.strip()
+        )
+        next_text = " ".join(
+            t.strip() for t in table.find_all_next(string=True, limit=3) if t.strip()
+        )
+        return f"...{prev_text[-chars:]} [TABLE] {next_text[:chars]}..."
+
+
+class HtmlTableExtractorTool(Tool):
+    name = "html_table_extractor"
+    description = """Extracts a specific HTML table as structured data.
+    Use after webpage_tables_context_retriever to get the correct table index."""
+    inputs = {
+        "page_url": {
+            "type": "string",
+            "description": "The webpage URL containing the table",
+        },
+        "table_index": {
+            "type": "integer",
+            "description": "0-based index of the table to extract (from webpage_tables_context_retriever)",
+        },
+    }
+    output_type = "object"
+
+    def forward(self, page_url: str, table_index: int) -> Dict:
+        """Extract a specific table by index"""
+        try:
+            # First verify the URL is accessible
+            test_request = requests.head(page_url, timeout=5)
+            test_request.raise_for_status()
+
+            # Read all tables
+            tables = pd.read_html(page_url)
+
+            if not tables:
+                return {
+                    "status": "error",
+                    "message": "No tables found at URL",
+                    "url": page_url,
+                }
+
+            # Validate index
+            if table_index < 0 or table_index >= len(tables):
+                return {
+                    "status": "error",
+                    "message": f"Invalid table index {table_index}. Page has {len(tables)} tables.",
+                    "url": page_url,
+                    "available_indexes": list(range(len(tables))),
+                }
+
+            # Convert DataFrame to JSON-serializable format
+            df = tables[table_index]
+            return {
+                "status": "success",
+                "table_index": table_index,
+                "table_data": df,
+                "url": page_url,
+            }
+
+        except Exception as e:
+            return {
+                "status": "error",
+                "message": f"Table extraction failed: {str(e)}",
+                "url": page_url,
+                "table_index": table_index,
+            }
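The two table tools are meant to be chained, as their descriptions state: webpage_tables_context_retriever lists every table with its caption and surrounding text, and html_table_extractor then pulls the chosen index. A short usage sketch, calling forward directly rather than going through the agent; the URL and index are illustrative only:

    # Hypothetical two-step table extraction; not part of the commit.
    from tools import HtmlTableExtractorTool, WebpageTablesContextRetrieverTool

    context_tool = WebpageTablesContextRetrieverTool()
    extractor_tool = HtmlTableExtractorTool()

    url = "https://en.wikipedia.org/wiki/List_of_chemical_elements"  # example page
    ctx = context_tool.forward(url)
    for table in ctx.get("tables", []):
        print(table["index"], table["caption"] or table["preceding_header"])

    # Index 0 is a placeholder; choose it from the context printed above.
    result = extractor_tool.forward(page_url=url, table_index=0)
    if result["status"] == "success":
        print(result["table_data"].head())  # table_data is a pandas DataFrame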