tommaso1288 committed
Commit 6220346 · 1 Parent(s): 37c9a6b

Added some tools

.gitignore ADDED
@@ -0,0 +1,4 @@
+ .venv
+ .idea
+ .env
+ /tests/data/
requirements.txt CHANGED
@@ -7,4 +7,8 @@ pandas~=2.2.3
  openpyxl~=3.1.5
  litellm~=1.66.1
  easyocr~=1.7.2
- wikipedia-api
+ wikipedia-api
+ transformers~=4.51.3
+ torch~=2.7.0
+ pillow~=11.1.0
+ pytesseract~=0.3.13
src/agent/base_agent.py CHANGED
@@ -1,13 +1,11 @@
  from abc import abstractmethod, ABC
-
- from smolagents import CodeAgent, Tool, DuckDuckGoSearchTool, WikipediaSearchTool
-
- from tools.extract_text_from_image import ExtractTextFromImage
- from tools.weater_info_tool import WeatherInfoTool
+ from smolagents import CodeAgent, Tool
+ from agent.constants import ADDITIONAL_AUTHORIZED_IMPORT
+ from tools.tools_utils import ToolsUtils


  class BaseAgent(ABC):
-     def __init__(self, model_name: str, tools: list[Tool] | None = None, planning_interval: int = 3, max_steps: int = 10, use_all_custom_tools: bool = True):
+     def __init__(self, model_name: str, tools: list[Tool] | None = None, planning_interval: int = 3, max_steps: int = 12, use_all_custom_tools: bool = True):
          self.model_name: str = model_name
          self.planning_interval = planning_interval
          self.max_steps = max_steps
@@ -29,12 +27,7 @@ class BaseAgent(ABC):
          if tools is None:
              tools = []
          if self.use_all_custom_tools:
-             tools = [
-                 ExtractTextFromImage(),
-                 WeatherInfoTool(),
-                 DuckDuckGoSearchTool(),
-                 WikipediaSearchTool()
-             ]
+             tools = ToolsUtils.get_default_tools()
          return tools

      def add_tool(self, tool: Tool):
@@ -45,18 +38,8 @@ class BaseAgent(ABC):
              model=self.get_model(),
              tools=[t for t in self.tools],
              add_base_tools=True,
-             verbosity_level=1,
-             additional_authorized_imports=[
-                 "pandas",
-                 "numpy",
-                 "datetime",
-                 "json",
-                 "re",
-                 "math",
-                 "os",
-                 "requests",
-                 "csv",
-                 "urllib"],
+             verbosity_level=2,
+             additional_authorized_imports=ADDITIONAL_AUTHORIZED_IMPORT,
              planning_interval=self.planning_interval,
              max_steps=self.max_steps
          )
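
Note: a minimal sketch (not part of the commit) of how a concrete agent could build on the refactored BaseAgent. The subclass name and model choice are illustrative, and it assumes get_model() is the abstract hook behind the self.get_model() call above; LiteLLMModel is used only because litellm is already in requirements.txt.

from smolagents import LiteLLMModel
from agent.base_agent import BaseAgent


class GeminiAgent(BaseAgent):  # hypothetical subclass, not in this commit
    def get_model(self):
        # assumed abstract hook, inferred from the self.get_model() call in the diff
        return LiteLLMModel(model_id=self.model_name)


agent = GeminiAgent(model_name="gemini/gemini-2.0-flash")
# use_all_custom_tools defaults to True, so the agent picks up ToolsUtils.get_default_tools()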
src/agent/constants.py ADDED
@@ -0,0 +1,12 @@
+ ADDITIONAL_AUTHORIZED_IMPORT = [
+     "pandas",
+     "numpy",
+     "datetime",
+     "json",
+     "re",
+     "math",
+     "os",
+     "requests",
+     "csv",
+     "urllib"
+ ]
src/{core → managers}/__init__.py RENAMED
File without changes
src/{core → managers}/evaluator.py RENAMED
File without changes
src/managers/file_manager.py ADDED
@@ -0,0 +1,21 @@
+ import tempfile
+ import os
+
+ class FileManager:
+     @staticmethod
+     def create_temp_file(content: bytes, suffix: str = ".bin") -> str:
+         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
+         temp_file.write(content)
+         temp_file.close()
+         return temp_file.name
+
+     @staticmethod
+     def create_temp_path(suffix: str = ".bin") -> str:
+         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
+         temp_file.close()
+         return temp_file.name
+
+     @staticmethod
+     def cleanup_file(file_path: str):
+         if os.path.exists(file_path):
+             os.remove(file_path)
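
Note: a short usage sketch (not part of the commit) for the new FileManager helper. The temp files are created with delete=False, so callers are expected to clean up explicitly; the byte content and suffix below are only illustrative.

from managers.file_manager import FileManager

path = FileManager.create_temp_file(b"hello", suffix=".txt")
try:
    with open(path, "rb") as f:
        print(f.read())  # b'hello'
finally:
    FileManager.cleanup_file(path)  # explicit cleanup because delete=False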
src/tools/caption_image_tool.py ADDED
@@ -0,0 +1,32 @@
+ from smolagents import Tool
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+ from PIL import Image
+ import torch
+
+ class CaptionImageTool(Tool):
+     name = "caption_image_tool"
+     description = "Caption an image using a free Hugging Face model."
+     inputs = {
+         "image_path": {
+             "type": "string",
+             "description": "The path of the local image file to process"
+         }
+     }
+     output_type = "string"
+
+     def __init__(self):
+         super().__init__()
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.model_name = "Salesforce/blip-image-captioning-base"
+         self.processor = BlipProcessor.from_pretrained(self.model_name)
+         self.model = BlipForConditionalGeneration.from_pretrained(self.model_name).to(self.device)
+
+     def forward(self, image_path: str) -> str:
+         try:
+             image = Image.open(image_path).convert('RGB')
+             inputs = self.processor(image, return_tensors="pt").to(self.device)
+             out = self.model.generate(**inputs)
+             caption = self.processor.decode(out[0], skip_special_tokens=True)
+             return "Image caption: " + caption
+         except Exception as e:
+             return f"Error caption_image_tool is not working properly, error: {e}, please skip this tool"
src/tools/chess_board_recognition_tool.py ADDED
@@ -0,0 +1,41 @@
+ from smolagents import Tool
+ from transformers import DetrForObjectDetection, DetrImageProcessor
+ from PIL import Image
+ import torch
+
+ class ChessBoardRecognitionTool(Tool):
+     name = "chess_board_recognition"
+     description = "Detects the pieces on a chess board image and returns their labels, confidence scores and bounding boxes."
+     inputs = {
+         "image_path": {
+             "type": "string",
+             "description": "The path of the image file to process"
+         }
+     }
+     output_type = "string"
+
+     def __init__(self):
+         super().__init__()
+         self.model_name = "aesat/detr-finetuned-chess"
+         self.model = DetrForObjectDetection.from_pretrained(self.model_name)
+         self.processor = DetrImageProcessor.from_pretrained(self.model_name)
+
+     def forward(self, image_path: str) -> str:
+         try:
+             image = Image.open(image_path).convert("RGB")
+             inputs = self.processor(images=image, return_tensors="pt")
+             with torch.no_grad():
+                 outputs = self.model(**inputs)
+
+             target_sizes = torch.tensor([image.size[::-1]])
+             results = self.processor.post_process_object_detection(
+                 outputs, target_sizes=target_sizes, threshold=0.9
+             )[0]
+
+             result_str = "Chess board description:\n"
+             for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
+                 box = [round(i, 2) for i in box.tolist()]
+                 result_str += f"Label: {label}, Confidence: {round(score.item(), 3)}, Box: {box}\n"
+             return result_str
+         except Exception as e:
+             return f"Error chess_board_recognition is not working properly, error: {e}, please skip this tool"
src/tools/convert_audio_to_text_tool.py ADDED
@@ -0,0 +1,29 @@
+ from smolagents import Tool
+ from transformers import pipeline
+
+ class ConvertAudioToTextTool(Tool):
+     name = "convert_audio_to_text"
+     description = "Transcribe an audio file to text using a free Hugging Face model."
+     inputs = {
+         "audio_path": {
+             "type": "string",
+             "description": "The path of the audio file to process"
+         }
+     }
+     output_type = "string"
+
+     def __init__(self):
+         super().__init__()
+         self.model = "openai/whisper-small"
+         self.transcriber = pipeline(
+             "automatic-speech-recognition",
+             model=self.model,
+             return_timestamps=True
+         )
+
+     def forward(self, audio_path: str) -> str:
+         try:
+             result = self.transcriber(audio_path)
+             return f"Audio transcribed: {result['text']}"
+         except Exception as e:
+             return f"Error convert_audio_to_text is not working properly, error: {e}, please skip this tool."
src/tools/convert_image_to_text_tool.py ADDED
@@ -0,0 +1,29 @@
+ from smolagents import Tool
+ from transformers import pipeline
+
+ class ConvertImageToTextTool(Tool):
+     name = "convert_image_to_text"
+     description = "Generate a text description of an image file using a free Hugging Face model."
+     inputs = {
+         "image_path": {
+             "type": "string",
+             "description": "The path of the image file to process"
+         }
+     }
+     output_type = "string"
+
+     def __init__(self):
+         super().__init__()
+         self.model = "nlpconnect/vit-gpt2-image-captioning"
+         self.transcriber = pipeline(
+             "image-to-text",
+             model=self.model,
+             use_fast=True
+         )
+
+     def forward(self, image_path: str) -> str:
+         try:
+             result = self.transcriber(image_path)
+             return f"Image description: {result[0]['generated_text']}"
+         except Exception as e:
+             return f"Error convert_image_to_text is not working properly, error: {e}, please skip this tool"
src/tools/extract_text_from_image.py DELETED
@@ -1,43 +0,0 @@
- import os
- import easyocr
- from smolagents import Tool
-
-
- class ExtractTextFromImage(Tool):
-     name = "extract_text_from_image"
-     description = "A tool for extracting text from an image using the EasyOCR library."
-     inputs = {
-         "image_path": {
-             "type": "string",
-             "description": "The file path to the image to be processed."
-         }
-     }
-     output_type = "string"
-
-     def forward(self, image_path: str) -> str:
-         """
-         Extract text from an image file using EasyOCR.
-
-         Args:
-             image_path (str): The path to the image file to be processed.
-
-         Returns:
-             str: The extracted text from the image or an error message.
-         """
-         try:
-             if not os.path.exists(image_path):
-                 return f"Error: File '{image_path}' does not exist."
-
-             reader = easyocr.Reader(['en'], gpu=False)  # Use GPU=True for faster execution if available
-
-             results = reader.readtext(image_path, detail=1)
-
-             if not results:
-                 return "No text detected in the image."
-             extracted_texts = [result[1] for result in results]  # Extract the text field from results
-             extracted_text = "\n".join(extracted_texts)
-             return f"Extracted text from image:\n\n{extracted_text}"
-         except ImportError:
-             return "Error: easyocr is not installed. Please install it with 'pip install easyocr'."
-         except Exception as e:
-             return f"Error extracting text from image: {str(e)}"
src/tools/fetch_url_content_tool.py ADDED
@@ -0,0 +1,29 @@
+ import os
+ import requests
+ from smolagents import Tool
+ from managers.file_manager import FileManager
+
+
+ class FetchURLContentTool(Tool):
+     name = "fetch_url_content"
+     description = "Downloads the content or file at the given url and returns the local path of the downloaded file."
+     inputs = {
+         "url": {
+             "type": "string",
+             "description": "The url of the content or file to download."
+         }
+     }
+     output_type = "string"
+
+     def forward(self, url: str) -> str:
+         try:
+             headers = {
+                 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+             }
+             response = requests.get(url, headers=headers)
+             response.raise_for_status()
+             suffix = os.path.splitext(url)[-1] or '.bin'
+             return "The path of the downloaded file is: " + FileManager.create_temp_file(response.content, suffix)
+         except Exception as e:
+             return f"Error fetch_url_content is not working properly, error: {e}, please skip this tool"
+
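
Note: a usage sketch (not part of the commit) for FetchURLContentTool. The tool returns a sentence that embeds the temp-file path, so the prefix is stripped before cleanup; the URL is the same one the integration test uses.

from tools.fetch_url_content_tool import FetchURLContentTool
from managers.file_manager import FileManager

result = FetchURLContentTool().forward(
    "https://upload.wikimedia.org/wikipedia/commons/3/3c/Shaki_waterfall.jpg")
path = result.removeprefix("The path of the downloaded file is: ")  # Python 3.9+
FileManager.cleanup_file(path)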
src/tools/tools_utils.py ADDED
@@ -0,0 +1,20 @@
+ from smolagents import DuckDuckGoSearchTool, WikipediaSearchTool, PythonInterpreterTool
+
+ from tools.convert_audio_to_text_tool import ConvertAudioToTextTool
+ from tools.convert_image_to_text_tool import ConvertImageToTextTool
+ from tools.fetch_url_content_tool import FetchURLContentTool
+
+
+ class ToolsUtils:
+
+     @staticmethod
+     def get_default_tools():
+         return [
+             FetchURLContentTool(),
+             ConvertAudioToTextTool(),
+             # ConvertImageToTextTool(),
+             DuckDuckGoSearchTool(),
+             WikipediaSearchTool(),
+             PythonInterpreterTool()
+         ]
+
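
Note: this default list is what BaseAgent now picks up when use_all_custom_tools is true. Heavier vision tools can be opted into per agent; a sketch (not part of the commit):

from tools.tools_utils import ToolsUtils
from tools.caption_image_tool import CaptionImageTool

tools = ToolsUtils.get_default_tools()
tools.append(CaptionImageTool())  # opt in to BLIP captioning when image tasks are expected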
src/ui/App.py CHANGED
@@ -1,5 +1,5 @@
  import gradio as gr
- from src.core.evaluator import Evaluator
+ from managers.evaluator import Evaluator

  class App:
      def __init__(self):
{src/models → tests}/__init__.py RENAMED
File without changes
tests/tools_integration_test.py ADDED
@@ -0,0 +1,57 @@
+ import os
+
+ from managers.file_manager import FileManager
+ from tools.chess_board_recognition_tool import ChessBoardRecognitionTool
+ from tools.convert_audio_to_text_tool import ConvertAudioToTextTool
+ from tools.convert_image_to_text_tool import ConvertImageToTextTool
+ from tools.fetch_url_content_tool import FetchURLContentTool
+
+
+ def test_fetch_url():
+     print("Test FetchURLContentTool...")
+     tool = FetchURLContentTool()
+     url = "https://upload.wikimedia.org/wikipedia/commons/3/3c/Shaki_waterfall.jpg"  # small image
+     path = tool.forward(url).removeprefix("The path of the downloaded file is: ")
+     print(f"Downloaded in: {path}")
+     FileManager.cleanup_file(path)
+
+ def test_transcribe_audio():
+     print("Test ConvertAudioToTextTool...")
+     tool = ConvertAudioToTextTool()
+     sample_audio = "data/sample_audio.mp3"
+     if not os.path.exists(sample_audio):
+         print("File not found: data/sample_audio.mp3")
+         return
+     text = tool.forward(sample_audio)
+     print(f"Result:\n{text}")
+
+ def test_transcribe_image():
+     print("Test ConvertImageToTextTool...")
+     tool = ConvertImageToTextTool()
+     sample_image = "data/sample_image.jpg"
+     if not os.path.exists(sample_image):
+         print("File not found: data/sample_image.jpg")
+         return
+     text = tool.forward(sample_image)
+     print(f"Result:\n{text}")
+
+ def test_chess_board_recognition_image():
+     print("Test ChessBoardRecognitionTool...")
+     tool = ChessBoardRecognitionTool()
+     sample_image = "data/sample_image.jpg"
+     if not os.path.exists(sample_image):
+         print("File not found: data/sample_image.jpg")
+         return
+     caption = tool.forward(sample_image)
+     print(f"Result:\n{caption}")
+
+ def run_all_tests():
+     print("\n--- START TEST ---\n")
+     test_fetch_url()
+     test_transcribe_audio()
+     test_transcribe_image()
+     # test_chess_board_recognition_image()
+     print("\n--- ALL TESTS COMPLETED ---\n")
+
+ if __name__ == "__main__":
+     run_all_tests()
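
Note (not part of the commit): since the module name matches pytest's *_test.py pattern and the functions are named test_*, these checks can also be collected by pytest (for example, pytest tools_integration_test.py -s, run from the tests directory so the relative data/ paths resolve). The sample media are expected under tests/data/, which the new .gitignore entry keeps out of the repo.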