tommaso1288 committed
Commit 6220346 · 1 Parent(s): 37c9a6b

Added some tools

.gitignore ADDED
@@ -0,0 +1,4 @@
+ .venv
+ .idea
+ .env
+ /tests/data/
requirements.txt CHANGED
@@ -7,4 +7,8 @@ pandas~=2.2.3
  openpyxl~=3.1.5
  litellm~=1.66.1
  easyocr~=1.7.2
- wikipedia-api
+ wikipedia-api
+ transformers~=4.51.3
+ torch~=2.7.0
+ pillow~=11.1.0
+ pytesseract~=0.3.13
src/agent/base_agent.py CHANGED
@@ -1,13 +1,11 @@
  from abc import abstractmethod, ABC
-
- from smolagents import CodeAgent, Tool, DuckDuckGoSearchTool, WikipediaSearchTool
-
- from tools.extract_text_from_image import ExtractTextFromImage
- from tools.weater_info_tool import WeatherInfoTool
+ from smolagents import CodeAgent, Tool
+ from agent.constants import ADDITIONAL_AUTHORIZED_IMPORT
+ from tools.tools_utils import ToolsUtils


  class BaseAgent(ABC):
-     def __init__(self, model_name: str, tools: list[Tool] | None = None, planning_interval: int = 3, max_steps: int = 10, use_all_custom_tools: bool = True):
+     def __init__(self, model_name: str, tools: list[Tool] | None = None, planning_interval: int = 3, max_steps: int = 12, use_all_custom_tools: bool = True):
          self.model_name: str = model_name
          self.planning_interval = planning_interval
          self.max_steps = max_steps
@@ -29,12 +27,7 @@ class BaseAgent(ABC):
          if tools is None:
              tools = []
          if self.use_all_custom_tools:
-             tools = [
-                 ExtractTextFromImage(),
-                 WeatherInfoTool(),
-                 DuckDuckGoSearchTool(),
-                 WikipediaSearchTool()
-             ]
+             tools = ToolsUtils.get_default_tools()
          return tools

      def add_tool(self, tool: Tool):
@@ -45,18 +38,8 @@ class BaseAgent(ABC):
              model=self.get_model(),
              tools=[t for t in self.tools],
              add_base_tools=True,
-             verbosity_level=1,
-             additional_authorized_imports=[
-                 "pandas",
-                 "numpy",
-                 "datetime",
-                 "json",
-                 "re",
-                 "math",
-                 "os",
-                 "requests",
-                 "csv",
-                 "urllib"],
+             verbosity_level=2,
+             additional_authorized_imports=ADDITIONAL_AUTHORIZED_IMPORT,
              planning_interval=self.planning_interval,
              max_steps=self.max_steps
          )
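
Note: a minimal sketch (not part of the commit) of how a concrete agent could build on the refactored BaseAgent. The subclass name and model choice are illustrative, and it assumes get_model() is the abstract hook behind the self.get_model() call above; LiteLLMModel is used only because litellm is already in requirements.txt.

from smolagents import LiteLLMModel
from agent.base_agent import BaseAgent


class GeminiAgent(BaseAgent):  # hypothetical subclass, not in this commit
    def get_model(self):
        # assumed abstract hook, inferred from the self.get_model() call in the diff
        return LiteLLMModel(model_id=self.model_name)


agent = GeminiAgent(model_name="gemini/gemini-2.0-flash")
# use_all_custom_tools defaults to True, so the agent picks up ToolsUtils.get_default_tools()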
src/agent/constants.py ADDED
@@ -0,0 +1,12 @@
+ ADDITIONAL_AUTHORIZED_IMPORT = [
+     "pandas",
+     "numpy",
+     "datetime",
+     "json",
+     "re",
+     "math",
+     "os",
+     "requests",
+     "csv",
+     "urllib"
+ ]
src/{core → managers}/__init__.py RENAMED
File without changes
src/{core → managers}/evaluator.py RENAMED
File without changes
src/managers/file_manager.py ADDED
@@ -0,0 +1,21 @@
+ import tempfile
+ import os
+
+ class FileManager:
+     @staticmethod
+     def create_temp_file(content: bytes, suffix: str = ".bin") -> str:
+         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
+         temp_file.write(content)
+         temp_file.close()
+         return temp_file.name
+
+     @staticmethod
+     def create_temp_path(suffix: str = ".bin") -> str:
+         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
+         temp_file.close()
+         return temp_file.name
+
+     @staticmethod
+     def cleanup_file(file_path: str):
+         if os.path.exists(file_path):
+             os.remove(file_path)
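
Note: a short usage sketch (not part of the commit) for the new FileManager helper. The temp files are created with delete=False, so callers are expected to clean up explicitly; the byte content and suffix below are only illustrative.

from managers.file_manager import FileManager

path = FileManager.create_temp_file(b"hello", suffix=".txt")
try:
    with open(path, "rb") as f:
        print(f.read())  # b'hello'
finally:
    FileManager.cleanup_file(path)  # explicit cleanup because delete=False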
src/tools/caption_image_tool.py ADDED
@@ -0,0 +1,32 @@
+ from smolagents import Tool
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+ from PIL import Image
+ import torch
+
+ class CaptionImageTool(Tool):
+     name = "caption_image_tool"
+     description = "Caption an image using a free Hugging Face model."
+     inputs = {
+         "image_path": {
+             "type": "string",
+             "description": "The path of the local image file to process"
+         }
+     }
+     output_type = "string"
+
+     def __init__(self):
+         super().__init__()
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.model_name = "Salesforce/blip-image-captioning-base"
+         self.processor = BlipProcessor.from_pretrained(self.model_name)
+         self.model = BlipForConditionalGeneration.from_pretrained(self.model_name).to(self.device)
+
+     def forward(self, image_path: str) -> str:
+         try:
+             image = Image.open(image_path).convert('RGB')
+             inputs = self.processor(image, return_tensors="pt").to(self.device)
+             out = self.model.generate(**inputs)
+             caption = self.processor.decode(out[0], skip_special_tokens=True)
+             return "Image caption: " + caption
+         except Exception as e:
+             return f"Error caption_image_tool is not working properly, error: {e}, please skip this tool"
src/tools/chess_board_recognition_tool.py ADDED
@@ -0,0 +1,41 @@
+ from smolagents import Tool
+ from transformers import DetrForObjectDetection, DetrImageProcessor
+ from PIL import Image
+ import torch
+
+ class ChessBoardRecognitionTool(Tool):
+     name = "chess_board_recognition"
+     description = "Detects the pieces on a chess board image and returns their labels, confidence scores and bounding boxes."
+     inputs = {
+         "image_path": {
+             "type": "string",
+             "description": "The path of the image file to process"
+         }
+     }
+     output_type = "string"
+
+     def __init__(self):
+         super().__init__()
+         self.model_name = "aesat/detr-finetuned-chess"
+         self.model = DetrForObjectDetection.from_pretrained(self.model_name)
+         self.processor = DetrImageProcessor.from_pretrained(self.model_name)
+
+     def forward(self, image_path: str) -> str:
+         try:
+             image = Image.open(image_path).convert("RGB")
+             inputs = self.processor(images=image, return_tensors="pt")
+             with torch.no_grad():
+                 outputs = self.model(**inputs)
+
+             target_sizes = torch.tensor([image.size[::-1]])
+             results = self.processor.post_process_object_detection(
+                 outputs, target_sizes=target_sizes, threshold=0.9
+             )[0]
+
+             result_str = "Chess board description:\n"
+             for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
+                 box = [round(i, 2) for i in box.tolist()]
+                 result_str += f"Label: {label}, Confidence: {round(score.item(), 3)}, Box: {box}\n"
+             return result_str
+         except Exception as e:
+             return f"Error chess_board_recognition is not working properly, error: {e}, please skip this tool"
src/tools/convert_audio_to_text_tool.py ADDED
@@ -0,0 +1,29 @@
+ from smolagents import Tool
+ from transformers import pipeline
+
+ class ConvertAudioToTextTool(Tool):
+     name = "convert_audio_to_text"
+     description = "Transcribe an audio file to text using a free Hugging Face model."
+     inputs = {
+         "audio_path": {
+             "type": "string",
+             "description": "The path of the audio file to process"
+         }
+     }
+     output_type = "string"
+
+     def __init__(self):
+         super().__init__()
+         self.model = "openai/whisper-small"
+         self.transcriber = pipeline(
+             "automatic-speech-recognition",
+             model=self.model,
+             return_timestamps=True
+         )
+
+     def forward(self, audio_path: str) -> str:
+         try:
+             result = self.transcriber(audio_path)
+             return f"Audio transcribed: {result['text']}"
+         except Exception as e:
+             return f"Error convert_audio_to_text is not working properly, error: {e}, please skip this tool."
src/tools/convert_image_to_text_tool.py ADDED
@@ -0,0 +1,29 @@
+ from smolagents import Tool
+ from transformers import pipeline
+
+ class ConvertImageToTextTool(Tool):
+     name = "convert_image_to_text"
+     description = "Generate a text description of an image file using a free Hugging Face model."
+     inputs = {
+         "image_path": {
+             "type": "string",
+             "description": "The path of the image file to process"
+         }
+     }
+     output_type = "string"
+
+     def __init__(self):
+         super().__init__()
+         self.model = "nlpconnect/vit-gpt2-image-captioning"
+         self.transcriber = pipeline(
+             "image-to-text",
+             model=self.model,
+             use_fast=True
+         )
+
+     def forward(self, image_path: str) -> str:
+         try:
+             result = self.transcriber(image_path)
+             return f"Image description: {result[0]['generated_text']}"
+         except Exception as e:
+             return f"Error convert_image_to_text is not working properly, error: {e}, please skip this tool"
src/tools/extract_text_from_image.py DELETED
@@ -1,43 +0,0 @@
- import os
- import easyocr
- from smolagents import Tool
-
-
- class ExtractTextFromImage(Tool):
-     name = "extract_text_from_image"
-     description = "A tool for extracting text from an image using the EasyOCR library."
-     inputs = {
-         "image_path": {
-             "type": "string",
-             "description": "The file path to the image to be processed."
-         }
-     }
-     output_type = "string"
-
-     def forward(self, image_path: str) -> str:
-         """
-         Extract text from an image file using EasyOCR.
-
-         Args:
-             image_path (str): The path to the image file to be processed.
-
-         Returns:
-             str: The extracted text from the image or an error message.
-         """
-         try:
-             if not os.path.exists(image_path):
-                 return f"Error: File '{image_path}' does not exist."
-
-             reader = easyocr.Reader(['en'], gpu=False)  # Use GPU=True for faster execution if available
-
-             results = reader.readtext(image_path, detail=1)
-
-             if not results:
-                 return "No text detected in the image."
-             extracted_texts = [result[1] for result in results]  # Extract the text field from results
-             extracted_text = "\n".join(extracted_texts)
-             return f"Extracted text from image:\n\n{extracted_text}"
-         except ImportError:
-             return "Error: easyocr is not installed. Please install it with 'pip install easyocr'."
-         except Exception as e:
-             return f"Error extracting text from image: {str(e)}"
src/tools/fetch_url_content_tool.py ADDED
@@ -0,0 +1,29 @@
+ import os
+ import requests
+ from smolagents import Tool
+ from managers.file_manager import FileManager
+
+
+ class FetchURLContentTool(Tool):
+     name = "fetch_url_content"
+     description = "Downloads the content or file at the given url and returns the local path of the downloaded file."
+     inputs = {
+         "url": {
+             "type": "string",
+             "description": "The url of the content or file to download."
+         }
+     }
+     output_type = "string"
+
+     def forward(self, url: str) -> str:
+         try:
+             headers = {
+                 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+             }
+             response = requests.get(url, headers=headers)
+             response.raise_for_status()
+             suffix = os.path.splitext(url)[-1] or '.bin'
+             return "The path of the downloaded file is: " + FileManager.create_temp_file(response.content, suffix)
+         except Exception as e:
+             return f"Error fetch_url_content is not working properly, error: {e}, please skip this tool"
+
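
Note: a usage sketch (not part of the commit) for FetchURLContentTool. The tool returns a sentence that embeds the temp-file path, so the prefix is stripped before cleanup; the URL is the same one the integration test uses.

from tools.fetch_url_content_tool import FetchURLContentTool
from managers.file_manager import FileManager

result = FetchURLContentTool().forward(
    "https://upload.wikimedia.org/wikipedia/commons/3/3c/Shaki_waterfall.jpg")
path = result.removeprefix("The path of the downloaded file is: ")  # Python 3.9+
FileManager.cleanup_file(path)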
src/tools/tools_utils.py ADDED
@@ -0,0 +1,20 @@
+ from smolagents import DuckDuckGoSearchTool, WikipediaSearchTool, PythonInterpreterTool
+
+ from tools.convert_audio_to_text_tool import ConvertAudioToTextTool
+ from tools.convert_image_to_text_tool import ConvertImageToTextTool
+ from tools.fetch_url_content_tool import FetchURLContentTool
+
+
+ class ToolsUtils:
+
+     @staticmethod
+     def get_default_tools():
+         return [
+             FetchURLContentTool(),
+             ConvertAudioToTextTool(),
+             # ConvertImageToTextTool(),
+             DuckDuckGoSearchTool(),
+             WikipediaSearchTool(),
+             PythonInterpreterTool()
+         ]
+
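
Note: this default list is what BaseAgent now picks up when use_all_custom_tools is true. Heavier vision tools can be opted into per agent; a sketch (not part of the commit):

from tools.tools_utils import ToolsUtils
from tools.caption_image_tool import CaptionImageTool

tools = ToolsUtils.get_default_tools()
tools.append(CaptionImageTool())  # opt in to BLIP captioning when image tasks are expected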
src/ui/App.py CHANGED
@@ -1,5 +1,5 @@
  import gradio as gr
- from src.core.evaluator import Evaluator
+ from managers.evaluator import Evaluator

  class App:
      def __init__(self):
{src/models → tests}/__init__.py RENAMED
File without changes
tests/tools_integration_test.py ADDED
@@ -0,0 +1,57 @@
+ import os
+
+ from managers.file_manager import FileManager
+ from tools.chess_board_recognition_tool import ChessBoardRecognitionTool
+ from tools.convert_audio_to_text_tool import ConvertAudioToTextTool
+ from tools.convert_image_to_text_tool import ConvertImageToTextTool
+ from tools.fetch_url_content_tool import FetchURLContentTool
+
+
+ def test_fetch_url():
+     print("Test FetchURLContentTool...")
+     tool = FetchURLContentTool()
+     url = "https://upload.wikimedia.org/wikipedia/commons/3/3c/Shaki_waterfall.jpg"  # small image
+     path = tool.forward(url).removeprefix("The path of the downloaded file is: ")
+     print(f"Downloaded in: {path}")
+     FileManager.cleanup_file(path)
+
+ def test_transcribe_audio():
+     print("Test ConvertAudioToTextTool...")
+     tool = ConvertAudioToTextTool()
+     sample_audio = "data/sample_audio.mp3"
+     if not os.path.exists(sample_audio):
+         print("File not found: data/sample_audio.mp3")
+         return
+     text = tool.forward(sample_audio)
+     print(f"Result:\n{text}")
+
+ def test_transcribe_image():
+     print("Test ConvertImageToTextTool...")
+     tool = ConvertImageToTextTool()
+     sample_image = "data/sample_image.jpg"
+     if not os.path.exists(sample_image):
+         print("File not found: data/sample_image.jpg")
+         return
+     text = tool.forward(sample_image)
+     print(f"Result:\n{text}")
+
+ def test_chess_board_recognition_image():
+     print("Test ChessBoardRecognitionTool...")
+     tool = ChessBoardRecognitionTool()
+     sample_image = "data/sample_image.jpg"
+     if not os.path.exists(sample_image):
+         print("File not found: data/sample_image.jpg")
+         return
+     caption = tool.forward(sample_image)
+     print(f"Result:\n{caption}")
+
+ def run_all_tests():
+     print("\n--- START TEST ---\n")
+     test_fetch_url()
+     test_transcribe_audio()
+     test_transcribe_image()
+     # test_chess_board_recognition_image()
+     print("\n--- ALL TESTS COMPLETED ---\n")
+
+ if __name__ == "__main__":
+     run_all_tests()
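
Note (not part of the commit): since the module name matches pytest's *_test.py pattern and the functions are named test_*, these checks can also be collected by pytest (for example, pytest tools_integration_test.py -s, run from the tests directory so the relative data/ paths resolve). The sample media are expected under tests/data/, which the new .gitignore entry keeps out of the repo.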