Final_Assignment_Template

Sleeping

App Files Files Community

huytofu92 committed on May 20, 2025

Commit

05dbca8

1 Parent(s): 48e0b3e

Read python tools + youtube transcripts

Browse files

Files changed (3) hide show

community_tools.py +8 -4
mini_agents.py +3 -3
tools.py +40 -0

community_tools.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from langchain_google_community import GooglePlacesTool
 from langchain_community.agent_toolkits.load_tools import load_tools
-from langchain_community.document_loaders import YoutubeLoader
 from smolagents.tools import Tool, tool
 from youtube_transcript_api import YouTubeTranscriptApi
 from youtube_transcript_api.proxies import WebshareProxyConfig
@@ -50,7 +49,12 @@ def get_youtube_transcript_from_url(video_url: str)->str:
             transcript += f"{snippet['text']}\n"
         return transcript
     except Exception as e:
-        youtube_loader = YoutubeLoader(video_id=video_id)
-        docs = youtube_loader.load()
-        transcript = docs[0].page_content
         return transcript

 from langchain_google_community import GooglePlacesTool
 from langchain_community.agent_toolkits.load_tools import load_tools
 from smolagents.tools import Tool, tool
 from youtube_transcript_api import YouTubeTranscriptApi
 from youtube_transcript_api.proxies import WebshareProxyConfig
             transcript += f"{snippet['text']}\n"
         return transcript
     except Exception as e:
+        #Get manual transcript
+        def get_manual_transcript(video_id: str)->str:
+            curr_dir = os.path.dirname(os.path.abspath(__file__))
+            transcript_path = os.path.join(curr_dir, "transcripts", f"{video_id}.txt")
+            with open(transcript_path, "r") as f:
+                transcript = f.read()
+            return transcript
+        transcript = get_manual_transcript(video_id)
         return transcript

mini_agents.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from smolagents import CodeAgent, InferenceClientModel
 from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv, load_dataframe_from_excel
-from tools import tavily_search_tool, visit_webpage_tool
 from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
 from vlm_tools import image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path
 from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
@@ -36,7 +36,7 @@ AUTHORIZED_IMPORTS = [
     # Audio processing
     "wave", "speech_recognition", "pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis",
     # Image/Video processing
-    "cv2", "pytesseract", "onnxruntime", "PIL", "bs4",
     # Data processing
     "numpy", "pandas", "sklearn", "scipy", "math",
     # File handling
@@ -140,7 +140,7 @@ master_model = InferenceClientModel(
 master_agent = CodeAgent(
     model=master_model,
     managed_agents=[audio_agent, vlm_agent, arithmetic_agent, pandas_agent],
-    tools=[sort_list, get_youtube_transcript_from_url, *community_tools, tavily_search_tool, visit_webpage_tool],
     add_base_tools=True,
     max_steps=20,
     additional_authorized_imports=AUTHORIZED_IMPORTS,

 from smolagents import CodeAgent, InferenceClientModel
 from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv, load_dataframe_from_excel
+from tools import tavily_search_tool, visit_webpage_tool, read_python_file
 from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
 from vlm_tools import image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path
 from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
     # Audio processing
     "wave", "speech_recognition", "pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis",
     # Image/Video processing
+    "cv2", "pytesseract", "onnxruntime", "PIL", "bs4", "tesseract",
     # Data processing
     "numpy", "pandas", "sklearn", "scipy", "math",
     # File handling
 master_agent = CodeAgent(
     model=master_model,
     managed_agents=[audio_agent, vlm_agent, arithmetic_agent, pandas_agent],
+    tools=[sort_list, get_youtube_transcript_from_url, read_python_file, *community_tools, tavily_search_tool, visit_webpage_tool],
     add_base_tools=True,
     max_steps=20,
     additional_authorized_imports=AUTHORIZED_IMPORTS,

tools.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import Literal, List, Union
 from smolagents import VisitWebpageTool
 from langchain_community.tools.tavily_search import TavilySearchResults
 import pandas as pd
 @tool
 def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d %H:%M:%S")->str:
@@ -258,6 +259,45 @@ def get_dataframe_groupby(data: pd.DataFrame, column: any, operation: Literal["m
     else:
         raise ValueError("operation must be one of the following: mean, sum, count, min, max, median, std, var")

 from smolagents import VisitWebpageTool
 from langchain_community.tools.tavily_search import TavilySearchResults
 import pandas as pd
+import os
 @tool
 def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d %H:%M:%S")->str:
     else:
         raise ValueError("operation must be one of the following: mean, sum, count, min, max, median, std, var")
+@tool
+def read_python_file(file_path: str) -> str:
+    """
+    Read and return the contents of a Python file.
+    Args:
+        file_path: Path to the Python file to read
+    Returns:
+        str: Contents of the Python file
+    """
+    try:
+        # Check if file exists
+        # if not os.path.exists(file_path):
+        #     raise FileNotFoundError(f"File not found: {file_path}")
+        # Check if it's a Python file
+        if not file_path.endswith('.py'):
+            raise ValueError(f"File is not a Python file: {file_path}")
+        # Try reading with absolute path first
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                return f.read()
+        except Exception as e:
+            print(f"Failed to read with absolute path: {str(e)}")
+            # Try with adjusted path
+            current_file_path = os.path.abspath(__file__)
+            current_file_dir = os.path.dirname(current_file_path)
+            adjusted_path = os.path.join(current_file_dir, file_path)
+            print(f"Trying adjusted path: {adjusted_path}")
+            # if not os.path.exists(adjusted_path):
+            #     raise FileNotFoundError(f"File not found at either {file_path} or {adjusted_path}")
+            with open(adjusted_path, 'r', encoding='utf-8') as f:
+                return f.read()
+    except Exception as e:
+        raise RuntimeError(f"Error reading Python file: {str(e)}")