huytofu92 committed on
Commit
05dbca8
·
1 Parent(s): 48e0b3e

Read python tools + youtube transcripts

Browse files
Files changed (3) hide show
  1. community_tools.py +8 -4
  2. mini_agents.py +3 -3
  3. tools.py +40 -0
community_tools.py CHANGED
@@ -1,6 +1,5 @@
1
  from langchain_google_community import GooglePlacesTool
2
  from langchain_community.agent_toolkits.load_tools import load_tools
3
- from langchain_community.document_loaders import YoutubeLoader
4
  from smolagents.tools import Tool, tool
5
  from youtube_transcript_api import YouTubeTranscriptApi
6
  from youtube_transcript_api.proxies import WebshareProxyConfig
@@ -50,7 +49,12 @@ def get_youtube_transcript_from_url(video_url: str)->str:
50
  transcript += f"{snippet['text']}\n"
51
  return transcript
52
  except Exception as e:
53
- youtube_loader = YoutubeLoader(video_id=video_id)
54
- docs = youtube_loader.load()
55
- transcript = docs[0].page_content
 
 
 
 
 
56
  return transcript
 
1
  from langchain_google_community import GooglePlacesTool
2
  from langchain_community.agent_toolkits.load_tools import load_tools
 
3
  from smolagents.tools import Tool, tool
4
  from youtube_transcript_api import YouTubeTranscriptApi
5
  from youtube_transcript_api.proxies import WebshareProxyConfig
 
49
  transcript += f"{snippet['text']}\n"
50
  return transcript
51
  except Exception as e:
52
+ #Get manual transcript
53
def get_manual_transcript(video_id: str)->str:
    """
    Load a manually saved transcript for a video from disk.

    The transcript is read from ``transcripts/<video_id>.txt`` inside the
    directory that contains this module.

    Args:
        video_id: Identifier used as the base name of the transcript file
    Returns:
        str: Full text of the transcript file
    Raises:
        OSError: If the transcript file cannot be opened (for example
            FileNotFoundError when no transcript was saved for this video)
    """
    # NOTE(review): imported locally because no module-level `import os` is
    # visible in this part of the file; harmless if one already exists.
    import os

    module_dir = os.path.dirname(os.path.abspath(__file__))
    transcript_path = os.path.join(module_dir, "transcripts", f"{video_id}.txt")
    # Explicit encoding so decoding does not depend on the platform locale.
    # Assumes transcript files are saved as UTF-8 -- TODO confirm.
    with open(transcript_path, "r", encoding="utf-8") as f:
        return f.read()
59
+ transcript = get_manual_transcript(video_id)
60
  return transcript
mini_agents.py CHANGED
@@ -1,6 +1,6 @@
1
  from smolagents import CodeAgent, InferenceClientModel
2
  from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv, load_dataframe_from_excel
3
- from tools import tavily_search_tool, visit_webpage_tool
4
  from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
5
  from vlm_tools import image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path
6
  from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
@@ -36,7 +36,7 @@ AUTHORIZED_IMPORTS = [
36
  # Audio processing
37
  "wave", "speech_recognition", "pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis",
38
  # Image/Video processing
39
- "cv2", "pytesseract", "onnxruntime", "PIL", "bs4",
40
  # Data processing
41
  "numpy", "pandas", "sklearn", "scipy", "math",
42
  # File handling
@@ -140,7 +140,7 @@ master_model = InferenceClientModel(
140
  master_agent = CodeAgent(
141
  model=master_model,
142
  managed_agents=[audio_agent, vlm_agent, arithmetic_agent, pandas_agent],
143
- tools=[sort_list, get_youtube_transcript_from_url, *community_tools, tavily_search_tool, visit_webpage_tool],
144
  add_base_tools=True,
145
  max_steps=20,
146
  additional_authorized_imports=AUTHORIZED_IMPORTS,
 
1
  from smolagents import CodeAgent, InferenceClientModel
2
  from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv, load_dataframe_from_excel
3
+ from tools import tavily_search_tool, visit_webpage_tool, read_python_file
4
  from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
5
  from vlm_tools import image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path
6
  from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
 
36
  # Audio processing
37
  "wave", "speech_recognition", "pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis",
38
  # Image/Video processing
39
+ "cv2", "pytesseract", "onnxruntime", "PIL", "bs4", "tesseract",
40
  # Data processing
41
  "numpy", "pandas", "sklearn", "scipy", "math",
42
  # File handling
 
140
  master_agent = CodeAgent(
141
  model=master_model,
142
  managed_agents=[audio_agent, vlm_agent, arithmetic_agent, pandas_agent],
143
+ tools=[sort_list, get_youtube_transcript_from_url, read_python_file, *community_tools, tavily_search_tool, visit_webpage_tool],
144
  add_base_tools=True,
145
  max_steps=20,
146
  additional_authorized_imports=AUTHORIZED_IMPORTS,
tools.py CHANGED
@@ -5,6 +5,7 @@ from typing import Literal, List, Union
5
  from smolagents import VisitWebpageTool
6
  from langchain_community.tools.tavily_search import TavilySearchResults
7
  import pandas as pd
 
8
 
9
  @tool
10
  def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d %H:%M:%S")->str:
@@ -258,6 +259,45 @@ def get_dataframe_groupby(data: pd.DataFrame, column: any, operation: Literal["m
258
  else:
259
  raise ValueError("operation must be one of the following: mean, sum, count, min, max, median, std, var")
260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
 
263
 
 
5
  from smolagents import VisitWebpageTool
6
  from langchain_community.tools.tavily_search import TavilySearchResults
7
  import pandas as pd
8
+ import os
9
 
10
  @tool
11
  def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d %H:%M:%S")->str:
 
259
  else:
260
  raise ValueError("operation must be one of the following: mean, sum, count, min, max, median, std, var")
261
 
262
+ @tool
263
def read_python_file(file_path: str) -> str:
    """
    Read and return the contents of a Python file.
    The path is first opened exactly as given. If that fails and the path is
    relative, it is retried relative to the directory containing this module.
    Args:
        file_path: Path to the Python file to read
    Returns:
        str: Contents of the Python file
    Raises:
        RuntimeError: If the path does not end in '.py' or the file cannot be
            read; the underlying exception is attached as the cause
    """
    try:
        # Only Python sources are accepted; the rejection is reported to the
        # caller as a RuntimeError by the outer handler below.
        if not file_path.endswith('.py'):
            raise ValueError(f"File is not a Python file: {file_path}")

        # First attempt: the path exactly as given (absolute, or relative to
        # the current working directory).
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except (OSError, ValueError) as e:
            # Joining an absolute path onto the module directory yields the
            # same absolute path, so a retry would only repeat this failure.
            if os.path.isabs(file_path):
                raise
            print(f"Failed to read with absolute path: {str(e)}")

        # Second attempt: resolve the relative path against this module's
        # own directory.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        adjusted_path = os.path.join(module_dir, file_path)

        print(f"Trying adjusted path: {adjusted_path}")
        with open(adjusted_path, 'r', encoding='utf-8') as f:
            return f.read()

    except Exception as e:
        # Single error type for callers; keep the original exception chained.
        raise RuntimeError(f"Error reading Python file: {str(e)}") from e
301
 
302
 
303