Spaces:
Sleeping
Sleeping
Read python tools + youtube transcripts
Browse files- community_tools.py +8 -4
- mini_agents.py +3 -3
- tools.py +40 -0
community_tools.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
from langchain_google_community import GooglePlacesTool
|
| 2 |
from langchain_community.agent_toolkits.load_tools import load_tools
|
| 3 |
-
from langchain_community.document_loaders import YoutubeLoader
|
| 4 |
from smolagents.tools import Tool, tool
|
| 5 |
from youtube_transcript_api import YouTubeTranscriptApi
|
| 6 |
from youtube_transcript_api.proxies import WebshareProxyConfig
|
|
@@ -50,7 +49,12 @@ def get_youtube_transcript_from_url(video_url: str)->str:
|
|
| 50 |
transcript += f"{snippet['text']}\n"
|
| 51 |
return transcript
|
| 52 |
except Exception as e:
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
return transcript
|
|
|
|
| 1 |
from langchain_google_community import GooglePlacesTool
|
| 2 |
from langchain_community.agent_toolkits.load_tools import load_tools
|
|
|
|
| 3 |
from smolagents.tools import Tool, tool
|
| 4 |
from youtube_transcript_api import YouTubeTranscriptApi
|
| 5 |
from youtube_transcript_api.proxies import WebshareProxyConfig
|
|
|
|
| 49 |
transcript += f"{snippet['text']}\n"
|
| 50 |
return transcript
|
| 51 |
except Exception as e:
|
| 52 |
+
# Fall back to the manually saved transcript for this video
|
| 53 |
+
def get_manual_transcript(video_id: str)->str:
    """
    Return the manually saved transcript for a video.

    The transcript is read from ``transcripts/<video_id>.txt`` inside the
    directory that contains this module.

    Args:
        video_id: Identifier used as the base name of the transcript file
    Returns:
        str: Full text of the transcript file
    Raises:
        OSError: If the transcript file does not exist or cannot be read
    """
    # Imported locally so the helper works even if the module has no
    # top-level `import os`.
    import os

    curr_dir = os.path.dirname(os.path.abspath(__file__))
    transcript_path = os.path.join(curr_dir, "transcripts", f"{video_id}.txt")
    # Explicit encoding: without it, decoding depends on the platform locale
    # and non-ASCII transcripts may fail or be garbled.
    with open(transcript_path, "r", encoding="utf-8") as f:
        return f.read()
|
| 59 |
+
transcript = get_manual_transcript(video_id)
|
| 60 |
return transcript
|
mini_agents.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
from smolagents import CodeAgent, InferenceClientModel
|
| 2 |
from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv, load_dataframe_from_excel
|
| 3 |
-
from tools import tavily_search_tool, visit_webpage_tool
|
| 4 |
from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
|
| 5 |
from vlm_tools import image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path
|
| 6 |
from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
|
|
@@ -36,7 +36,7 @@ AUTHORIZED_IMPORTS = [
|
|
| 36 |
# Audio processing
|
| 37 |
"wave", "speech_recognition", "pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis",
|
| 38 |
# Image/Video processing
|
| 39 |
-
"cv2", "pytesseract", "onnxruntime", "PIL", "bs4",
|
| 40 |
# Data processing
|
| 41 |
"numpy", "pandas", "sklearn", "scipy", "math",
|
| 42 |
# File handling
|
|
@@ -140,7 +140,7 @@ master_model = InferenceClientModel(
|
|
| 140 |
master_agent = CodeAgent(
|
| 141 |
model=master_model,
|
| 142 |
managed_agents=[audio_agent, vlm_agent, arithmetic_agent, pandas_agent],
|
| 143 |
-
tools=[sort_list, get_youtube_transcript_from_url, *community_tools, tavily_search_tool, visit_webpage_tool],
|
| 144 |
add_base_tools=True,
|
| 145 |
max_steps=20,
|
| 146 |
additional_authorized_imports=AUTHORIZED_IMPORTS,
|
|
|
|
| 1 |
from smolagents import CodeAgent, InferenceClientModel
|
| 2 |
from tools import sort_list, operate_two_numbers, convert_number, load_dataframe_from_csv, load_dataframe_from_excel
|
| 3 |
+
from tools import tavily_search_tool, visit_webpage_tool, read_python_file
|
| 4 |
from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
|
| 5 |
from vlm_tools import image_processing, object_detection_tool, ocr_scan_tool, extract_images_from_video, get_image_from_file_path, get_video_from_file_path
|
| 6 |
from audio_tools import transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
|
|
|
|
| 36 |
# Audio processing
|
| 37 |
"wave", "speech_recognition", "pytube", "pytube3", "youtube_dl", "pydub", "pyAudioAnalysis",
|
| 38 |
# Image/Video processing
|
| 39 |
+
"cv2", "pytesseract", "onnxruntime", "PIL", "bs4", "tesseract",
|
| 40 |
# Data processing
|
| 41 |
"numpy", "pandas", "sklearn", "scipy", "math",
|
| 42 |
# File handling
|
|
|
|
| 140 |
master_agent = CodeAgent(
|
| 141 |
model=master_model,
|
| 142 |
managed_agents=[audio_agent, vlm_agent, arithmetic_agent, pandas_agent],
|
| 143 |
+
tools=[sort_list, get_youtube_transcript_from_url, read_python_file, *community_tools, tavily_search_tool, visit_webpage_tool],
|
| 144 |
add_base_tools=True,
|
| 145 |
max_steps=20,
|
| 146 |
additional_authorized_imports=AUTHORIZED_IMPORTS,
|
tools.py
CHANGED
|
@@ -5,6 +5,7 @@ from typing import Literal, List, Union
|
|
| 5 |
from smolagents import VisitWebpageTool
|
| 6 |
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 7 |
import pandas as pd
|
|
|
|
| 8 |
|
| 9 |
@tool
|
| 10 |
def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d %H:%M:%S")->str:
|
|
@@ -258,6 +259,45 @@ def get_dataframe_groupby(data: pd.DataFrame, column: any, operation: Literal["m
|
|
| 258 |
else:
|
| 259 |
raise ValueError("operation must be one of the following: mean, sum, count, min, max, median, std, var")
|
| 260 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
|
| 263 |
|
|
|
|
| 5 |
from smolagents import VisitWebpageTool
|
| 6 |
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 7 |
import pandas as pd
|
| 8 |
+
import os
|
| 9 |
|
| 10 |
@tool
|
| 11 |
def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d %H:%M:%S")->str:
|
|
|
|
| 259 |
else:
|
| 260 |
raise ValueError("operation must be one of the following: mean, sum, count, min, max, median, std, var")
|
| 261 |
|
| 262 |
+
@tool
|
| 263 |
+
def read_python_file(file_path: str) -> str:
    """
    Read and return the contents of a Python file.

    The path is opened exactly as given first. If that fails with an OS-level
    error and the path is relative, it is retried relative to the directory
    that contains this module.

    Args:
        file_path: Path to the Python file to read
    Returns:
        str: Contents of the Python file
    Raises:
        RuntimeError: If the path does not end in '.py' or the file cannot be read
    """
    try:
        # Check if it's a Python file
        if not file_path.endswith('.py'):
            raise ValueError(f"File is not a Python file: {file_path}")

        # Try the path exactly as given first
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except OSError:
            # Only OS-level failures (missing file, permissions, ...) justify a
            # retry; a decode error must surface as-is instead of being masked
            # by the fallback's own error. Joining an absolute path onto a
            # directory yields the same path, so retrying it is pointless.
            if os.path.isabs(file_path):
                raise

        # Retry relative to the directory that contains this module
        module_dir = os.path.dirname(os.path.abspath(__file__))
        adjusted_path = os.path.join(module_dir, file_path)
        with open(adjusted_path, 'r', encoding='utf-8') as f:
            return f.read()

    except Exception as e:
        # Every failure is reported as RuntimeError; chain the cause so the
        # original exception type and traceback stay available to callers.
        raise RuntimeError(f"Error reading Python file: {str(e)}") from e
|
| 301 |
|
| 302 |
|
| 303 |
|