Spaces:

Pycrolis
/

shrewd-agent

Sleeping

Pycrolis committed on May 24

Commit

eb08b08

1 Parent(s): fe989a0

feat(tool): add YouTube transcript tool

Files changed (3) hide show

ShrewdAgent.py CHANGED Viewed

@@ -14,6 +14,7 @@ from loguru import logger
 from pydantic import SecretStr
 from tools.web_page_information_extractor import web_page_information_extractor
 class AgentState(TypedDict):
@@ -38,6 +39,7 @@ class ShrewdAgent:
         self.tools = [
             TavilySearch(),
             web_page_information_extractor,
         ]
         self.llm = ChatOpenAI(
             model="gpt-4o-mini",

 from pydantic import SecretStr
 from tools.web_page_information_extractor import web_page_information_extractor
+from tools.youtube_transcript import youtube_transcript
 class AgentState(TypedDict):
         self.tools = [
             TavilySearch(),
             web_page_information_extractor,
+            youtube_transcript,
         ]
         self.llm = ChatOpenAI(
             model="gpt-4o-mini",

requirements.txt CHANGED Viewed

@@ -8,4 +8,5 @@ loguru~=0.7.3
 pydantic~=2.11.4
 html2text~=2025.4.15
 beautifulsoup4~=4.13.4
-readability-lxml~=0.8.4.1

 pydantic~=2.11.4
 html2text~=2025.4.15
 beautifulsoup4~=4.13.4
+readability-lxml~=0.8.4.1
+youtube-transcript-api~=1.0.3

tools/youtube_transcript.py ADDED Viewed

+from langchain_core.tools import tool
+from loguru import logger
+from youtube_transcript_api import YouTubeTranscriptApi, FetchedTranscript
+@tool("youtube_transcript_tool", parse_docstring=True)
+def youtube_transcript(video_id: str) -> str:
+    """
+    Fetches the transcript of a YouTube video using its video ID.
+    The video ID must be provided to successfully fetch the transcript.
+    Args:
+        video_id (str): The unique identifier of a YouTube video. You can retrieve the video_id from the URL of the video. For example, with the URL https://www.youtube.com/watch?v=12345 the video_id is 12345.
+    Returns:
+        str: The transcript of the specified YouTube video, formatted as one "- <text>" bullet point per transcript entry, joined by newlines.
+    Raises:
+        Any exceptions related to YouTubeTranscriptApi when a problem
+        occurs during fetching the transcript.
+    """
+    logger.info(f"use youtube_transcript with param: {video_id}")
+    transcript = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
+    bullet_points = '\n'.join(f"- {entry['text']}" for entry in transcript)
+    return bullet_points
+if __name__ == "__main__":
+    transcript = youtube_transcript.invoke("1htKBjuUWec")
+    print(transcript)