Spaces:
Sleeping
Sleeping
Pycrolis
committed on
Commit
·
eb08b08
1
Parent(s):
fe989a0
feat(tool): add YouTube transcript tool
Browse files
- ShrewdAgent.py +2 -0
- requirements.txt +2 -1
- tools/youtube_transcript.py +32 -0
ShrewdAgent.py
CHANGED
@@ -14,6 +14,7 @@ from loguru import logger
|
|
14 |
from pydantic import SecretStr
|
15 |
|
16 |
from tools.web_page_information_extractor import web_page_information_extractor
|
|
|
17 |
|
18 |
|
19 |
class AgentState(TypedDict):
|
@@ -38,6 +39,7 @@ class ShrewdAgent:
|
|
38 |
self.tools = [
|
39 |
TavilySearch(),
|
40 |
web_page_information_extractor,
|
|
|
41 |
]
|
42 |
self.llm = ChatOpenAI(
|
43 |
model="gpt-4o-mini",
|
|
|
14 |
from pydantic import SecretStr
|
15 |
|
16 |
from tools.web_page_information_extractor import web_page_information_extractor
|
17 |
+
from tools.youtube_transcript import youtube_transcript
|
18 |
|
19 |
|
20 |
class AgentState(TypedDict):
|
|
|
39 |
self.tools = [
|
40 |
TavilySearch(),
|
41 |
web_page_information_extractor,
|
42 |
+
youtube_transcript,
|
43 |
]
|
44 |
self.llm = ChatOpenAI(
|
45 |
model="gpt-4o-mini",
|
requirements.txt
CHANGED
@@ -8,4 +8,5 @@ loguru~=0.7.3
|
|
8 |
pydantic~=2.11.4
|
9 |
html2text~=2025.4.15
|
10 |
beautifulsoup4~=4.13.4
|
11 |
-
readability-lxml~=0.8.4.1
|
|
|
|
8 |
pydantic~=2.11.4
|
9 |
html2text~=2025.4.15
|
10 |
beautifulsoup4~=4.13.4
|
11 |
+
readability-lxml~=0.8.4.1
|
12 |
+
youtube-transcript-api~=1.0.3
|
tools/youtube_transcript.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_core.tools import tool
|
2 |
+
from loguru import logger
|
3 |
+
from youtube_transcript_api import YouTubeTranscriptApi, FetchedTranscript
|
4 |
+
|
5 |
+
|
6 |
+
@tool("youtube_transcript_tool", parse_docstring=True)
|
7 |
+
def youtube_transcript(video_id: str) -> str:
|
8 |
+
"""
|
9 |
+
Fetches the transcript of a YouTube video using its video ID.
|
10 |
+
|
11 |
+
The video ID must be provided to successfully fetch the transcript.
|
12 |
+
|
13 |
+
Args:
|
14 |
+
video_id (str): The unique identifier of a YouTube video. You can retrieve the video_id from the URL of the video. For example, with the URL https://www.youtube.com/watch?v=12345 the video_id is 12345.
|
15 |
+
|
16 |
+
Returns:
|
17 |
+
str: The transcript of the specified YouTube video, formatted as newline-separated bullet points ("- <text>"), one per transcript entry.
|
18 |
+
|
19 |
+
Raises:
|
20 |
+
Any exceptions related to YouTubeTranscriptApi when a problem
|
21 |
+
occurs during fetching the transcript.
|
22 |
+
"""
|
23 |
+
logger.info(f"use youtube_transcript with param: {video_id}")
|
24 |
+
transcript = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
|
25 |
+
|
26 |
+
bullet_points = '\n'.join(f"- {entry['text']}" for entry in transcript)
|
27 |
+
|
28 |
+
return bullet_points
|
29 |
+
|
30 |
+
if __name__ == "__main__":
|
31 |
+
transcript = youtube_transcript.invoke("1htKBjuUWec")
|
32 |
+
print(transcript)
|