DeeeeeeM
committed on
Commit
·
0b9d716
1
Parent(s):
e8fae30
added urgent yt playlist extractor feature
Browse files- app.py +39 -2
- playlist.csv +0 -0
- yt-playlist-export.py +27 -0
app.py
CHANGED
@@ -7,6 +7,9 @@ import torch
|
|
7 |
import stable_whisper
|
8 |
from stable_whisper.text_output import result_to_any, sec2srt
|
9 |
import time
|
|
|
|
|
|
|
10 |
|
11 |
def process_media(
|
12 |
model_size, source_lang, upload, model_type,
|
@@ -169,6 +172,31 @@ def segments2blocks(segments, max_lines_per_segment, line_penalty, longest_line_
|
|
169 |
for i, s in enumerate(segments)
|
170 |
)
|
171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
WHISPER_LANGUAGES = [
|
173 |
("Afrikaans", "af"),
|
174 |
("Albanian", "sq"),
|
@@ -440,7 +468,16 @@ with gr.Blocks() as interface:
|
|
440 |
outputs=[audio_output, video_output, transcript_output, srt_output]
|
441 |
)
|
442 |
|
443 |
-
with gr.TabItem("
|
444 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
445 |
|
446 |
interface.launch(share=True)
|
|
|
7 |
import stable_whisper
|
8 |
from stable_whisper.text_output import result_to_any, sec2srt
|
9 |
import time
|
10 |
+
from yt_dlp import YoutubeDL
|
11 |
+
import csv
|
12 |
+
import os
|
13 |
|
14 |
def process_media(
|
15 |
model_size, source_lang, upload, model_type,
|
|
|
172 |
for i, s in enumerate(segments)
|
173 |
)
|
174 |
|
175 |
+
def extract_playlist_to_csv(playlist_url):
    """Write the title, ID and watch URL of each playlist entry to a CSV file.

    Args:
        playlist_url: URL of the playlist to read. Surrounding whitespace
            is ignored.

    Returns:
        Path of a temporary ``.csv`` file with a ``Title, Video ID, URL``
        header followed by one row per usable entry, or ``None`` when the
        URL is empty or extraction/writing fails (the failure is logged).
    """
    # Imported locally so the function does not depend on whether the
    # surrounding module already imports these stdlib modules.
    import logging
    import tempfile

    # Nothing to extract from an empty/blank value; skip the network call.
    if not isinstance(playlist_url, str) or not playlist_url.strip():
        return None

    ydl_opts = {
        'extract_flat': True,
        'quiet': True,
        'dump_single_json': True
    }
    csv_path = None
    try:
        with YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(playlist_url.strip(), download=False)
            # Tolerate a missing result or a missing/None 'entries' value.
            entries = list((result or {}).get('entries') or [])

        # Save to a temp file for download
        fd, csv_path = tempfile.mkstemp(suffix=".csv", text=True)
        os.close(fd)
        with open(csv_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['Title', 'Video ID', 'URL'])
            for video in entries:
                # Skip placeholders and entries without an ID instead of
                # aborting the whole export on a single bad item.
                if not video or not video.get('id'):
                    continue
                video_id = video['id']
                title = video.get('title') or 'N/A'
                url = f'https://www.youtube.com/watch?v={video_id}'
                writer.writerow([title, video_id, url])
        return csv_path
    except Exception:
        # Top-level boundary for the UI callback: record the reason rather
        # than swallowing it silently, then report failure with None.
        logging.getLogger(__name__).exception(
            "Failed to export playlist %r to CSV", playlist_url
        )
        # Do not leave a half-written temp file behind.
        if csv_path is not None and os.path.exists(csv_path):
            try:
                os.remove(csv_path)
            except OSError:
                pass
        return None
|
199 |
+
|
200 |
WHISPER_LANGUAGES = [
|
201 |
("Afrikaans", "af"),
|
202 |
("Albanian", "sq"),
|
|
|
468 |
outputs=[audio_output, video_output, transcript_output, srt_output]
|
469 |
)
|
470 |
|
471 |
+
with gr.TabItem("Youtube playlist extractor"):
|
472 |
+
gr.Markdown("### Extract YT Title, URL, and ID from a YouTube playlist and download as CSV.")
|
473 |
+
playlist_url = gr.Textbox(label="YouTube Playlist URL", placeholder="Paste playlist URL here")
|
474 |
+
process_btn = gr.Button("Process")
|
475 |
+
csv_output = gr.File(label="Download CSV")
|
476 |
+
process_btn.click(
|
477 |
+
extract_playlist_to_csv,
|
478 |
+
inputs=playlist_url,
|
479 |
+
outputs=csv_output
|
480 |
+
)
|
481 |
+
|
482 |
|
483 |
interface.launch(share=True)
|
playlist.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
yt-playlist-export.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Export the title, ID and watch URL of every video in a playlist to CSV."""
from yt_dlp import YoutubeDL
import csv

playlist_url = 'https://www.youtube.com/playlist?list=PLGRhcC_vtOra_TUIec1NgfHJIggPONtqU'

ydl_opts = {
    'extract_flat': True,
    'quiet': True,
    'dump_single_json': True
}


def main(url=playlist_url, output_path='playlist.csv'):
    """Fetch the playlist at *url* and write its entries to *output_path*.

    The CSV gets a ``Title, Video ID, URL`` header followed by one row per
    entry that has an ID; entries that are empty or lack an ID are skipped.
    """
    with YoutubeDL(ydl_opts) as ydl:
        result = ydl.extract_info(url, download=False)

    # Tolerate a missing result or a missing/None 'entries' value.
    entries = (result or {}).get('entries') or []

    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Title', 'Video ID', 'URL'])  # Header

        for video in entries:
            # A single bad entry should not abort the whole export.
            if not video or not video.get('id'):
                continue
            video_id = video['id']
            title = video.get('title') or 'N/A'
            url = f'https://www.youtube.com/watch?v={video_id}'
            writer.writerow([title, video_id, url])

    print("✅ Video IDs and URLs saved to 'playlist.csv'")


# Run only when executed as a script, so importing has no side effects.
if __name__ == "__main__":
    main()
|