Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,43 +1,141 @@
|
|
1 |
import gradio as gr
|
2 |
import yt_dlp
|
3 |
import whisper
|
4 |
-
import tempfile
|
5 |
-
|
6 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
ydl_opts = {
|
11 |
'format': 'bestaudio/best',
|
12 |
-
'outtmpl': 'audio.%(ext)s',
|
13 |
'postprocessors': [{
|
14 |
'key': 'FFmpegExtractAudio',
|
15 |
-
'preferredcodec': '
|
16 |
'preferredquality': '192',
|
17 |
-
}]
|
|
|
|
|
|
|
|
|
18 |
}
|
19 |
-
|
20 |
-
ydl_opts['cookiefile'] = cookies_path
|
21 |
try:
|
|
|
22 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
23 |
-
ydl.download([
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
except Exception as e:
|
25 |
-
print("Error
|
|
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
|
|
|
|
28 |
|
|
|
|
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
return result['text']
|
35 |
|
36 |
-
|
37 |
-
cookies_path = 'cookies.txt' # Provide path to your exported cookies file
|
38 |
-
transcript = process_video(url, cookies_path)
|
39 |
-
return transcript
|
40 |
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
|
|
|
|
1 |
import gradio as gr
|
2 |
import yt_dlp
|
3 |
import whisper
|
|
|
|
|
4 |
import os
|
5 |
+
import subprocess
|
6 |
+
from transformers import pipeline
|
7 |
+
|
8 |
+
# --- Configuration ---
# Whisper checkpoint name handed to whisper.load_model(). The original author
# picked a small checkpoint to avoid out-of-memory errors on Hugging Face
# Spaces; 'medium' or 'large' might require more resources.
WHISPER_MODEL_SIZE = "base"

# Hugging Face model id handed to the summarization pipeline.
SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"

# --- Load Models (once at startup) ---
# Both models are created at import time and reused by every request.
print(f"Loading Whisper model: {WHISPER_MODEL_SIZE}...")
whisper_model = whisper.load_model(WHISPER_MODEL_SIZE)
print("Whisper model loaded.")

print(f"Loading summarization model: {SUMMARIZATION_MODEL}...")
summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)
print("Summarization model loaded.")
|
27 |
|
28 |
+
# --- Core Functions ---
|
29 |
+
|
30 |
+
def download_and_extract_audio(youtube_url):
    """
    Download the audio track of a YouTube video and convert it to MP3.

    Args:
        youtube_url: URL of the video. ``watch?v=<id>`` URLs as well as
            path-style URLs (``youtu.be/<id>``, ``/shorts/<id>``,
            ``/embed/<id>``) are accepted.

    Returns:
        The path ``/tmp/<video_id>.mp3`` when yt-dlp finishes without raising,
        or ``None`` when the URL is empty or the download/extraction fails.
    """
    # Function-scope imports keep this block self-contained.
    import re
    from urllib.parse import parse_qs, urlparse

    youtube_url = (youtube_url or "").strip()
    if not youtube_url:
        print("Error downloading or extracting audio: no URL provided")
        return None

    # Prefer the "v" query parameter; otherwise fall back to the last path
    # segment, which is where short/embed style URLs carry the ID.
    parsed_url = urlparse(youtube_url)
    query_ids = parse_qs(parsed_url.query).get("v")
    if query_ids:
        raw_id = query_ids[0]
    else:
        raw_id = parsed_url.path.rstrip("/").rsplit("/", 1)[-1]

    # The ID becomes part of a file name and comes from user input, so strip
    # anything that could escape /tmp (slashes, dots, ...).
    video_id = re.sub(r"[^A-Za-z0-9_-]", "_", raw_id) or "audio"
    audio_path = f"/tmp/{video_id}.mp3"

    # yt-dlp options to download best audio only
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        # Let yt-dlp fill in the source extension; the FFmpegExtractAudio
        # post-processor then writes the final file as <video_id>.mp3.
        'outtmpl': f"/tmp/{video_id}.%(ext)s",
        'noplaylist': True,
        'quiet': True,
        'no_warnings': True,
    }

    try:
        print(f"Downloading audio for {youtube_url} to {audio_path}...")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([youtube_url])
        print("Audio download and extraction complete.")
        return audio_path
    except Exception as e:
        # Boundary handler: report the failure and signal it with None so the
        # caller can show a friendly message instead of a traceback.
        print(f"Error downloading or extracting audio: {e}")
        return None
|
62 |
+
|
63 |
+
def transcribe_audio(audio_file_path):
    """
    Run the module-level Whisper model over an audio file.

    Returns the text of the transcription, or the literal string
    "Transcription failed." if anything raises while transcribing.
    """
    print(f"Transcribing audio from {audio_file_path} using Whisper...")
    try:
        # fp16 is switched off; per the original author's note this is for
        # CPU inference.
        transcript = whisper_model.transcribe(audio_file_path, fp16=False)["text"]
        print("Transcription complete.")
        return transcript
    except Exception as err:
        print(f"Error during transcription: {err}")
        return "Transcription failed."
|
78 |
+
|
79 |
+
def summarize_text(text):
    """
    Summarize a transcript with the module-level summarization pipeline.

    The pipeline does not split long inputs by itself, so the text is cut
    into word-based chunks here, each chunk is summarized separately, and the
    partial summaries are joined with spaces.

    Args:
        text: The text to summarize.

    Returns:
        The summary, or the literal string "Summarization failed." when the
        input is empty/blank or the pipeline raises.
    """
    print("Summarizing text...")

    words = (text or "").split()
    if not words:
        print("Error during summarization: no text to summarize")
        return "Summarization failed."

    # Rough word budget per chunk. NOTE(review): chosen to stay well below a
    # 1024-token model input limit for English text -- confirm for the
    # configured model; truncation=True below is the safety net either way.
    chunk_words = 400

    if len(words) <= chunk_words:
        # Short input: pass it through untouched, as before.
        chunks = [text]
    else:
        chunks = [
            " ".join(words[start:start + chunk_words])
            for start in range(0, len(words), chunk_words)
        ]
        # Fold a very short tail into the previous chunk so the model is not
        # forced to produce min_length tokens from a handful of words.
        tail_words = len(words) % chunk_words
        if tail_words and tail_words < chunk_words // 4:
            tail = chunks.pop()
            chunks[-1] = f"{chunks[-1]} {tail}"

    try:
        # min_length and max_length control the length of each partial summary.
        partial_summaries = [
            summarizer(
                chunk,
                max_length=500,
                min_length=50,
                do_sample=False,
                truncation=True,
            )[0]['summary_text']
            for chunk in chunks
        ]
        summary = " ".join(part.strip() for part in partial_summaries)
        print("Summarization complete.")
        return summary
    except Exception as e:
        print(f"Error during summarization: {e}")
        return "Summarization failed."
|
95 |
|
96 |
+
def process_youtube_video(youtube_url):
    """
    Main function to process the YouTube video: download, transcribe, and summarize.

    Args:
        youtube_url: URL entered by the user.

    Returns:
        A ``(transcript, summary)`` tuple of strings. When the URL is blank or
        the audio cannot be downloaded the tuple is
        ``("Failed to download or extract audio.", "N/A")``; when
        transcription fails the summary is ``"N/A"``.
    """
    # A blank URL can never download; answer with the same failure tuple
    # without starting a download attempt.
    if not youtube_url or not youtube_url.strip():
        return "Failed to download or extract audio.", "N/A"

    # 1. Download and Extract Audio
    audio_file_path = download_and_extract_audio(youtube_url)
    if not audio_file_path or not os.path.exists(audio_file_path):
        return "Failed to download or extract audio.", "N/A"

    # 2. Transcribe Audio, then remove the temporary file right away. The
    #    audio is not needed for summarization, and `finally` guarantees the
    #    cleanup even if transcription raises unexpectedly.
    try:
        transcript = transcribe_audio(audio_file_path)
    finally:
        try:
            if os.path.exists(audio_file_path):
                os.remove(audio_file_path)
                print(f"Cleaned up {audio_file_path}")
        except OSError as e:
            # Best-effort cleanup: a leftover temp file must not hide the result.
            print(f"Could not remove {audio_file_path}: {e}")

    # transcribe_audio reports failure with this exact string; summarizing it
    # would only produce a meaningless summary.
    if transcript == "Transcription failed.":
        return transcript, "N/A"

    # 3. Summarize Transcript
    summary = summarize_text(transcript)

    return transcript, summary
|
|
|
|
|
|
|
117 |
|
118 |
+
# --- Gradio Interface ---
|
119 |
+
# One text input (the video URL) mapped to two text outputs, in the order
# returned by process_youtube_video: (transcript, summary).
iface = gr.Interface(
    fn=process_youtube_video,
    inputs=gr.Textbox(label="Enter YouTube Video URL (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)"),
    outputs=[
        gr.Textbox(label="Full Transcript", lines=15),
        gr.Textbox(label="Summary/Notes", lines=10),
    ],
    title="Mini NotebookLM: YouTube Video Summarizer",
    # Adjacent literals are concatenated verbatim, so each sentence carries
    # its own trailing space.
    description=(
        "Enter a YouTube video URL, and this tool will download its audio, "
        "transcribe it using OpenAI Whisper, and then generate a summary/notes. "
        "Please note: Processing time depends on video length and Hugging Face Space resources. "
        "For very long videos or complex audio, results may vary or take a long time."
    ),
    # NOTE(review): per the Gradio docs, "auto" records every submission
    # automatically and shows no Flag button; use "manual" if users should
    # choose what to flag. Newer Gradio releases rename this option -- confirm
    # against the installed version.
    allow_flagging="auto",
    examples=[
        ["https://www.youtube.com/watch?v=k_okcNVzIAo"],  # Example: A short educational video
        # Add more examples if you have specific short, public videos in mind.
    ],
)

# Launch the Gradio app
iface.launch()
|