divython committed on
Commit
760addf
·
verified ·
1 Parent(s): 86d9868

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -21
app.py CHANGED
@@ -1,43 +1,141 @@
1
  import gradio as gr
2
  import yt_dlp
3
  import whisper
4
- import tempfile
5
-
6
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
 
 
 
 
8
 
9
- def download_audio(url, cookies_path=None):
 
 
 
 
 
 
 
 
 
 
 
10
  ydl_opts = {
11
  'format': 'bestaudio/best',
12
- 'outtmpl': 'audio.%(ext)s',
13
  'postprocessors': [{
14
  'key': 'FFmpegExtractAudio',
15
- 'preferredcodec': 'wav',
16
  'preferredquality': '192',
17
- }]
 
 
 
 
18
  }
19
- if cookies_path:
20
- ydl_opts['cookiefile'] = cookies_path
21
  try:
 
22
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
23
- ydl.download([url])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  except Exception as e:
25
- print("Error downloading:", e)
 
26
 
 
 
 
 
 
 
 
 
27
 
 
 
28
 
 
 
29
 
30
- def process_video(url, cookies_path):
31
- audio_file = download_audio(url, cookies_path)
32
- model = whisper.load_model("base")
33
- result = model.transcribe(audio_file)
34
- return result['text']
35
 
36
- def main(url):
37
- cookies_path = 'cookies.txt' # Provide path to your exported cookies file
38
- transcript = process_video(url, cookies_path)
39
- return transcript
40
 
41
- demo = gr.Interface(fn=main, inputs=gr.Textbox(label="YouTube URL"), outputs="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- demo.launch()
 
 
1
  import gradio as gr
2
  import yt_dlp
3
  import whisper
 
 
4
  import os
5
+ import subprocess
6
+ from transformers import pipeline
7
+
8
# --- Configuration ---
# Whisper checkpoint name handed to whisper.load_model(). Smaller checkpoints
# ('base', 'small') help avoid out-of-memory errors on Hugging Face Spaces;
# 'medium' or 'large' might require more resources.
WHISPER_MODEL_SIZE = "base"

# Checkpoint used by the transformers summarization pipeline.
SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"

# --- Load Models (once at startup) ---
# Both models are created at import time so every request reuses them
# instead of paying the load cost per call.
print(f"Loading Whisper model: {WHISPER_MODEL_SIZE}...")
whisper_model = whisper.load_model(WHISPER_MODEL_SIZE)
print("Whisper model loaded.")

print(f"Loading summarization model: {SUMMARIZATION_MODEL}...")
summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)
print("Summarization model loaded.")
27
 
28
+ # --- Core Functions ---
29
+
30
def _extract_video_id(youtube_url):
    """Derive a filesystem-safe identifier from a YouTube URL or bare video ID."""
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(youtube_url)
    v_values = parse_qs(parsed.query).get("v")
    if v_values:
        # Standard watch URL: .../watch?v=<id>&...
        raw_id = v_values[0]
    else:
        # Short/embed/shorts URLs (or a bare ID): the ID is the last path segment.
        raw_id = parsed.path.rstrip("/").rsplit("/", 1)[-1]
    # Keep only characters that are safe in a file name; this also removes
    # '/' and '%', which would otherwise alter the path or be read as
    # yt-dlp output-template fields.
    safe_id = "".join(ch for ch in raw_id if ch.isalnum() or ch in "-_")
    return safe_id or "audio"


def download_and_extract_audio(youtube_url):
    """
    Downloads the audio track of a YouTube video and converts it to MP3.

    Args:
        youtube_url: URL (or bare video ID) of the video to fetch.

    Returns:
        The path of the extracted MP3 file under /tmp, or None when the URL
        is missing/blank or the download/extraction fails.
    """
    # The value comes straight from a UI textbox, so it may be empty or None.
    if not isinstance(youtube_url, str) or not youtube_url.strip():
        print("Error downloading or extracting audio: no URL provided.")
        return None
    youtube_url = youtube_url.strip()

    try:
        audio_path = f"/tmp/{_extract_video_id(youtube_url)}.mp3"

        # yt-dlp options to download best audio only
        ydl_opts = {
            'format': 'bestaudio/best',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
            'outtmpl': audio_path,
            'noplaylist': True,
            'quiet': True,
            'no_warnings': True,
        }

        print(f"Downloading audio for {youtube_url} to {audio_path}...")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([youtube_url])
        print("Audio download and extraction complete.")
        return audio_path
    except Exception as e:
        # Boundary handler: callers rely on None (not an exception) to signal failure.
        print(f"Error downloading or extracting audio: {e}")
        return None
62
+
63
def transcribe_audio(audio_file_path):
    """
    Run the module-level Whisper model over an audio file.

    Returns the recognized text, or the literal string
    "Transcription failed." if anything goes wrong.
    """
    print(f"Transcribing audio from {audio_file_path} using Whisper...")
    try:
        # fp16=False for CPU inference
        outcome = whisper_model.transcribe(audio_file_path, fp16=False)
        text = outcome["text"]
        print("Transcription complete.")
    except Exception as err:
        print(f"Error during transcription: {err}")
        return "Transcription failed."
    return text
78
+
79
def _split_into_chunks(text, max_words=400):
    """Split text into evenly sized pieces of at most max_words words each."""
    words = text.split()
    if not words:
        return []
    # Ceiling divisions: pick the number of chunks first, then balance their
    # size so the final chunk is not a tiny leftover fragment.
    chunk_count = -(-len(words) // max_words)
    chunk_size = -(-len(words) // chunk_count)
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]


def summarize_text(text):
    """
    Summarizes the given text using the loaded summarization model.

    The pipeline does not split long inputs on its own, and a full video
    transcript is usually longer than the model's input window, so the text
    is summarized chunk by chunk and the partial summaries are joined.

    Args:
        text: The transcript to summarize.

    Returns:
        The combined summary, or "Summarization failed." when the text is
        empty or the model raises.
    """
    print("Summarizing text...")
    if not text or not text.strip():
        print("Error during summarization: nothing to summarize.")
        return "Summarization failed."
    try:
        partial_summaries = []
        for chunk in _split_into_chunks(text):
            # truncation=True is a safety net in case a chunk still tokenizes
            # to more tokens than the model accepts.
            # min_length and max_length control the summary length per chunk.
            result = summarizer(
                chunk,
                max_length=500,
                min_length=50,
                do_sample=False,
                truncation=True,
            )
            partial_summaries.append(result[0]['summary_text'].strip())
        summary = " ".join(partial_summaries)
        print("Summarization complete.")
        return summary
    except Exception as e:
        # Boundary handler: callers expect a string, never an exception.
        print(f"Error during summarization: {e}")
        return "Summarization failed."
95
 
96
def process_youtube_video(youtube_url):
    """
    Main function to process the YouTube video: download, transcribe, and summarize.

    Args:
        youtube_url: URL entered by the user.

    Returns:
        A (transcript, summary) tuple of strings. When the URL is blank or the
        audio cannot be fetched, returns
        ("Failed to download or extract audio.", "N/A"). When transcription
        fails or yields no text, the summary is "N/A".
    """
    download_failure = ("Failed to download or extract audio.", "N/A")

    # The textbox may deliver an empty value; there is nothing to download then.
    if not isinstance(youtube_url, str) or not youtube_url.strip():
        return download_failure

    # 1. Download and Extract Audio
    audio_file_path = download_and_extract_audio(youtube_url.strip())
    if not audio_file_path or not os.path.exists(audio_file_path):
        return download_failure

    try:
        # 2. Transcribe Audio
        transcript = transcribe_audio(audio_file_path)

        # Do not feed the failure message (or an empty transcript) to the
        # summarizer; there is no real content to summarize.
        if transcript == "Transcription failed." or not transcript.strip():
            return transcript, "N/A"

        # 3. Summarize Transcript
        summary = summarize_text(transcript)
        return transcript, summary
    finally:
        # 4. Clean up temporary audio file, even if a step above raised
        try:
            if os.path.exists(audio_file_path):
                os.remove(audio_file_path)
                print(f"Cleaned up {audio_file_path}")
        except OSError as e:
            # A leftover temp file must not turn a finished request into an error.
            print(f"Could not remove {audio_file_path}: {e}")
 
 
 
117
 
118
+ # --- Gradio Interface ---
119
# Build the web UI: one URL textbox in, transcript and summary textboxes out.
iface = gr.Interface(
    fn=process_youtube_video,
    inputs=gr.Textbox(label="Enter YouTube Video URL (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)"),
    outputs=[
        gr.Textbox(label="Full Transcript", lines=15),
        gr.Textbox(label="Summary/Notes", lines=10),
    ],
    title="Mini NotebookLM: YouTube Video Summarizer",
    # Adjacent string literals are concatenated verbatim, so every sentence
    # but the last must end with a space to keep the rendered text readable.
    description=(
        "Enter a YouTube video URL, and this tool will download its audio, "
        "transcribe it using OpenAI Whisper, and then generate a summary/notes. "
        "Please note: Processing time depends on video length and Hugging Face Space resources. "
        "For very long videos or complex audio, results may vary or take a long time."
    ),
    allow_flagging="auto",  # Allows users to flag results for review
    examples=[
        ["https://www.youtube.com/watch?v=k_okcNVzIAo"],  # Example: A short educational video
        # Add more examples if you have specific short, public videos in mind.
    ],
)

# Launch the Gradio app
iface.launch()