Spaces:

divython
/

yt-video-summariser

Sleeping

App Files Files Community

divython committed on about 1 month ago

Commit

760addf

verified ·

1 Parent(s): 86d9868

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -21

app.py CHANGED Viewed

@@ -1,43 +1,141 @@
 import gradio as gr
 import yt_dlp
 import whisper
-import tempfile
 import os
-def download_audio(url, cookies_path=None):
     ydl_opts = {
         'format': 'bestaudio/best',
-        'outtmpl': 'audio.%(ext)s',
         'postprocessors': [{
             'key': 'FFmpegExtractAudio',
-            'preferredcodec': 'wav',
             'preferredquality': '192',
-        }]
     }
-    if cookies_path:
-        ydl_opts['cookiefile'] = cookies_path
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([url])
     except Exception as e:
-        print("Error downloading:", e)
-def process_video(url, cookies_path):
-    audio_file = download_audio(url, cookies_path)
-    model = whisper.load_model("base")
-    result = model.transcribe(audio_file)
-    return result['text']
-def main(url):
-    cookies_path = 'cookies.txt'  # Provide path to your exported cookies file
-    transcript = process_video(url, cookies_path)
-    return transcript
-demo = gr.Interface(fn=main, inputs=gr.Textbox(label="YouTube URL"), outputs="text")
-demo.launch()

 import gradio as gr
 import yt_dlp
 import whisper
 import os
+import subprocess
+from transformers import pipeline
+# --- Configuration ---
+# Whisper checkpoint name passed to whisper.load_model(). Kept small for Hugging Face Spaces to avoid out-of-memory errors.
+# 'base' or 'small' are good starting points. 'medium' or 'large' might require more resources.
+WHISPER_MODEL_SIZE = "base"
+# Model identifier passed to transformers.pipeline(). 'sshleifer/distilbart-cnn-12-6' is a good balance
+# of performance and size for summarization.
+SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"
+# --- Load Models (once, at import time, so every request reuses the same instances) ---
+# Whisper ASR model; transcribe_audio() reads this module-level name
+print(f"Loading Whisper model: {WHISPER_MODEL_SIZE}...")
+whisper_model = whisper.load_model(WHISPER_MODEL_SIZE)
+print("Whisper model loaded.")
+# Summarization pipeline; summarize_text() reads this module-level name
+print(f"Loading summarization model: {SUMMARIZATION_MODEL}...")
+summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)
+print("Summarization model loaded.")
+# --- Core Functions ---
+def download_and_extract_audio(youtube_url):
+    """
+    Downloads a YouTube video and extracts its audio.
+    Returns the path to the extracted audio file or None on failure.
+    """
+    video_id = youtube_url.split("v=")[-1].split("&")[0] # Extract video ID
+    video_path = f"/tmp/{video_id}.mp4"
+    audio_path = f"/tmp/{video_id}.mp3"
+    # yt-dlp options to download best audio only
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
             'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'mp3',
             'preferredquality': '192',
+        }],
+        'outtmpl': audio_path,
+        'noplaylist': True,
+        'quiet': True,
+        'no_warnings': True,
     }
     try:
+        print(f"Downloading audio for {youtube_url} to {audio_path}...")
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([youtube_url])
+        print("Audio download and extraction complete.")
+        return audio_path
+    except Exception as e:
+        print(f"Error downloading or extracting audio: {e}")
+        return None
+def transcribe_audio(audio_file_path):
+    """
+    Transcribes the given audio file using the loaded Whisper model.
+    Returns the transcribed text.
+    """
+    print(f"Transcribing audio from {audio_file_path} using Whisper...")
+    try:
+        # Transcribe using the loaded Whisper model
+        result = whisper_model.transcribe(audio_file_path, fp16=False) # fp16=False for CPU inference
+        transcript = result["text"]
+        print("Transcription complete.")
+        return transcript
+    except Exception as e:
+        print(f"Error during transcription: {e}")
+        return "Transcription failed."
+def summarize_text(text):
+    """
+    Summarizes the given text using the loaded summarization model.
+    Returns the summarized text.
+    """
+    print("Summarizing text...")
+    try:
+        # The summarizer pipeline can handle long texts by chunking them internally,
+        # but for very long videos, it might still struggle or be slow.
+        # min_length and max_length control the summary length.
+        summary = summarizer(text, max_length=500, min_length=50, do_sample=False)[0]['summary_text']
+        print("Summarization complete.")
+        return summary
     except Exception as e:
+        print(f"Error during summarization: {e}")
+        return "Summarization failed."
+def process_youtube_video(youtube_url):
+    """
+    Main function to process the YouTube video: download, transcribe, and summarize.
+    """
+    # 1. Download and Extract Audio
+    audio_file_path = download_and_extract_audio(youtube_url)
+    if not audio_file_path or not os.path.exists(audio_file_path):
+        return "Failed to download or extract audio.", "N/A"
+    # 2. Transcribe Audio
+    transcript = transcribe_audio(audio_file_path)
+    # 3. Summarize Transcript
+    summary = summarize_text(transcript)
+    # 4. Clean up temporary audio file
+    if os.path.exists(audio_file_path):
+        os.remove(audio_file_path)
+        print(f"Cleaned up {audio_file_path}")
+    return transcript, summary
+# --- Gradio Interface ---
+iface = gr.Interface(
+    fn=process_youtube_video,
+    inputs=gr.Textbox(label="Enter YouTube Video URL (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)"),
+    outputs=[
+        gr.Textbox(label="Full Transcript", lines=15),
+        gr.Textbox(label="Summary/Notes", lines=10)
+    ],
+    title="Mini NotebookLM: YouTube Video Summarizer",
+    description=(
+        "Enter a YouTube video URL, and this tool will download its audio, "
+        "transcribe it using OpenAI Whisper, and then generate a summary/notes."
+        "Please note: Processing time depends on video length and Hugging Face Space resources."
+        "For very long videos or complex audio, results may vary or take a long time."
+    ),
+    allow_flagging="auto", # Allows users to flag results for review
+    examples=[
+        ["https://www.youtube.com/watch?v=k_okcNVzIAo"], # Example: A short educational video
+        # Add more examples if you have specific short, public videos in mind.
+    ]
+)
+# Launch the Gradio app
+iface.launch()