Spaces:

divython
/

yt-video-summariser

Sleeping

File size: 8,727 Bytes

4d4b8ad
03410b4
5e74ff4
4b7eb7e
760addf
 
 
 
a509a5a
 
760addf
 
 
 
 
 
a509a5a
 
 
d81deed
 
a509a5a
 
 
 
 
 
 
 
 
15f90dc
a509a5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760addf
a509a5a
 
760addf
a509a5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760addf
 
4b7eb7e
 
 
 
760addf
4b7eb7e
760addf
 
 
 
 
a509a5a
4b7eb7e
760addf
d81deed
 
a509a5a
d81deed
a509a5a
 
d81deed
 
86d9868
a509a5a
86d9868
760addf
a509a5a
 
 
 
 
 
 
 
 
 
3009fd5
a509a5a
3009fd5
 
760addf
a509a5a
3009fd5
 
a509a5a
 
 
 
760addf
a509a5a
760addf
 
 
 
 
 
 
 
a509a5a
 
 
 
760addf
 
86d9868
760addf
 
9b856f1
a509a5a
 
760addf
 
a509a5a
760addf
a509a5a
 
 
 
 
 
 
 
 
 
 
 
 
 
5b69f95
a509a5a
 
 
 
03410b4
a509a5a
5e74ff4
760addf
 
 
 
 
a509a5a
 
760addf
a509a5a
760addf
a509a5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760addf
a509a5a
760addf
a509a5a
 
760addf
 
9b856f1
a509a5a

import gradio as gr
import yt_dlp
import whisper
import os
import subprocess
from transformers import pipeline

# --- Configuration ---
# Whisper checkpoint name handed to whisper.load_model(). "base" is picked to
# keep resource usage low on free Colab tiers and small Hugging Face Spaces.
WHISPER_MODEL_SIZE = "base"

# Hugging Face model id used to build the summarization pipeline.
SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"

# Location of the cookies.txt exported from a browser that is logged into
# YouTube. The file has to be uploaded next to this script (the root directory
# of the Colab notebook / Space); when it is missing, downloads are attempted
# without cookies.
COOKIES_FILE_PATH = "cookies.txt"

# --- Model singletons ---
# Both start out empty and are filled in by setup_environment().
whisper_model = summarizer_pipeline = None

# --- Setup Function to Install Libraries and Load Models ---
def setup_environment():
    """Populate the module-level Whisper model and summarization pipeline.

    Each model is loaded only while its global is still ``None``, so a call
    made after both loads succeeded performs no further loading. Despite the
    first log line, nothing is installed here: the pip command below is left
    commented out.
    """
    global whisper_model, summarizer_pipeline

    print("Installing required libraries...")
    # Colab-style install command, kept for reference only (not executed):
    #!pip install -q gradio yt-dlp openai-whisper transformers ffmpeg-python

    if whisper_model is None:
        print(f"Loading Whisper model: {WHISPER_MODEL_SIZE}...")
        try:
            # torch is imported lazily, only to probe for a CUDA device.
            import torch

            target_device = "cpu"
            if torch.cuda.is_available():
                target_device = "cuda"
            print(f"Using device: {target_device}")
            whisper_model = whisper.load_model(WHISPER_MODEL_SIZE, device=target_device)
            print("Whisper model loaded.")
        except Exception as load_error:
            # Any failure above (the torch import included) triggers a single
            # retry pinned to the CPU; a failure of that retry propagates.
            print(f"Error loading Whisper model: {load_error}. Falling back to CPU.")
            whisper_model = whisper.load_model(WHISPER_MODEL_SIZE, device="cpu")
            print("Whisper model loaded on CPU.")

    if summarizer_pipeline is not None:
        return

    print(f"Loading summarization model: {SUMMARIZATION_MODEL}...")
    summarizer_pipeline = pipeline("summarization", model=SUMMARIZATION_MODEL)
    print("Summarization model loaded.")

# Load both models eagerly, at import time, before the request handlers and
# the Gradio interface further down are defined.
setup_environment()

# --- Audio Download and Transcription ---

def _extract_video_id(youtube_url):
    """Return the YouTube video id embedded in *youtube_url*, or ``None``.

    Recognised forms are ``watch?v=<id>`` plus ``/shorts/<id>``, ``/embed/<id>``
    and ``/live/<id>`` on the youtube.com hosts, and ``youtu.be/<id>``. The id
    must be exactly 11 characters from ``[0-9A-Za-z_-]``. Because the id is
    later interpolated into a file path, anything else (``..``, slashes, an
    empty string) is rejected instead of being passed through.

    Raises:
        ValueError: If ``urlparse`` cannot parse the URL.
    """
    import re
    from urllib.parse import parse_qs, urlparse

    youtube_hosts = ('www.youtube.com', 'youtube.com', 'm.youtube.com', 'music.youtube.com')
    path_prefixes = ('shorts', 'embed', 'live')

    parsed_url = urlparse((youtube_url or "").strip())
    path_parts = [part for part in parsed_url.path.split("/") if part]

    candidate = None
    if parsed_url.hostname in youtube_hosts:
        query_ids = parse_qs(parsed_url.query).get('v')
        if query_ids:
            candidate = query_ids[0]
        elif len(path_parts) >= 2 and path_parts[0] in path_prefixes:
            candidate = path_parts[1]
    elif parsed_url.hostname == 'youtu.be' and path_parts:
        candidate = path_parts[0]

    if candidate and re.fullmatch(r'[0-9A-Za-z_-]{11}', candidate):
        return candidate
    return None


def download_and_transcribe_audio(youtube_url):
    """
    Downloads audio from YouTube and transcribes it using Whisper.

    Args:
        youtube_url: URL of a single YouTube video (watch, shorts, embed,
            live or youtu.be form). Surrounding whitespace is ignored.

    Returns:
        A ``(transcript, error_message)`` tuple. On success ``error_message``
        is ``None``; on any failure ``transcript`` is ``None`` and
        ``error_message`` describes the problem. Download and transcription
        errors are reported through the tuple rather than raised.
    """
    try:
        video_id = _extract_video_id(youtube_url)
    except Exception as e:
        return None, f"Error parsing YouTube URL: {e}"

    if not video_id:
        return None, "Invalid YouTube URL provided. Please check the format."

    youtube_url = youtube_url.strip()
    # video_id is validated above, so it cannot escape /tmp.
    audio_path = f"/tmp/{video_id}.mp3"

    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': audio_path,
        'noplaylist': True,
        'quiet': True,
        'no_warnings': True,
        'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
    }

    if os.path.exists(COOKIES_FILE_PATH):
        ydl_opts['cookiefile'] = COOKIES_FILE_PATH
        print(f"Using cookies from {COOKIES_FILE_PATH} for yt-dlp download.")
    else:
        print(f"WARNING: {COOKIES_FILE_PATH} not found. Proceeding without cookies. "
              "Downloads may fail due to bot detection. Please upload a valid cookies.txt.")

    try:
        print(f"Downloading audio for {youtube_url} to {audio_path} using yt-dlp...")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([youtube_url])
        print("Audio download complete.")

        print(f"Transcribing audio from {audio_path} using Whisper ({WHISPER_MODEL_SIZE})...")
        if whisper_model is None:
            setup_environment()

        result = whisper_model.transcribe(audio_path, fp16=False)
        transcript = result["text"]
        print("Transcription complete.")
        return transcript, None
    except yt_dlp.utils.DownloadError as e:
        # exc_info may be absent and the wrapped exception need not carry
        # ``.msg``; fall back to str(e) instead of raising inside the handler.
        cause = e.exc_info[1] if getattr(e, "exc_info", None) else None
        detail = getattr(cause, "msg", None) or str(e)
        error_message = f"Download Error (yt-dlp): {detail}"
        print(error_message)
        return None, error_message
    except Exception as e:
        error_message = f"An unexpected error occurred during audio processing: {str(e)}"
        print(error_message)
        return None, error_message
    finally:
        # Best-effort cleanup: a failed delete must not replace the result
        # (or the error tuple) that is already being returned.
        if os.path.exists(audio_path):
            try:
                os.remove(audio_path)
                print(f"Cleaned up {audio_path}")
            except OSError as cleanup_error:
                print(f"WARNING: could not remove {audio_path}: {cleanup_error}")

# --- Text Summarization ---

def _split_into_word_chunks(text, chunk_words):
    """Split *text* on whitespace into space-joined chunks of about *chunk_words* words."""
    words = text.split()
    chunks = [words[start:start + chunk_words] for start in range(0, len(words), chunk_words)]
    # Fold a very short tail into the previous chunk: summarising a handful of
    # words with min_length=50 would force the model to pad the output.
    if len(chunks) > 1 and len(chunks[-1]) < chunk_words // 4:
        chunks[-2].extend(chunks.pop())
    return [" ".join(chunk) for chunk in chunks]


def summarize_text(text):
    """
    Summarizes the given text using the loaded summarization model.

    The text is split into chunks of roughly 500 words and each chunk is
    summarized separately, because a full-length transcript passed in one
    call exceeds the input limit of BART-family checkpoints such as the
    configured one (1024 tokens) and makes the call fail. ``truncation=True``
    is a safety net for chunks that still tokenize to more than the limit.

    Args:
        text: The transcript to summarize.

    Returns:
        The chunk summaries joined with single spaces, or the string
        "Summarization failed." when the text is blank or the model raises.
    """
    print("Summarizing text...")
    if not text or not text.strip():
        print("Error during summarization: no text to summarize.")
        return "Summarization failed."

    try:
        if summarizer_pipeline is None:
            setup_environment()

        chunk_summaries = []
        for chunk in _split_into_word_chunks(text, 500):
            output = summarizer_pipeline(
                chunk,
                max_length=500,
                min_length=50,
                do_sample=False,
                truncation=True,
            )
            chunk_summaries.append(output[0]['summary_text'].strip())

        summary = " ".join(chunk_summaries)
        print("Summarization complete.")
        return summary
    except Exception as e:
        print(f"Error during summarization: {e}")
        return "Summarization failed."

# --- Main Processing Function ---

def process_youtube_video(youtube_url):
    """
    Main function to process the YouTube video: download audio, transcribe, and summarize.

    Args:
        youtube_url: The URL typed into the Gradio textbox.

    Returns:
        A ``(full_transcript, summary_notes)`` tuple of strings for the two
        output textboxes. When the transcript cannot be obtained, the first
        element carries a "Failed to get transcript: ..." message and the
        second is "N/A".
    """
    summary_notes = "N/A"

    # Blank input can never be a valid URL; answer immediately instead of
    # loading models or starting a download.
    if not youtube_url or not str(youtube_url).strip():
        return (
            "Failed to get transcript: Invalid YouTube URL provided. Please check the format.",
            summary_notes,
        )

    if whisper_model is None or summarizer_pipeline is None:
        setup_environment()
        if whisper_model is None or summarizer_pipeline is None:
            return "Error: Failed to load AI models. Please check Colab environment.", "N/A"

    transcribed_text, audio_error = download_and_transcribe_audio(youtube_url)

    # Decide on the explicit error value, not on the transcript's truthiness
    # or its wording, so a transcript that is empty or that happens to start
    # with "Failed to get transcript" is classified correctly.
    if audio_error is not None:
        return f"Failed to get transcript: {audio_error}", summary_notes
    if not transcribed_text or not transcribed_text.strip():
        return "Failed to get transcript: No speech was detected in the audio.", summary_notes

    return transcribed_text, summarize_text(transcribed_text)

# --- Gradio Interface ---
# Input component: a single textbox for the video URL.
_url_input = gr.Textbox(label="Enter YouTube Video URL (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)")

# Output components, in the order process_youtube_video returns its values.
_result_boxes = [
    gr.Textbox(label="Full Transcript", lines=15, interactive=False),
    gr.Textbox(label="Summary/Notes", lines=10, interactive=False),
]

# HTML blurb rendered under the title: what the tool does, the cookies.txt
# setup steps, and performance / troubleshooting notes.
_app_description = (
    "This is a smaller, more resource-efficient version of NotebookLM. "
    "Enter a YouTube video URL. This tool will download its audio using `yt-dlp`, "
    "transcribe it using OpenAI Whisper (using the smaller 'base' model), "
    "and then generate a summary/notes."
    "<br><br><b>Important Setup Steps (One-Time in Colab/Hugging Face Spaces):</b>"
    "<ol>"
    "<li><b>Export `cookies.txt` from your browser:</b> Use a browser extension like 'Get cookies.txt' (for Chrome/Firefox) "
    "after logging into YouTube. This file contains your session cookies, which `yt-dlp` needs to bypass YouTube's bot detection.</li>"
    "<li><b>Upload `cookies.txt` to the root directory of your Colab notebook or Hugging Face Space.</b></li>"
    "</ol>"
    "<b>Performance Note:</b> While this version is optimized, analyzing long videos (e.g., 1 hour+) can still take a significant amount of time "
    "and consume considerable resources, especially on free tiers. For faster results, try shorter videos."
    "<br><b>Troubleshooting Downloads:</b> If downloads still fail with 'Sign in to confirm you’re not a bot', "
    "your `cookies.txt` might be invalid or expired, or YouTube's detection has become more aggressive. "
    "There are no other direct, free, and reliable methods to bypass YouTube's restrictions without using their official APIs."
)

# One-click sample inputs shown below the form (one URL per row).
_example_urls = [
    ["https://www.youtube.com/watch?v=jNQXAC9IVRw"],
    ["https://www.youtube.com/watch?v=kfS7W0-JtQo"],
]

iface = gr.Interface(
    fn=process_youtube_video,
    inputs=_url_input,
    outputs=_result_boxes,
    title="Mini-Mini NotebookLM: YouTube Video Summarizer (Colab/Hugging Face)",
    description=_app_description,
    allow_flagging="auto",
    examples=_example_urls,
)

# Starts the web UI as soon as this module is executed.
iface.launch(debug=True)