import gradio as gr
import yt_dlp
import whisper
import os
from transformers import pipeline

# --- Configuration ---
# Using the 'base' Whisper model for significantly reduced resource usage.
# This is ideal for free Colab tiers or Hugging Face Spaces with limited CPU/GPU.
WHISPER_MODEL_SIZE = "base"

# Choose a summarization model. 'sshleifer/distilbart-cnn-12-6' is a good balance
# of quality and size for summarization.
SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"

# Path to your downloaded cookies.txt file.
# IMPORTANT: You MUST upload 'cookies.txt' (exported from your browser after logging
# into YouTube) to the root directory of your Colab notebook or Hugging Face Space
# for downloads to work reliably.
COOKIES_FILE_PATH = "cookies.txt"

# --- Global Variables for Models (loaded once) ---
whisper_model = None
summarizer_pipeline = None

# --- Setup Function to Load Models ---
def setup_environment():
    """Loads the AI models once. In Colab, install dependencies first by running:
    !pip install -q gradio yt-dlp openai-whisper transformers ffmpeg-python
    """
    global whisper_model, summarizer_pipeline
    if whisper_model is None:
        print(f"Loading Whisper model: {WHISPER_MODEL_SIZE}...")
        try:
            # Check for a GPU and pick the device accordingly.
            import torch
            device = "cuda" if torch.cuda.is_available() else "cpu"
            print(f"Using device: {device}")
            whisper_model = whisper.load_model(WHISPER_MODEL_SIZE, device=device)
            print("Whisper model loaded.")
        except Exception as e:
            print(f"Error loading Whisper model: {e}. Falling back to CPU.")
            whisper_model = whisper.load_model(WHISPER_MODEL_SIZE, device="cpu")
            print("Whisper model loaded on CPU.")
    if summarizer_pipeline is None:
        print(f"Loading summarization model: {SUMMARIZATION_MODEL}...")
        summarizer_pipeline = pipeline("summarization", model=SUMMARIZATION_MODEL)
        print("Summarization model loaded.")

# Load the models once at the start of the Colab session.
setup_environment()

# --- Audio Download and Transcription ---
def download_and_transcribe_audio(youtube_url):
    """
    Downloads audio from YouTube and transcribes it using Whisper.
    Returns (transcript, None) on success or (None, error_message) on failure.
    """
    video_id = None
    try:
        from urllib.parse import urlparse, parse_qs
        parsed_url = urlparse(youtube_url)
        if parsed_url.hostname in ['www.youtube.com', 'youtube.com', 'm.youtube.com']:
            video_id = parse_qs(parsed_url.query).get('v')
            if video_id:
                video_id = video_id[0]
        elif parsed_url.hostname == 'youtu.be':
            video_id = parsed_url.path[1:]
        if not video_id:
            return None, "Invalid YouTube URL provided. Please check the format."
    except Exception as e:
        return None, f"Error parsing YouTube URL: {e}"

    # The FFmpegExtractAudio postprocessor replaces the downloaded file's
    # extension with .mp3, so 'outtmpl' must use %(ext)s; a literal '.mp3'
    # template would clash with the postprocessor's renaming.
    audio_path = f"/tmp/{video_id}.mp3"
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': f"/tmp/{video_id}.%(ext)s",
        'noplaylist': True,
        'quiet': True,
        'no_warnings': True,
        'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
    }
    if os.path.exists(COOKIES_FILE_PATH):
        ydl_opts['cookiefile'] = COOKIES_FILE_PATH
        print(f"Using cookies from {COOKIES_FILE_PATH} for yt-dlp download.")
    else:
        print(f"WARNING: {COOKIES_FILE_PATH} not found. Proceeding without cookies. "
              "Downloads may fail due to bot detection. Please upload a valid cookies.txt.")
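
    # (Added sketch) The FFmpegExtractAudio step shells out to ffmpeg, which
    # Colab images normally include but minimal containers may not. This guard
    # is an optional assumption-check, added here to fail fast with a clear
    # message instead of a confusing mid-download error.
    import shutil
    if shutil.which("ffmpeg") is None:
        return None, "ffmpeg not found on PATH; it is required to extract MP3 audio."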

    try:
        print(f"Downloading audio for {youtube_url} to {audio_path} using yt-dlp...")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([youtube_url])
        print("Audio download complete.")

        print(f"Transcribing audio from {audio_path} using Whisper ({WHISPER_MODEL_SIZE})...")
        if whisper_model is None:
            setup_environment()
        # fp16=False avoids half-precision warnings/errors on CPU.
        result = whisper_model.transcribe(audio_path, fp16=False)
        transcript = result["text"]
        print("Transcription complete.")
        return transcript, None
    except yt_dlp.utils.DownloadError as e:
        # str(e) already carries yt-dlp's message; e.exc_info is not reliable here.
        error_message = f"Download Error (yt-dlp): {e}"
        print(error_message)
        return None, error_message
    except Exception as e:
        error_message = f"An unexpected error occurred during audio processing: {e}"
        print(error_message)
        return None, error_message
    finally:
        # Clean up the temporary audio file regardless of success or failure.
        if os.path.exists(audio_path):
            os.remove(audio_path)
            print(f"Cleaned up {audio_path}")

# --- Text Summarization ---
def summarize_text(text):
    """
    Summarizes the given text using the loaded summarization model.
    Returns the summarized text.
    """
    print("Summarizing text...")
    try:
        if summarizer_pipeline is None:
            setup_environment()
        # truncation=True keeps inputs longer than the model's context window
        # from raising errors (they are cut off instead; see the chunking
        # sketch below for an alternative that covers the whole transcript).
        summary = summarizer_pipeline(
            text, max_length=500, min_length=50, do_sample=False, truncation=True
        )[0]['summary_text']
        print("Summarization complete.")
        return summary
    except Exception as e:
        print(f"Error during summarization: {e}")
        return "Summarization failed."

# --- Main Processing Function ---
def process_youtube_video(youtube_url):
    """
    Main function to process the YouTube video: download audio, transcribe,
    and summarize.
    """
    if whisper_model is None or summarizer_pipeline is None:
        setup_environment()
        if whisper_model is None or summarizer_pipeline is None:
            return "Error: Failed to load AI models. Please check the Colab environment.", "N/A"

    transcribed_text, audio_error = download_and_transcribe_audio(youtube_url)
    if not transcribed_text:
        return (f"Failed to get transcript: {audio_error}",
                "Cannot summarize due to failed transcription.")

    return transcribed_text, summarize_text(transcribed_text)
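
# (Added sketch, not wired into the UI) distilbart-cnn-12-6 accepts roughly
# 1024 input tokens, so long transcripts are truncated by summarize_text above.
# One hedged workaround is to summarize fixed-size word chunks and join the
# partial summaries; the 700-word chunk size is an assumption chosen to stay
# under the token limit, not a tuned value.
def summarize_long_text(text, chunk_words=700):
    words = text.split()
    chunks = [" ".join(words[i:i + chunk_words])
              for i in range(0, len(words), chunk_words)]
    partial_summaries = [
        summarizer_pipeline(chunk, max_length=150, min_length=30,
                            do_sample=False, truncation=True)[0]['summary_text']
        for chunk in chunks
    ]
    return " ".join(partial_summaries)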

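# (Added sketch) Whisper's result dict also carries per-segment timestamps in
# result["segments"] (each with 'start', 'end', and 'text' keys). If timestamped
# notes are ever wanted, a formatter like this could replace the plain-text
# return in download_and_transcribe_audio; it is illustrative only.
def format_transcript_with_timestamps(result):
    return "\n".join(
        f"[{seg['start']:7.2f}s] {seg['text'].strip()}" for seg in result["segments"]
    )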

# --- Gradio Interface ---
iface = gr.Interface(
    fn=process_youtube_video,
    inputs=gr.Textbox(label="Enter YouTube Video URL (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)"),
    outputs=[
        gr.Textbox(label="Full Transcript", lines=15, interactive=False),
        gr.Textbox(label="Summary/Notes", lines=10, interactive=False),
    ],
    title="Mini-Mini NotebookLM: YouTube Video Summarizer (Colab/Hugging Face)",
    description=(
        "This is a smaller, more resource-efficient version of NotebookLM. "
        "Enter a YouTube video URL. This tool will download its audio using `yt-dlp`, "
        "transcribe it with OpenAI Whisper (the smaller 'base' model), "
        "and then generate a summary/notes.\n\n"
        "**Important Setup Steps (One-Time in Colab/Hugging Face Spaces):**\n"
        "1. **Export `cookies.txt` from your browser:** Use a browser extension like "
        "'Get cookies.txt' (for Chrome/Firefox) after logging into YouTube. This file "
        "contains your session cookies, which `yt-dlp` needs to bypass YouTube's bot detection.\n"
        "2. **Upload `cookies.txt`** to the root directory of your Colab notebook or "
        "Hugging Face Space.\n\n"
        "**Performance Note:** While this version is optimized, analyzing long videos "
        "(e.g., 1 hour+) can still take a significant amount of time and consume "
        "considerable resources, especially on free tiers. For faster results, try shorter videos.\n\n"
        "**Troubleshooting Downloads:** If downloads still fail with 'Sign in to confirm "
        "you're not a bot', your `cookies.txt` might be invalid or expired, or YouTube's "
        "detection has become more aggressive. There are no other direct, free, and reliable "
        "methods to bypass YouTube's restrictions without using their official APIs."
    ),
    allow_flagging="auto",
    examples=[
        ["https://www.youtube.com/watch?v=jNQXAC9IVRw"],  # Short educational video
        ["https://www.youtube.com/watch?v=kfS7W0-JtQo"],  # Another example
    ],
)

# debug=True keeps the cell running and surfaces tracebacks in the Colab output.
iface.launch(debug=True)
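
# (Added usage sketch) Quick smoke test without the UI, assuming the models
# loaded and a valid cookies.txt is present; the URL is one of the examples above.
# transcript, notes = process_youtube_video("https://www.youtube.com/watch?v=jNQXAC9IVRw")
# print(notes)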