SlouchyBuffalo commited on
Commit
872db68
·
verified ·
1 Parent(s): 40ec708

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -0
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import queue
2
+ import threading
3
+ import spaces
4
+ import os
5
+ import io
6
+ import soundfile as sf
7
+ import gradio as gr
8
+ import numpy as np
9
+ import time
10
+ import pymupdf
11
+ import requests
12
+ from pathlib import Path
13
+
14
+ import torch
15
+ from huggingface_hub import InferenceClient
16
+ from kokoro import KModel, KPipeline
17
+ # -----------------------------------------------------------------------------
18
+ # Get default podcast materials, from Daily papers and one download
19
+ # -----------------------------------------------------------------------------
20
# -----------------------------------------------------------------------------
# Get default podcast materials, from Daily papers and one download
# -----------------------------------------------------------------------------
from papers import PaperManager

paper_manager = PaperManager()
top_papers = paper_manager.get_top_content()

# Default discussion subject when the user supplies neither URL nor PDF:
# the content of the first (top trending) daily paper returned above.
PODCAST_SUBJECT = list(top_papers.values())[0]

# Download a sample PDF used as a clickable example in the Gradio UI.
# NOTE(review): the HTTP status is never checked — a failed download would
# silently write an error page (or empty body) into the example file.
# Consider response.raise_for_status() before writing.
os.makedirs("examples", exist_ok=True)
response = requests.get("https://www.palantir.com/assets/xrfr7uokpv1b/1wtb4LWF7XIuJisnMwH0XW/dc37fdda646a5df6c5b86f695ce990c0/NYT_-_Our_Oppenheimer_Moment-_The_Creation_of_A.I._Weapons.pdf")
with open("examples/Essay_Palantir.pdf", 'wb') as f:
    f.write(response.content)
32
# -----------------------------------------------------------------------------
# LLM that writes the script (unchanged)
# -----------------------------------------------------------------------------
from prompts import SYSTEM_PROMPT

# Script-writing LLM served through Hugging Face Inference Providers with the
# Cerebras backend. Requires the HF_TOKEN environment variable / Space secret.
client = InferenceClient(
    "meta-llama/Llama-3.3-70B-Instruct",
    provider="cerebras",
    token=os.getenv("HF_TOKEN"),
)
42
+
43
+
44
def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for a script of a podcast given by two hosts.

    Args:
        subject: Source material to discuss; truncated to its first 10,000
            characters before being sent to the model.
        steering_question: Optional focus question, appended as an extra
            user turn when non-empty.

    Returns:
        The script text starting at the first "[JANE]" speaker tag (any
        model preamble before the dialogue is stripped).

    Raises:
        ValueError: If the model reply contains no "[JANE]" tag, i.e. the
            expected dialogue format was not produced.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.
{subject[:10000]}"""},
    ]
    if steering_question:
        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    full_text = response.choices[0].message.content
    # Validate explicitly instead of `assert`: asserts are stripped under
    # `python -O`, and a malformed LLM reply is an expected runtime condition
    # (bad generation), not a programming bug.
    dialogue_start_index = full_text.find("[JANE]")
    if dialogue_start_index == -1:
        raise ValueError("LLM reply does not contain the expected '[JANE]' speaker tag")
    podcast_text = full_text[dialogue_start_index:]
    return podcast_text
63
+
64
# -----------------------------------------------------------------------------
# Kokoro TTS
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()

# 82M-parameter Kokoro TTS model, inference only (eval mode), on GPU if present.
kmodel = KModel(repo_id='hexgrad/Kokoro-82M').to("cuda" if CUDA_AVAILABLE else "cpu").eval()
kpipeline = KPipeline(lang_code="a") # English voices

# Voice ids used for the two podcast hosts (Mike and Jane).
MALE_VOICE = "am_fenrir"
FEMALE_VOICE = "af_heart"

# Pre‑warm voices to avoid first‑call latency
for v in (MALE_VOICE, FEMALE_VOICE):
    kpipeline.load_voice(v)
78
+
79
@spaces.GPU
def generate_podcast(url: str, pdf_path: str, topic: str):
    """Write a podcast script from the chosen material and stream TTS audio.

    Generator used as the Gradio `fn`: yields `(sample_rate, audio_ndarray)`
    tuples so the streaming Audio output can start playing before synthesis
    of the whole script has finished.

    Material priority: uploaded PDF > webpage URL (fetched through the
    r.jina.ai reader proxy) > module-level default (today's top daily paper).
    """
    if pdf_path:
        # Concatenate the text of every PDF page into one string.
        with pymupdf.open(pdf_path) as pdf_doc:
            material_text = ""
            for page in pdf_doc:
                material_text += page.get_text()
    elif url:
        # r.jina.ai returns a readable text/markdown rendering of the page.
        response = requests.get(f'https://r.jina.ai/{url}')
        material_text = response.text
    else:
        material_text = PODCAST_SUBJECT

    # Generate podcast script!
    podcast_script = generate_podcast_script(material_text, topic)

    # One dialogue line per list entry; blank lines dropped.
    lines = [l for l in podcast_script.strip().splitlines() if l.strip()]

    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)

    speed = 1.
    sr = 24000  # output sample rate (Hz) reported to gr.Audio

    for line in lines:
        # Expect "[S1] ..." or "[S2] ..."
        # (actual tags produced by the prompt are [MIKE] / [JANE])
        if line.startswith("[MIKE]"):
            pipeline_voice = pipeline_voice_male
            voice = MALE_VOICE
            utterance = line[len("[MIKE]"):].strip()
        elif line.startswith("[JANE]"):
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line[len("[JANE]"):].strip()
        else: # fallback
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line

        # The pipeline may split one utterance into several phoneme chunks (ps);
        # each chunk is synthesized and streamed independently.
        for _, ps, _ in pipeline(utterance, voice, speed):
            t0 = time.time()
            # presumably selects a length-conditioned reference embedding for
            # this chunk — TODO confirm against kokoro's voice-tensor layout
            ref_s = pipeline_voice[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            yield (sr, audio_numpy)
            # NOTE(review): t1 is taken after the yield resumes, so the printed
            # duration also includes whatever the consumer did with the chunk.
            t1 = time.time()
            print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. {audio_numpy.shape}")
126
+
127
# Clickable examples shown under the interface: one URL-based, one PDF-based.
# Each entry matches the inputs order: [url, pdf_path, topic].
EXAMPLES = [
    ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
    [None, str(Path("examples/Essay_Palantir.pdf")), "Make sure to keep some critic spirit in the analysis!"],
]

demo = gr.Interface(
    title="Open NotebookLM 🎙️",
    description=f"""Generates a podcast discussion between two hosts about the materials of your choice.
If you do not specify any source materials below, the podcast will be about the top trending [Daily paper](https://huggingface.co/papers/), '**{list(top_papers.keys())[0]}**'
Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M), lightning-fast inference for [Llama-3.3-70B](meta-llama/Llama-3.3-70B-Instruct) by Cerebras, and uses elements from a NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).""",
    # Generator function: its yielded chunks feed the streaming Audio output.
    fn=generate_podcast,
    inputs=[
        gr.Textbox(
            label="🔗 Type a Webpage URL to discuss it (Optional)",
            placeholder="The URL you want to discuss the content for.",
        ),
        gr.File(
            label="Upload a PDF as discussion material (Optional)",
            file_types=[".pdf"],
            file_count="single",
        ),
        gr.Textbox(label="🤔 Do you have a more specific topic or question on the materials?", placeholder="You can leave this blank."),
    ],
    outputs=[
        # streaming=True lets playback start as soon as the first chunk arrives.
        gr.Audio(
            label="Listen to your podcast! 🔊",
            format="wav",
            streaming=True,
        ),
    ],
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️",
    # clear_btn=gr.Button("🗑️"),
    examples=EXAMPLES,
    cache_examples=True,
)
162
+
163
# Script entry point: launch the Gradio app (Spaces runs this module directly).
if __name__ == "__main__":
    demo.launch()