Spaces: Running on Zero
Delete app.py
app.py
DELETED
@@ -1,164 +0,0 @@
-import queue
-import threading
-import spaces
-import os
-import io
-import soundfile as sf
-import gradio as gr
-import numpy as np
-import time
-import pymupdf
-import requests
-from pathlib import Path
-
-import torch
-from huggingface_hub import InferenceClient
-from kokoro import KModel, KPipeline
-# -----------------------------------------------------------------------------
-# Get default podcast materials, from Daily papers and one download
-# -----------------------------------------------------------------------------
-from papers import PaperManager
-
-paper_manager = PaperManager()
-top_papers = paper_manager.get_top_content()
-
-PODCAST_SUBJECT = list(top_papers.values())[0]
-
-os.makedirs("examples", exist_ok=True)
-response = requests.get("https://www.palantir.com/assets/xrfr7uokpv1b/1wtb4LWF7XIuJisnMwH0XW/dc37fdda646a5df6c5b86f695ce990c0/NYT_-_Our_Oppenheimer_Moment-_The_Creation_of_A.I._Weapons.pdf")
-with open("examples/Essay_Palantir.pdf", 'wb') as f:
-    f.write(response.content)
-
-# -----------------------------------------------------------------------------
-# LLM that writes the script (unchanged)
-# -----------------------------------------------------------------------------
-from prompts import SYSTEM_PROMPT
-
-client = InferenceClient(
-    "meta-llama/Llama-3.3-70B-Instruct",
-    provider="cerebras",
-    token=os.getenv("HF_TOKEN"),
-)
-
-
-def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
-    """Ask the LLM for a script of a podcast given by two hosts."""
-    messages = [
-        {"role": "system", "content": SYSTEM_PROMPT},
-        {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.
-{subject[:10000]}"""},
-    ]
-    if steering_question and len(steering_question) > 0:
-        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})
-
-    response = client.chat_completion(
-        messages,
-        max_tokens=8156,
-    )
-    full_text = response.choices[0].message.content
-    assert "[JANE]" in full_text
-    dialogue_start_index = full_text.find("[JANE]")
-    podcast_text = full_text[dialogue_start_index:]
-    return podcast_text
-
-# -----------------------------------------------------------------------------
-# Kokoro TTS
-# -----------------------------------------------------------------------------
-CUDA_AVAILABLE = torch.cuda.is_available()
-
-kmodel = KModel(repo_id='hexgrad/Kokoro-82M').to("cuda" if CUDA_AVAILABLE else "cpu").eval()
-kpipeline = KPipeline(lang_code="a")  # English voices
-
-MALE_VOICE = "am_fenrir"
-FEMALE_VOICE = "af_heart"
-
-# Pre-warm voices to avoid first-call latency
-for v in (MALE_VOICE, FEMALE_VOICE):
-    kpipeline.load_voice(v)
-
-@spaces.GPU
-def generate_podcast(url: str, pdf_path: str, topic: str):
-    if pdf_path:
-        with pymupdf.open(pdf_path) as pdf_doc:
-            material_text = ""
-            for page in pdf_doc:
-                material_text += page.get_text()
-    elif url:
-        response = requests.get(f'https://r.jina.ai/{url}')
-        material_text = response.text
-    else:
-        material_text = PODCAST_SUBJECT
-
-    # Generate podcast script!
-    podcast_script = generate_podcast_script(material_text, topic)
-
-    lines = [l for l in podcast_script.strip().splitlines() if l.strip()]
-
-    pipeline = kpipeline
-    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
-    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)
-
-    speed = 1.0
-    sr = 24000
-
-    for line in lines:
-        # Expect "[MIKE] ..." or "[JANE] ..." speaker tags
-        if line.startswith("[MIKE]"):
-            pipeline_voice = pipeline_voice_male
-            voice = MALE_VOICE
-            utterance = line[len("[MIKE]"):].strip()
-        elif line.startswith("[JANE]"):
-            pipeline_voice = pipeline_voice_female
-            voice = FEMALE_VOICE
-            utterance = line[len("[JANE]"):].strip()
-        else:  # fallback
-            pipeline_voice = pipeline_voice_female
-            voice = FEMALE_VOICE
-            utterance = line
-
-        for _, ps, _ in pipeline(utterance, voice, speed):
-            t0 = time.time()
-            ref_s = pipeline_voice[len(ps) - 1]
-            audio_numpy = kmodel(ps, ref_s, speed).numpy()
-            yield (sr, audio_numpy)
-            t1 = time.time()
-            print(f"PROCESSED '{utterance}' in {int(t1 - t0)} seconds. {audio_numpy.shape}")
-
-EXAMPLES = [
-    ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
-    [None, str(Path("examples/Essay_Palantir.pdf")), "Make sure to keep some critical spirit in the analysis!"],
-]
-demo = gr.Interface(
-    title="Open NotebookLM 🎙️",
-    description=f"""Generates a podcast discussion between two hosts about the materials of your choice.
-If you do not specify any source materials below, the podcast will be about the top trending [Daily paper](https://huggingface.co/papers/), '**{list(top_papers.keys())[0]}**'.
-Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M), lightning-fast inference for [Llama-3.3-70B](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct) by Cerebras, and uses elements from a NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).""",
-    fn=generate_podcast,
-    inputs=[
-        gr.Textbox(
-            label="🔗 Type a Webpage URL to discuss it (Optional)",
-            placeholder="The URL whose content you want to discuss.",
-        ),
-        gr.File(
-            label="Upload a PDF as discussion material (Optional)",
-            file_types=[".pdf"],
-            file_count="single",
-        ),
-        gr.Textbox(label="🤔 Do you have a more specific topic or question about the materials?", placeholder="You can leave this blank."),
-    ],
-    outputs=[
-        gr.Audio(
-            label="Listen to your podcast! 🔊",
-            format="wav",
-            streaming=True,
-        ),
-    ],
-    theme=gr.themes.Soft(),
-    submit_btn="Generate podcast 🎙️",
-    # clear_btn=gr.Button("🗑️"),
-    examples=EXAMPLES,
-    cache_examples=True,
-)
-
-if __name__ == "__main__":
-    demo.launch()