Spaces: Running on Zero
Create app.py
app.py
ADDED
@@ -0,0 +1,159 @@
import queue
import threading
import spaces
import os
import io
import soundfile as sf
import gradio as gr
import numpy as np
import time
import pymupdf
import requests
from pathlib import Path

import torch
from huggingface_hub import InferenceClient
from kokoro import KModel, KPipeline

# -----------------------------------------------------------------------------
# Download example PDF
# -----------------------------------------------------------------------------
os.makedirs("examples", exist_ok=True)
response = requests.get("https://www.palantir.com/assets/xrfr7uokpv1b/1wtb4LWF7XIuJisnMwH0XW/dc37fdda646a5df6c5b86f695ce990c0/NYT_-_Our_Oppenheimer_Moment-_The_Creation_of_A.I._Weapons.pdf")
with open("examples/Essay_Palantir.pdf", 'wb') as f:
    f.write(response.content)

# -----------------------------------------------------------------------------
# LLM that writes the script
# -----------------------------------------------------------------------------
from prompts import SYSTEM_PROMPT

# Llama-3.3-70B is served through Hugging Face Inference Providers via the
# Cerebras provider; HF_TOKEN must be set in the Space secrets.
client = InferenceClient(
    "meta-llama/Llama-3.3-70B-Instruct",
    provider="cerebras",
    token=os.getenv("HF_TOKEN"),
)


def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for a script of a podcast given by two hosts."""
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"""Please analyze this content and create an engaging podcast discussion:
{subject[:10000]}"""},
    ]
    if steering_question and len(steering_question) > 0:
        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    full_text = response.choices[0].message.content
    assert "[JANE]" in full_text
    # Drop any preamble before the first speaker tag
    dialogue_start_index = full_text.find("[JANE]")
    podcast_text = full_text[dialogue_start_index:]
    return podcast_text

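# The LLM is expected to return a dialogue in which every line opens with a
# speaker tag, e.g. "[JANE] Welcome to the show!" / "[MIKE] Thanks, Jane!".
# generate_podcast() below relies on these tags to pick a voice per line.
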
# -----------------------------------------------------------------------------
# Kokoro TTS
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()

kmodel = KModel(repo_id='hexgrad/Kokoro-82M').to("cuda" if CUDA_AVAILABLE else "cpu").eval()
kpipeline = KPipeline(lang_code="a")  # English voices

MALE_VOICE = "am_fenrir"
FEMALE_VOICE = "af_heart"

# Pre-warm voices to avoid first-call latency
for v in (MALE_VOICE, FEMALE_VOICE):
    kpipeline.load_voice(v)

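# On ZeroGPU Spaces ("Running on Zero"), @spaces.GPU requests a GPU slice for
# the duration of each call rather than holding one permanently.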
@spaces.GPU
def generate_podcast(url: str, pdf_path: str, topic: str):
    if pdf_path:
        with pymupdf.open(pdf_path) as pdf_doc:
            material_text = ""
            for page in pdf_doc:
                material_text += page.get_text()
    elif url:
        # The Jina Reader proxy returns the page content as LLM-friendly plain text
        response = requests.get(f'https://r.jina.ai/{url}')
        material_text = response.text
    else:
        raise gr.Error("Please provide either a URL or upload a PDF file to generate a podcast.")

    # Generate podcast script!
    podcast_script = generate_podcast_script(material_text, topic)

    lines = [l for l in podcast_script.strip().splitlines() if l.strip()]

    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)

    speed = 1.
    sr = 24000

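    # Stream as we go: each yielded (sample_rate, waveform) chunk is pushed to
    # the streaming gr.Audio output, so playback starts with the first line
    # instead of waiting for the full podcast.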
    for line in lines:
        # Expect lines like "[MIKE] ..." or "[JANE] ..."
        if line.startswith("[MIKE]"):
            pipeline_voice = pipeline_voice_male
            voice = MALE_VOICE
            utterance = line[len("[MIKE]"):].strip()
        elif line.startswith("[JANE]"):
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line[len("[JANE]"):].strip()
        else:  # fallback: untagged lines are read with the female voice
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line

        for _, ps, _ in pipeline(utterance, voice, speed):
            t0 = time.time()
            ref_s = pipeline_voice[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            yield (sr, audio_numpy)
            t1 = time.time()
            print(f"PROCESSED '{utterance}' in {int(t1 - t0)} seconds. {audio_numpy.shape}")

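# (url, pdf_path, topic) triples shown as clickable examples in the UI; with
# cache_examples=True, Gradio pre-computes their outputs at startup.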
EXAMPLES = [
    ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
    [None, str(Path("examples/Essay_Palantir.pdf")), "Make sure to keep some critical spirit in the analysis!"],
]

demo = gr.Interface(
    title="Open NotebookLM 🎙️",
    description="""Generates a podcast discussion between two hosts about the materials of your choice.
Upload a PDF or provide a webpage URL to create your podcast discussion.
Built on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M) and lightning-fast [Llama-3.3-70B](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct) inference by Cerebras, with elements borrowed from the NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).""",
    fn=generate_podcast,
    inputs=[
        gr.Textbox(
            label="🔗 Type a Webpage URL to discuss it (Optional)",
            placeholder="The URL of the page whose content you want to discuss.",
        ),
        gr.File(
            label="Upload a PDF as discussion material (Optional)",
            file_types=[".pdf"],
            file_count="single",
        ),
        gr.Textbox(label="🤔 Do you have a more specific topic or question on the materials?", placeholder="You can leave this blank."),
    ],
    outputs=[
        gr.Audio(
            label="Listen to your podcast! 🔊",
            format="wav",
            streaming=True,
        ),
    ],
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️",
    # clear_btn=gr.Button("🗑️"),
    examples=EXAMPLES,
    cache_examples=True,
)

if __name__ == "__main__":
    demo.launch()