SlouchyBuffalo commited on
Commit
872db68
·
verified ·
1 Parent(s): 40ec708

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -0
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import queue
2
+ import threading
3
+ import spaces
4
+ import os
5
+ import io
6
+ import soundfile as sf
7
+ import gradio as gr
8
+ import numpy as np
9
+ import time
10
+ import pymupdf
11
+ import requests
12
+ from pathlib import Path
13
+
14
+ import torch
15
+ from huggingface_hub import InferenceClient
16
+ from kokoro import KModel, KPipeline
17
+ # -----------------------------------------------------------------------------
18
+ # Get default podcast materials, from Daily papers and one download
19
+ # -----------------------------------------------------------------------------
20
# -----------------------------------------------------------------------------
# Get default podcast materials, from Daily papers and one download
# -----------------------------------------------------------------------------
from papers import PaperManager

paper_manager = PaperManager()
top_papers = paper_manager.get_top_content()

# Default discussion subject when the user supplies neither URL nor PDF:
# the content of the first (top trending) daily paper returned above.
PODCAST_SUBJECT = list(top_papers.values())[0]

# Download a sample PDF used as a clickable example in the Gradio UI.
# NOTE(review): the HTTP status is never checked — a failed download would
# silently write an error page (or empty body) into the example file.
# Consider response.raise_for_status() before writing.
os.makedirs("examples", exist_ok=True)
response = requests.get("https://www.palantir.com/assets/xrfr7uokpv1b/1wtb4LWF7XIuJisnMwH0XW/dc37fdda646a5df6c5b86f695ce990c0/NYT_-_Our_Oppenheimer_Moment-_The_Creation_of_A.I._Weapons.pdf")
with open("examples/Essay_Palantir.pdf", 'wb') as f:
    f.write(response.content)
32
# -----------------------------------------------------------------------------
# LLM that writes the script (unchanged)
# -----------------------------------------------------------------------------
from prompts import SYSTEM_PROMPT

# Script-writing LLM served through Hugging Face Inference Providers with the
# Cerebras backend. Requires the HF_TOKEN environment variable / Space secret.
client = InferenceClient(
    "meta-llama/Llama-3.3-70B-Instruct",
    provider="cerebras",
    token=os.getenv("HF_TOKEN"),
)
42
+
43
+
44
def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for a script of a podcast given by two hosts.

    Args:
        subject: Source material to discuss; truncated to its first 10,000
            characters before being sent to the model.
        steering_question: Optional focus question, appended as an extra
            user turn when non-empty.

    Returns:
        The script text starting at the first "[JANE]" speaker tag (any
        model preamble before the dialogue is stripped).

    Raises:
        ValueError: If the model reply contains no "[JANE]" tag, i.e. the
            expected dialogue format was not produced.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.
{subject[:10000]}"""},
    ]
    if steering_question:
        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    full_text = response.choices[0].message.content
    # Validate explicitly instead of `assert`: asserts are stripped under
    # `python -O`, and a malformed LLM reply is an expected runtime condition
    # (bad generation), not a programming bug.
    dialogue_start_index = full_text.find("[JANE]")
    if dialogue_start_index == -1:
        raise ValueError("LLM reply does not contain the expected '[JANE]' speaker tag")
    podcast_text = full_text[dialogue_start_index:]
    return podcast_text
63
+
64
# -----------------------------------------------------------------------------
# Kokoro TTS
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()

# 82M-parameter Kokoro TTS model, inference only (eval mode), on GPU if present.
kmodel = KModel(repo_id='hexgrad/Kokoro-82M').to("cuda" if CUDA_AVAILABLE else "cpu").eval()
kpipeline = KPipeline(lang_code="a") # English voices

# Voice ids used for the two podcast hosts (Mike and Jane).
MALE_VOICE = "am_fenrir"
FEMALE_VOICE = "af_heart"

# Pre‑warm voices to avoid first‑call latency
for v in (MALE_VOICE, FEMALE_VOICE):
    kpipeline.load_voice(v)
78
+
79
@spaces.GPU
def generate_podcast(url: str, pdf_path: str, topic: str):
    """Write a podcast script from the chosen material and stream TTS audio.

    Generator used as the Gradio `fn`: yields `(sample_rate, audio_ndarray)`
    tuples so the streaming Audio output can start playing before synthesis
    of the whole script has finished.

    Material priority: uploaded PDF > webpage URL (fetched through the
    r.jina.ai reader proxy) > module-level default (today's top daily paper).
    """
    if pdf_path:
        # Concatenate the text of every PDF page into one string.
        with pymupdf.open(pdf_path) as pdf_doc:
            material_text = ""
            for page in pdf_doc:
                material_text += page.get_text()
    elif url:
        # r.jina.ai returns a readable text/markdown rendering of the page.
        response = requests.get(f'https://r.jina.ai/{url}')
        material_text = response.text
    else:
        material_text = PODCAST_SUBJECT

    # Generate podcast script!
    podcast_script = generate_podcast_script(material_text, topic)

    # One dialogue line per list entry; blank lines dropped.
    lines = [l for l in podcast_script.strip().splitlines() if l.strip()]

    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)

    speed = 1.
    sr = 24000  # output sample rate (Hz) reported to gr.Audio

    for line in lines:
        # Expect "[S1] ..." or "[S2] ..."
        # (actual tags produced by the prompt are [MIKE] / [JANE])
        if line.startswith("[MIKE]"):
            pipeline_voice = pipeline_voice_male
            voice = MALE_VOICE
            utterance = line[len("[MIKE]"):].strip()
        elif line.startswith("[JANE]"):
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line[len("[JANE]"):].strip()
        else: # fallback
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line

        # The pipeline may split one utterance into several phoneme chunks (ps);
        # each chunk is synthesized and streamed independently.
        for _, ps, _ in pipeline(utterance, voice, speed):
            t0 = time.time()
            # presumably selects a length-conditioned reference embedding for
            # this chunk — TODO confirm against kokoro's voice-tensor layout
            ref_s = pipeline_voice[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            yield (sr, audio_numpy)
            # NOTE(review): t1 is taken after the yield resumes, so the printed
            # duration also includes whatever the consumer did with the chunk.
            t1 = time.time()
            print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. {audio_numpy.shape}")
126
+
127
# Clickable examples shown under the interface: one URL-based, one PDF-based.
# Each entry matches the inputs order: [url, pdf_path, topic].
EXAMPLES = [
    ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
    [None, str(Path("examples/Essay_Palantir.pdf")), "Make sure to keep some critic spirit in the analysis!"],
]

demo = gr.Interface(
    title="Open NotebookLM 🎙️",
    description=f"""Generates a podcast discussion between two hosts about the materials of your choice.
If you do not specify any source materials below, the podcast will be about the top trending [Daily paper](https://huggingface.co/papers/), '**{list(top_papers.keys())[0]}**'
Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M), lightning-fast inference for [Llama-3.3-70B](meta-llama/Llama-3.3-70B-Instruct) by Cerebras, and uses elements from a NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).""",
    # Generator function: its yielded chunks feed the streaming Audio output.
    fn=generate_podcast,
    inputs=[
        gr.Textbox(
            label="🔗 Type a Webpage URL to discuss it (Optional)",
            placeholder="The URL you want to discuss the content for.",
        ),
        gr.File(
            label="Upload a PDF as discussion material (Optional)",
            file_types=[".pdf"],
            file_count="single",
        ),
        gr.Textbox(label="🤔 Do you have a more specific topic or question on the materials?", placeholder="You can leave this blank."),
    ],
    outputs=[
        # streaming=True lets playback start as soon as the first chunk arrives.
        gr.Audio(
            label="Listen to your podcast! 🔊",
            format="wav",
            streaming=True,
        ),
    ],
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️",
    # clear_btn=gr.Button("🗑️"),
    examples=EXAMPLES,
    cache_examples=True,
)
162
+
163
# Script entry point: launch the Gradio app (Spaces runs this module directly).
if __name__ == "__main__":
    demo.launch()