SlouchyBuffalo committed on
Commit
c7a8464
·
verified ·
1 Parent(s): 99fe1ff

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -0
app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import queue
2
+ import threading
3
+ import spaces
4
+ import os
5
+ import io
6
+ import soundfile as sf
7
+ import gradio as gr
8
+ import numpy as np
9
+ import time
10
+ import pymupdf
11
+ import requests
12
+ from pathlib import Path
13
+
14
+ import torch
15
+ from huggingface_hub import InferenceClient
16
+ from kokoro import KModel, KPipeline
17
+
18
# -----------------------------------------------------------------------------
# Download example PDF
# -----------------------------------------------------------------------------
# Fetched once at startup so the Gradio examples below have a local file.
os.makedirs("examples", exist_ok=True)
response = requests.get(
    "https://www.palantir.com/assets/xrfr7uokpv1b/1wtb4LWF7XIuJisnMwH0XW/dc37fdda646a5df6c5b86f695ce990c0/NYT_-_Our_Oppenheimer_Moment-_The_Creation_of_A.I._Weapons.pdf",
    timeout=30,  # without a timeout, a slow/unreachable host hangs app startup forever
)
# Fail fast instead of silently saving an HTML error page with a .pdf name,
# which would later break pymupdf when the example is opened.
response.raise_for_status()
with open("examples/Essay_Palantir.pdf", "wb") as f:
    f.write(response.content)
26
# -----------------------------------------------------------------------------
# LLM that writes the script
# -----------------------------------------------------------------------------
# SYSTEM_PROMPT lives in the sibling prompts.py module of this Space.
from prompts import SYSTEM_PROMPT

# Hugging Face Inference client routed to the Cerebras provider.
# Authentication token is read from the HF_TOKEN environment variable
# (os.getenv returns None if unset; the client call would then fail at
# request time, not here).
client = InferenceClient(
    "meta-llama/Llama-3.3-70B-Instruct",
    provider="cerebras",
    token=os.getenv("HF_TOKEN"),
)
38
def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for a script of a podcast given by two hosts.

    Args:
        subject: Source material for the discussion; only the first 10,000
            characters are sent to keep the prompt bounded.
        steering_question: Optional focus question appended as an extra
            user turn when non-empty.

    Returns:
        The script text, trimmed to start at the first "[JANE]" speaker tag
        (drops any preamble the model writes before the dialogue).

    Raises:
        gr.Error: If the model reply contains no "[JANE]" tag.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"""Please analyze this content and create an engaging podcast discussion:
{subject[:10000]}"""},
    ]
    if steering_question and len(steering_question) > 0:
        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    full_text = response.choices[0].message.content
    # The original used `assert "[JANE]" in full_text`, which is stripped
    # under `python -O` and surfaces as an unhelpful AssertionError in the UI.
    # Validate explicitly and raise a user-visible gr.Error instead.
    dialogue_start_index = full_text.find("[JANE]")
    if dialogue_start_index == -1:
        raise gr.Error("The model reply did not contain the expected '[JANE]' speaker tag. Please try again.")
    podcast_text = full_text[dialogue_start_index:]
    return podcast_text
57
+
58
# -----------------------------------------------------------------------------
# Kokoro TTS
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()

# Load the Kokoro-82M TTS model once at import time; inference-only,
# hence .eval(). Falls back to CPU when no GPU is available.
kmodel = KModel(repo_id='hexgrad/Kokoro-82M').to("cuda" if CUDA_AVAILABLE else "cpu").eval()
kpipeline = KPipeline(lang_code="a")  # English voices

# Voice IDs used for the two podcast hosts (MIKE / JANE respectively).
MALE_VOICE = "am_fenrir"
FEMALE_VOICE = "af_heart"

# Pre-warm voices to avoid first-call latency
for v in (MALE_VOICE, FEMALE_VOICE):
    kpipeline.load_voice(v)
73
@spaces.GPU
def generate_podcast(url: str, pdf_path: str, topic: str):
    """Stream a generated podcast as (sample_rate, waveform) audio chunks.

    Extracts material text from a PDF (takes precedence) or a webpage,
    asks the LLM for a two-host script, then synthesizes each line with
    Kokoro TTS, yielding audio incrementally for gr.Audio streaming.

    Args:
        url: Webpage URL; fetched through the r.jina.ai reader proxy.
        pdf_path: Local path of an uploaded PDF file.
        topic: Optional steering question forwarded to the script LLM.

    Yields:
        (sample_rate, numpy waveform) tuples, one per synthesized segment.

    Raises:
        gr.Error: If neither a URL nor a PDF is provided.
    """
    if pdf_path:
        with pymupdf.open(pdf_path) as pdf_doc:
            # "".join avoids the quadratic cost of repeated `+=` on long PDFs.
            material_text = "".join(page.get_text() for page in pdf_doc)
    elif url:
        # r.jina.ai returns a readable text rendition of the target page.
        response = requests.get(f'https://r.jina.ai/{url}', timeout=60)  # bound the fetch; no timeout could hang the GPU worker
        material_text = response.text
    else:
        raise gr.Error("Please provide either a URL or upload a PDF file to generate a podcast.")

    # Generate podcast script!
    podcast_script = generate_podcast_script(material_text, topic)

    lines = [l for l in podcast_script.strip().splitlines() if l.strip()]

    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)

    speed = 1.
    sr = 24000  # Kokoro output sample rate (Hz)

    for line in lines:
        # Expect "[MIKE] ..." or "[JANE] ..." (the old comment wrongly
        # referred to "[S1]"/"[S2]" tags that never appear in the script).
        if line.startswith("[MIKE]"):
            pipeline_voice = pipeline_voice_male
            voice = MALE_VOICE
            utterance = line[len("[MIKE]"):].strip()
        elif line.startswith("[JANE]"):
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line[len("[JANE]"):].strip()
        else:  # fallback: untagged lines are read in the female voice
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line

        for _, ps, _ in pipeline(utterance, voice, speed):
            t0 = time.time()
            ref_s = pipeline_voice[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            # Measure and log BEFORE yielding: a generator is suspended at
            # `yield`, so the original post-yield timestamp also counted the
            # consumer's (Gradio streaming) time, not just synthesis time.
            t1 = time.time()
            print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. {audio_numpy.shape}")
            yield (sr, audio_numpy)
121
# Clickable examples: (URL, PDF path, steering question) per input row.
EXAMPLES = [
    ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
    [None, str(Path("examples/Essay_Palantir.pdf")), "Make sure to keep some critic spirit in the analysis!"],
]

# Gradio UI: three inputs (URL textbox, PDF upload, steering-question textbox)
# wired to generate_podcast, with one streaming audio output.
demo = gr.Interface(
    title="Open NotebookLM 🎙️",
    description="""Generates a podcast discussion between two hosts about the materials of your choice.
Upload a PDF or provide a webpage URL to create your podcast discussion.
Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M), lightning-fast inference for [Llama-3.3-70B](meta-llama/Llama-3.3-70B-Instruct) by Cerebras, and uses elements from a NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).""",
    fn=generate_podcast,
    inputs=[
        gr.Textbox(
            label="🔗 Type a Webpage URL to discuss it (Optional)",
            placeholder="The URL you want to discuss the content for.",
        ),
        gr.File(
            label="Upload a PDF as discussion material (Optional)",
            file_types=[".pdf"],
            file_count="single",
        ),
        gr.Textbox(label="🤔 Do you have a more specific topic or question on the materials?", placeholder="You can leave this blank."),
    ],
    outputs=[
        gr.Audio(
            label="Listen to your podcast! 🔊",
            format="wav",
            streaming=True,  # generate_podcast is a generator: chunks play as they arrive
        ),
    ],
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️",
    # clear_btn=gr.Button("🗑️"),
    examples=EXAMPLES,
    cache_examples=True,
)
157
+
158
# Launch the Gradio app when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()