Enhance generate_audio function: Update parameters, add documentation, and implement audio generation using MusicGen
Browse files
app.py
CHANGED
@@ -151,9 +151,30 @@ def generate_image(prompt: str, neg_prompt: str) -> Image.Image:
|
|
151 |
return gr.Image(value=image, label="Generated Image")
|
152 |
|
153 |
@tool
|
154 |
-
def generate_audio(prompt: str, duration: int
|
155 |
"""
|
156 |
Generate audio from a text prompt using MusicGen.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
Args:
|
158 |
prompt: The text prompt to generate the audio from.
|
159 |
duration: Duration of the generated audio in seconds.
|
@@ -187,7 +208,8 @@ class Agent:
|
|
187 |
model=client,
|
188 |
tools=[DuckDuckGoSearchTool(max_results=5),
|
189 |
VisitWebpageTool(max_output_length=20000),
|
190 |
-
generate_image,
|
|
|
191 |
generate_audio,
|
192 |
download_images,
|
193 |
transcribe_audio],
|
|
|
151 |
return gr.Image(value=image, label="Generated Image")
|
152 |
|
153 |
@tool
def generate_audio(prompt: str, duration: int) -> gr.Component:
    """
    Generate audio from a text prompt using MusicGen.
    Args:
        prompt: The text prompt to generate the audio from.
        duration: Duration of the generated audio in seconds.

    Returns:
        gr.Component: The generated audio as a Gradio Audio component.
    """
    # NOTE(review): the docstring above is kept word-for-word because the
    # @tool decorator presumably derives the tool/argument descriptions from
    # it — confirm before rewording.

    # Settings for wrapping the hosted Space as a callable tool. The token is
    # read from the environment and is None when HF_TOKEN is not set.
    space_options = {
        "space_id": "luke9705/MusicGen_custom",
        "token": os.environ.get('HF_TOKEN'),
        "name": "Sound_Generator",
        "description": "Generate music or sound effects from a text prompt using MusicGen.",
    }
    music_tool = Tool.from_space(**space_options)

    # Forward both arguments positionally and hand the result to Gradio.
    return gr.Audio(value=music_tool(prompt, duration))
|
173 |
+
|
174 |
+
@tool
|
175 |
+
def generate_audio_from_sample(prompt: str, duration: int, sample: list[int, np.ndarray] = None) -> gr.Component:
|
176 |
+
"""
|
177 |
+
Generate audio from a text prompt + audio sample using MusicGen.
|
178 |
Args:
|
179 |
prompt: The text prompt to generate the audio from.
|
180 |
duration: Duration of the generated audio in seconds.
|
|
|
208 |
model=client,
|
209 |
tools=[DuckDuckGoSearchTool(max_results=5),
|
210 |
VisitWebpageTool(max_output_length=20000),
|
211 |
+
generate_image,
|
212 |
+
generate_audio_from_sample,
|
213 |
generate_audio,
|
214 |
download_images,
|
215 |
transcribe_audio],
|