Spaces:
Running
on
Zero
Running
on
Zero
renhang
committed on
Commit
·
1e7fc7e
1
Parent(s):
3db0011
update
Browse files
- app.py +28 -6
- jam_infer.yaml +2 -2
app.py
CHANGED
@@ -7,12 +7,14 @@ import requests
|
|
7 |
import subprocess
|
8 |
from pathlib import Path
|
9 |
import torchaudio
|
|
|
|
|
10 |
|
11 |
-
from model import Jamify
|
12 |
from utils import json_to_text, text_to_json, convert_text_time_to_beats, convert_text_beats_to_time, convert_text_beats_to_time_with_regrouping, text_to_words, beats_to_text_with_regrouping, round_to_quarter_beats
|
13 |
|
14 |
def crop_audio_to_30_seconds(audio_path):
|
15 |
-
"""Crop audio to first 30 seconds and return path to temporary cropped file"""
|
16 |
if not audio_path or not os.path.exists(audio_path):
|
17 |
return None
|
18 |
|
@@ -29,11 +31,20 @@ def crop_audio_to_30_seconds(audio_path):
|
|
29 |
else:
|
30 |
cropped_waveform = waveform
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
# Save to temporary file
|
33 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
|
34 |
temp_path = temp_file.name
|
35 |
|
36 |
-
torchaudio.save(temp_path,
|
37 |
return temp_path
|
38 |
|
39 |
except Exception as e:
|
@@ -196,7 +207,18 @@ default_audio_display = crop_audio_to_30_seconds(default_audio) if default_audio
|
|
196 |
# Gradio interface
|
197 |
with gr.Blocks() as demo:
|
198 |
gr.Markdown("# Jamify: Music Generation from Lyrics and Style")
|
199 |
-
gr.Markdown("Provide your lyrics,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
|
201 |
# State to track selected example (-1 means "Make Your Own" is selected, 0 is first example)
|
202 |
selected_example = gr.State(0 if examples else -1)
|
@@ -211,8 +233,8 @@ with gr.Blocks() as demo:
|
|
211 |
with gr.Row():
|
212 |
example_buttons = []
|
213 |
for i, example in enumerate(examples):
|
214 |
-
# Use consistent button width
|
215 |
-
button_text = example['id'][:
|
216 |
# First button starts as primary (selected), others as secondary
|
217 |
initial_variant = "primary" if i == 0 else "secondary"
|
218 |
button = gr.Button(
|
|
|
7 |
import subprocess
|
8 |
from pathlib import Path
|
9 |
import torchaudio
|
10 |
+
import torch
|
11 |
+
import pyloudnorm as pyln
|
12 |
|
13 |
+
from model import Jamify, normalize_audio
|
14 |
from utils import json_to_text, text_to_json, convert_text_time_to_beats, convert_text_beats_to_time, convert_text_beats_to_time_with_regrouping, text_to_words, beats_to_text_with_regrouping, round_to_quarter_beats
|
15 |
|
16 |
def crop_audio_to_30_seconds(audio_path):
|
17 |
+
"""Crop audio to first 30 seconds, normalize, and return path to temporary cropped file"""
|
18 |
if not audio_path or not os.path.exists(audio_path):
|
19 |
return None
|
20 |
|
|
|
31 |
else:
|
32 |
cropped_waveform = waveform
|
33 |
|
34 |
+
# Resample to 44100 Hz if needed (to match prediction pipeline)
|
35 |
+
if sample_rate != 44100:
|
36 |
+
resampler = torchaudio.transforms.Resample(sample_rate, 44100)
|
37 |
+
cropped_waveform = resampler(cropped_waveform)
|
38 |
+
sample_rate = 44100
|
39 |
+
|
40 |
+
# Apply the same normalization as the prediction pipeline
|
41 |
+
normalized_waveform = normalize_audio(cropped_waveform)
|
42 |
+
|
43 |
# Save to temporary file
|
44 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
|
45 |
temp_path = temp_file.name
|
46 |
|
47 |
+
torchaudio.save(temp_path, normalized_waveform, sample_rate)
|
48 |
return temp_path
|
49 |
|
50 |
except Exception as e:
|
|
|
207 |
# Gradio interface
|
208 |
with gr.Blocks() as demo:
|
209 |
gr.Markdown("# Jamify: Music Generation from Lyrics and Style")
|
210 |
+
gr.Markdown("Provide your lyrics, an audio style reference, and a desired duration to generate a song.")
|
211 |
+
|
212 |
+
# Helpful reminder for users
|
213 |
+
gr.Markdown("""
|
214 |
+
💡 **Demo Tip**: Don't start from scratch! Use the sample examples below as templates:
|
215 |
+
- Click any sample to load its lyrics and audio style
|
216 |
+
- **Edit the lyrics**: Change words, modify timing, or adjust the structure
|
217 |
+
- **Experiment with timing**: Try different word durations or beats
|
218 |
+
- **Mix and match**: Use lyrics from one example with audio style from another
|
219 |
+
|
220 |
+
This approach is much easier than creating everything from zero!
|
221 |
+
""")
|
222 |
|
223 |
# State to track selected example (-1 means "Make Your Own" is selected, 0 is first example)
|
224 |
selected_example = gr.State(0 if examples else -1)
|
|
|
233 |
with gr.Row():
|
234 |
example_buttons = []
|
235 |
for i, example in enumerate(examples):
|
236 |
+
# Use consistent button width with 10 character limit
|
237 |
+
button_text = example['id'][:10] if len(example['id']) <= 10 else example['id'][:9] + "…"
|
238 |
# First button starts as primary (selected), others as secondary
|
239 |
initial_variant = "primary" if i == 0 else "secondary"
|
240 |
button = gr.Button(
|
jam_infer.yaml
CHANGED
@@ -23,10 +23,10 @@ evaluation:
|
|
23 |
cfg_range:
|
24 |
- 0.05
|
25 |
- 1
|
26 |
-
fix_dual_cfg: true
|
27 |
dual_cfg:
|
28 |
- 4.7
|
29 |
-
- 2.
|
30 |
steps: 50
|
31 |
|
32 |
model:
|
|
|
23 |
cfg_range:
|
24 |
- 0.05
|
25 |
- 1
|
26 |
+
# fix_dual_cfg: true
|
27 |
dual_cfg:
|
28 |
- 4.7
|
29 |
+
- 2.5
|
30 |
steps: 50
|
31 |
|
32 |
model:
|