Spaces:

declare-lab
/

JAM

Running on Zero

App Files Community

renhang committed 17 days ago

Commit

a1ddd2f

1 Parent(s): deaa9a6

update

Browse files

Files changed (2) hide show

app.py +8 -3
utils.py +31 -5

app.py CHANGED Viewed

@@ -95,6 +95,11 @@ def load_example(example_idx, examples):
 # Load examples at startup
 examples = load_examples()
 # Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# Jamify: Music Generation from Lyrics and Style")
@@ -116,12 +121,12 @@ with gr.Blocks() as demo:
                 label="Lyrics",
                 lines=10,
                 placeholder="Enter lyrics in format: word[start:end] word[start:end]...\nExample: It's[4.96:5.52] a[5.52:5.84] long[5.84:6.16] way[6.16:6.48]...",
-                value=""
             )
-            duration_slider = gr.Slider(minimum=5, maximum=230, value=120, step=30, label="Duration (seconds)")
             with gr.Tab("Style from Audio"):
-                reference_audio = gr.File(label="Reference Audio (.mp3, .wav)", type="filepath")
             with gr.Tab("Style from Text"):
                 style_prompt = gr.Textbox(label="Style Prompt", lines=3, placeholder="e.g., A high-energy electronic dance track with a strong bassline and euphoric synths.")

 # Load examples at startup
 examples = load_examples()
+# Get default values from first example
+default_audio = examples[0]['audio_path'] if examples else None
+default_lyrics = examples[0]['lyrics_text'] if examples else ""
+default_duration = examples[0]['duration'] if examples else 120
 # Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# Jamify: Music Generation from Lyrics and Style")
                 label="Lyrics",
                 lines=10,
                 placeholder="Enter lyrics in format: word[start:end] word[start:end]...\nExample: It's[4.96:5.52] a[5.52:5.84] long[5.84:6.16] way[6.16:6.48]...",
+                value=default_lyrics
             )
+            duration_slider = gr.Slider(minimum=5, maximum=230, value=default_duration, step=30, label="Duration (seconds)")
             with gr.Tab("Style from Audio"):
+                reference_audio = gr.File(label="Reference Audio (.mp3, .wav)", type="filepath", value=default_audio)
             with gr.Tab("Style from Text"):
                 style_prompt = gr.Textbox(label="Style Prompt", lines=3, placeholder="e.g., A high-energy electronic dance track with a strong bassline and euphoric synths.")

utils.py CHANGED Viewed

@@ -95,7 +95,10 @@ def words_to_text(words: list[dict]) -> str:
         word_text = word.get('word', '')
         start = word.get('start', 0.0)
         end = word.get('end', 0.0)
-        text_parts.append(f"{word_text}[{start}:{end}]")
     return " ".join(text_parts)
@@ -104,30 +107,53 @@ def json_to_text(json_data: dict) -> str:
     """
     Convert JSON lyrics data to text format for display.
     Only uses the 'word' layer from the JSON structure.
     Args:
         json_data: Dictionary with 'word' key containing list of word objects
     Returns:
-        String in format "word[start:end] word[start:end]..."
     """
     if not isinstance(json_data, dict) or 'word' not in json_data:
         return ""
     words = json_data['word']
-    return words_to_text(words)
 def text_to_json(text: str) -> dict:
     """
     Convert text format to JSON structure expected by the model.
     Creates the 'word' layer that the model needs.
     Args:
-        text: String in format "word[start:end] word[start:end]..."
     Returns:
         Dictionary with 'word' key containing list of word objects
     """
-    words = text_to_words(text)
     return {"word": words}

         word_text = word.get('word', '')
         start = word.get('start', 0.0)
         end = word.get('end', 0.0)
+        # Format timestamps to max 2 decimal places
+        start_str = f"{start:.2f}".rstrip('0').rstrip('.')
+        end_str = f"{end:.2f}".rstrip('0').rstrip('.')
+        text_parts.append(f"{word_text}[{start_str}:{end_str}]")
     return " ".join(text_parts)
     """
     Convert JSON lyrics data to text format for display.
     Only uses the 'word' layer from the JSON structure.
+    Groups words into sentences/lines for better readability.
     Args:
         json_data: Dictionary with 'word' key containing list of word objects
     Returns:
+        String with words grouped into lines: "word[start:end] word[start:end]...\nword[start:end]..."
     """
     if not isinstance(json_data, dict) or 'word' not in json_data:
         return ""
     words = json_data['word']
+    # Group words into segments using the existing regroup_words function
+    segments = regroup_words(words, max_len=15.0, gap=0.50)
+    # Convert each segment to text format
+    segment_lines = []
+    for seg in segments:
+        # Extract words for this segment based on time range
+        seg_words = []
+        for word in words:
+            if seg['start'] <= word['start'] < seg['end'] or (
+                word['start'] <= seg['start'] < word['end']
+            ):
+                seg_words.append(word)
+        if seg_words:
+            segment_text = words_to_text(seg_words)
+            segment_lines.append(segment_text)
+    return '\n'.join(segment_lines)
 def text_to_json(text: str) -> dict:
     """
     Convert text format to JSON structure expected by the model.
     Creates the 'word' layer that the model needs.
+    Handles multi-line input by joining lines.
     Args:
+        text: String in format "word[start:end] word[start:end]..." (can be multi-line)
     Returns:
         Dictionary with 'word' key containing list of word objects
     """
+    # Join multiple lines into single line for parsing
+    single_line_text = ' '.join(line.strip() for line in text.split('\n') if line.strip())
+    words = text_to_words(single_line_text)
     return {"word": words}