hungchiayu committed on
Commit
30f9d01
·
1 Parent(s): cbec30f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -24
app.py CHANGED
@@ -31,15 +31,15 @@ def gradio_generate(prompt, steps, guidance,duration=10):
31
 
32
 
33
  #wavio.write(output_filename, output_wave, rate=44100, sampwidth=2)
34
- unique_filename = f"output_{uuid.uuid4().hex}.wav"
35
- print(f"Saving audio to file: {unique_filename}")
36
 
37
  # Save to file
38
- torchaudio.save(unique_filename, output, 44100)
39
  print(f"Audio saved: {unique_filename}")
40
 
41
  # Return the path to the generated audio file
42
- return unique_filename
43
 
44
  #if (output_format == "mp3"):
45
  # AudioSegment.from_wav("temp.wav").export("temp.mp3", format = "mp3")
@@ -61,26 +61,47 @@ denoising_steps = gr.Slider(minimum=10, maximum=100, value=25, step=5, label="St
61
  guidance_scale = gr.Slider(minimum=1, maximum=10, value=4.5, step=0.5, label="Guidance Scale", interactive=True)
62
  duration_scale = gr.Slider(minimum=1, maximum=30, value=10, step=1, label="Duration", interactive=True)
63
 
64
- interface = gr.Interface(
 
 
65
  fn=gradio_generate,
66
- inputs=[
67
- gr.Textbox(label="Prompt", placeholder="Enter your text prompt here"),
68
- gr.Slider(0, 30, value=10, label="Duration in Seconds"),
69
- gr.Slider(10, 150, value=50, step=5, label="Number of Diffusion Steps"),
70
- gr.Slider(1, 10, value=4.5, step=0.5, label="CFG Scale")
71
- ],
72
- outputs=gr.Audio(type="filepath", label="Generated Audio"),
73
- title="TangoFlux Generator",
74
- description="Generate variable-length stereo audio at 44.1kHz from text prompts using TangoFlux.",
75
  examples=[
76
- [
77
- "Create a serene soundscape of a quiet beach at sunset.", # Text prompt
78
-
79
- 15, # Duration in Seconds
80
- 50, # Number of Diffusion Steps
81
- 4.5, # CFG Scale
82
- ]
83
-
84
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
- interface.launch()
 
31
 
32
 
33
  #wavio.write(output_filename, output_wave, rate=44100, sampwidth=2)
34
+ filename = 'temp.wav'
35
+ #print(f"Saving audio to file: {unique_filename}")
36
 
37
  # Save to file
38
+ torchaudio.save(filename, output, 44100)
39
  print(f"Audio saved: {unique_filename}")
40
 
41
  # Return the path to the generated audio file
42
+ return filename
43
 
44
  #if (output_format == "mp3"):
45
  # AudioSegment.from_wav("temp.wav").export("temp.mp3", format = "mp3")
 
61
  guidance_scale = gr.Slider(minimum=1, maximum=10, value=4.5, step=0.5, label="Guidance Scale", interactive=True)
62
  duration_scale = gr.Slider(minimum=1, maximum=30, value=10, step=1, label="Duration", interactive=True)
63
 
64
+
65
+ # Gradio interface
66
+ gr_interface = gr.Interface(
67
  fn=gradio_generate,
68
+ inputs=[input_text, denoising_steps, guidance_scale,duration_scale],
69
+ outputs=output_audio,
70
+ title="TangoFlux: ",
71
+ description=description_text,
72
+ allow_flagging=False,
73
+
74
+
75
+
76
+
77
  examples=[
78
+ ["Quiet speech and then and airplane flying away"],
79
+ ["A bicycle peddling on dirt and gravel followed by a man speaking then laughing"],
80
+ ["Ducks quack and water splashes with some animal screeching in the background"],
81
+ ["Describe the sound of the ocean"],
82
+ ["A woman and a baby are having a conversation"],
83
+ ["A man speaks followed by a popping noise and laughter"],
84
+ ["A cup is filled from a faucet"],
85
+ ["An audience cheering and clapping"],
86
+ ["Rolling thunder with lightning strikes"],
87
+ ["A dog barking and a cat mewing and a racing car passes by"],
88
+ ["Gentle water stream, birds chirping and sudden gun shot"],
89
+ ["A man talking followed by a goat baaing then a metal gate sliding shut as ducks quack and wind blows into a microphone."],
90
+ ["A dog barking"],
91
+ ["A cat meowing"],
92
+ ["Wooden table tapping sound while water pouring"],
93
+ ["Applause from a crowd with distant clicking and a man speaking over a loudspeaker"],
94
+ ["two gunshots followed by birds flying away while chirping"],
95
+ ["Whistling with birds chirping"],
96
+ ["A person snoring"],
97
+ ["Motor vehicles are driving with loud engines and a person whistles"],
98
+ ["People cheering in a stadium while thunder and lightning strikes"],
99
+ ["A helicopter is in flight"],
100
+ ["A dog barking and a man talking and a racing car passes by"],
101
+ ],
102
+ cache_examples="lazy", # Turn on to cache.
103
+ )
104
+
105
+
106
 
107
+ gr_interface.queue(15).launch()