reach-vb HF Staff fffiloni committed on
Commit
6b7f89f
·
0 Parent(s):

Duplicate from fffiloni/animated-audio-visualizer-1024

Browse files

Co-authored-by: Sylvain Filoni <fffiloni@users.noreply.huggingface.co>

Files changed (6) hide show
  1. .gitattributes +34 -0
  2. Lato-Regular.ttf +0 -0
  3. README.md +13 -0
  4. app.py +214 -0
  5. black_cache.png +0 -0
  6. requirements.txt +4 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Lato-Regular.ttf ADDED
Binary file (72.3 kB). View file
 
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Animated Audio Visualizer 1024
3
+ emoji: 📊🎶
4
+ colorFrom: green
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 3.39.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: fffiloni/animated-audio-visualizer-1024
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import matplotlib.pyplot as plt
3
+ import librosa
4
+ import numpy as np
5
+ from PIL import Image, ImageDraw, ImageFont
6
+ from moviepy.editor import *
7
+ from moviepy.video.io.VideoFileClip import VideoFileClip
8
+
9
def make_bars_image(height_values, index, new_height):
    """Render one frame of audio bars to a transparent PNG and return its path.

    Args:
        height_values: Bar heights in pixels, one entry per bar, drawn from
            left to right and centred horizontally in the image.
        index: Frame number; only used to build the output file name.
        new_height: Height in pixels of the generated image (width is 1024).

    Returns:
        The name of the saved file, ``audio_bars_<index>.png``, written to the
        current working directory.
    """
    width = 1024
    rect_width = 4
    spacing = 4
    # Distance in pixels between the image edge and the bars' baseline.
    buffer_size = int(80 * 2)

    # Fully transparent canvas so the frame can be pasted over a background.
    image = Image.new('RGBA', (width, new_height), color=(0, 0, 0, 0))
    draw = ImageDraw.Draw(image)

    num_bars = len(height_values)
    total_width = num_bars * rect_width + (num_bars - 1) * spacing
    start_x = int((width - total_width) / 2)
    step = rect_width + spacing

    # Bars are drawn hanging down from the baseline near the top edge; the
    # vertical flip below turns them into bars rising from near the bottom.
    for i, bar_height in enumerate(height_values):
        x0 = start_x + i * step
        draw.rectangle(
            [x0, buffer_size, x0 + rect_width, bar_height + buffer_size],
            fill='white',
        )

    # A 180-degree rotation followed by a left/right mirror is exactly one
    # top/bottom flip, so do it in a single pass.
    image = image.transpose(Image.FLIP_TOP_BOTTOM)

    output_path = 'audio_bars_' + str(index) + '.png'
    image.save(output_path)
    return output_path
63
+
64
def db_to_height(db_value, db_range=80, max_height=50):
    """Map a decibel value to a bar height.

    The value is shifted by ``db_range`` and normalised so that
    ``-db_range`` dB maps to 0 and 0 dB maps to ``max_height``.

    Args:
        db_value: Level in dB; the mapping is linear and is not clamped.
        db_range: Size of the dB window below 0 dB that maps onto the bar.
        max_height: Height returned for a 0 dB input.

    Returns:
        The bar height, ``(db_value + db_range) / db_range * max_height``.
    """
    # Scale the dB value to a range between 0 and 1
    scaled_value = (db_value + db_range) / db_range

    # Convert the scaled value to a height between 0 and max_height
    return scaled_value * max_height
72
+
73
def infer(title, audio_in, image_in, output_video_path):
    """Build an animated audio-bars video over a titled background image.

    Args:
        title: Text drawn in white near the bottom-left of the background.
        audio_in: Path of the audio file to analyse and use as the soundtrack.
        image_in: Path of the background image; it is resized to 1024 px wide
            with its aspect ratio preserved.
        output_video_path: Path the final 25 fps video is written to.

    Returns:
        A tuple ``(output_video_path, 'image_out.jpg')``: the rendered video
        and a spectrogram plot of the audio.

    Raises:
        gr.Error: If the audio file contains no samples.

    Side effects:
        Writes ``resized_background.jpg``, one ``audio_bars_<i>.png`` and one
        ``audio_bars_with_bg<i>.jpg`` per STFT frame, ``my_video.mp4`` and
        ``image_out.jpg`` into the current working directory.
    """
    # Imported explicitly so that librosa.display.specshow below does not
    # depend on librosa loading the submodule implicitly.
    import librosa.display

    # Load the audio file
    audio_data, sr = librosa.load(audio_in)

    # Get the duration in seconds
    duration = librosa.get_duration(y=audio_data, sr=sr)
    if duration <= 0:
        # The frame rate below divides by the duration.
        raise gr.Error("The uploaded audio file contains no audio samples.")

    # Extract the audio data for the desired time window (the whole file)
    start_time = 0  # start time in seconds
    end_time = duration  # end time in seconds
    audio_data = audio_data[int(start_time * sr):int(end_time * sr)]

    # Compute the short-time Fourier transform; one video frame per column
    hop_length = 1024
    stft = librosa.stft(audio_data, hop_length=hop_length)
    spectrogram = librosa.amplitude_to_db(np.abs(stft), ref=np.max)

    # The STFT has 1 + n_fft / 2 rows, so recover n_fft from the row count to
    # get the centre frequency of every row.
    n_rows = stft.shape[0]
    freqs = librosa.fft_frequencies(sr=sr, n_fft=2 * (n_rows - 1))

    # Sample 114 evenly spaced rows from the lower half of the spectrum
    # (rows 0 .. n_rows // 2); each sampled row becomes one bar.
    n_freqs = 114
    freq_indices = np.linspace(0, n_rows // 2, n_freqs, dtype=int)
    band_freqs = freqs[freq_indices]

    # (frequency, dB) pairs for every time frame
    db_values = [
        list(zip(band_freqs, column))
        for column in spectrogram[freq_indices].T
    ]

    # Print the dB values for the first time frame
    print(db_values[0])

    proportional_values = [
        [db_to_height(db) for _, db in frame]
        for frame in db_values
    ]

    print(proportional_values[0])
    print("AUDIO CHUNK: " + str(len(proportional_values)))

    # Open the background image and resize it while keeping its aspect ratio
    new_width = 1024
    with Image.open(image_in) as background_image:
        bg_width, bg_height = background_image.size
        aspect_ratio = bg_width / bg_height
        new_height = int(new_width / aspect_ratio)
        # JPEG cannot store alpha or palette images, so normalise to RGB
        # before the background is saved below.
        resized_bg = background_image.convert('RGB').resize((new_width, new_height))

    # Apply black cache for better visibility of the white text; it is
    # stretched to the full width and to twice its own height.
    with Image.open('black_cache.png') as cache_src:
        bg_cache = cache_src.resize((1024, cache_src.height * 2), Image.LANCZOS)
    resized_bg.paste(bg_cache, (0, resized_bg.height - bg_cache.height), mask=bg_cache)

    # Draw the title on the image
    draw = ImageDraw.Draw(resized_bg)
    font = ImageFont.truetype("Lato-Regular.ttf", 16)
    text_color = (255, 255, 255)  # white color
    x = int(30 * 2)
    y = new_height - (70 * 2)
    draw.text((x, y), title, fill=text_color, font=font)

    # Save the resized image
    resized_bg.save('resized_background.jpg')

    # Composite every bars frame on top of a fresh copy of the background
    generated_frames = []
    for i, frame in enumerate(proportional_values):
        bars_path = make_bars_image(frame, i, new_height)
        frame_path = 'audio_bars_with_bg' + str(i) + '.jpg'
        with Image.open(bars_path) as bars_img, \
                Image.open('resized_background.jpg') as fresh_bg:
            fresh_bg.paste(bars_img, (0, 0), mask=bars_img)
            fresh_bg.save(frame_path)
        generated_frames.append(frame_path)
    print(generated_frames)

    # Set the output codec
    codec = 'libx264'
    audio_codec = 'aac'

    # Create a video clip from the images; the frame rate is chosen so that
    # the frame sequence spans exactly the audio duration.
    clip = ImageSequenceClip(
        generated_frames,
        fps=len(generated_frames) / (end_time - start_time),
    )
    audio_clip = AudioFileClip(audio_in)
    try:
        clip = clip.set_audio(audio_clip)
        clip.write_videofile("my_video.mp4", codec=codec, audio_codec=audio_codec)
    finally:
        # Release the audio reader even when encoding fails.
        audio_clip.close()

    # Re-encode the intermediate video at the desired frame rate
    new_fps = 25
    retimed_clip = VideoFileClip("my_video.mp4")
    try:
        new_clip = retimed_clip.set_fps(new_fps)
        new_clip.write_videofile(output_video_path, codec=codec, audio_codec=audio_codec)
    finally:
        retimed_clip.close()

    # Plot the spectrogram and save it as a JPG file
    output_path = 'image_out.jpg'
    fig = plt.figure(figsize=(10, 4))
    try:
        # hop_length must match the STFT above or the time axis is mis-scaled.
        librosa.display.specshow(
            spectrogram, sr=sr, hop_length=hop_length, x_axis='time', y_axis='log'
        )
        plt.colorbar(format='%+2.0f dB')
        plt.title('Audio Bars Visualization')
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
    finally:
        # Figures stay registered with pyplot until closed; close this one so
        # repeated calls do not accumulate them.
        plt.close(fig)

    return output_video_path, output_path
207
+
208
# Gradio front end: a title, an audio file and a background image go in; the
# rendered video and a spectrogram image come out. The output path is passed
# through a hidden textbox so that `infer` receives it as its fourth argument.
demo = gr.Interface(
    fn=infer,
    inputs=[
        gr.Textbox(placeholder='FIND A GOOD TITLE'),
        gr.Audio(source='upload', type='filepath'),
        gr.Image(source='upload', type='filepath'),
        gr.Textbox(label="Output video path", value="my_final_video.mp4", visible=False),
    ],
    outputs=[
        gr.Video(label='video result'),
        gr.Image(label='spectrogram image'),
    ],
    title='Animated Audio Visualizer',
    description='<p style="text-align: center;">Upload an audio file, upload a background image, choose a good title, click submit.</p>',
)
demo.launch()
black_cache.png ADDED
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ matplotlib
2
+ librosa
3
+ moviepy
4
+ pillow