Spaces:
Runtime error
Runtime error
Duplicate from fffiloni/animated-audio-visualizer-1024
Browse files
Co-authored-by: Sylvain Filoni <fffiloni@users.noreply.huggingface.co>
- .gitattributes +34 -0
- Lato-Regular.ttf +0 -0
- README.md +13 -0
- app.py +214 -0
- black_cache.png +0 -0
- requirements.txt +4 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
Lato-Regular.ttf
ADDED
Binary file (72.3 kB). View file
|
|
README.md
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Animated Audio Visualizer 1024
|
3 |
+
emoji: 📊🎶
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: yellow
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.39.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
duplicated_from: fffiloni/animated-audio-visualizer-1024
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
import librosa
|
4 |
+
import numpy as np
|
5 |
+
from PIL import Image, ImageDraw, ImageFont
|
6 |
+
from moviepy.editor import *
|
7 |
+
from moviepy.video.io.VideoFileClip import VideoFileClip
|
8 |
+
|
9 |
+
def make_bars_image(height_values, index, new_height):
    """Draw one frame of white audio bars on a transparent canvas and save it.

    Args:
        height_values: one bar height (in pixels) per bar, left to right.
        index: frame number, used only to build the output file name.
        new_height: height of the canvas in pixels; the width is fixed at 1024.

    Returns:
        The path of the PNG that was written: ``audio_bars_<index>.png`` in
        the current working directory.
    """
    canvas_width = 1024
    bar_width = 4
    gap = 4
    # Distance between the canvas edge and the base line of the bars.
    base_offset = int(80 * 2)

    # Fully transparent RGBA canvas so the frame can be pasted over a background.
    canvas = Image.new('RGBA', (canvas_width, new_height), color=(0, 0, 0, 0))
    pen = ImageDraw.Draw(canvas)

    # Centre the whole group of bars horizontally.
    bar_count = len(height_values)
    group_width = bar_count * bar_width + (bar_count - 1) * gap
    left_edge = int((canvas_width - group_width) / 2)
    stride = bar_width + gap

    # Bars are drawn hanging down from the base line; the flip below turns
    # them into bars that rise from the bottom of the frame.
    for position, bar_height in enumerate(height_values):
        bar_left = left_edge + position * stride
        pen.rectangle(
            [bar_left, base_offset, bar_left + bar_width, bar_height + base_offset],
            fill='white',
        )

    # A 180-degree rotation followed by a left/right mirror is exactly a
    # top/bottom flip, so it is done in a single step.
    canvas = canvas.transpose(Image.FLIP_TOP_BOTTOM)

    output_name = 'audio_bars_' + str(index) + '.png'
    canvas.save(output_name)
    return output_name
|
63 |
+
|
64 |
+
def db_to_height(db_value):
    """Convert a decibel value into a bar height.

    The value is shifted by 80 and divided by 80, so -80 dB becomes 0 and
    0 dB becomes 1; that fraction is then multiplied by 50. Inputs in the
    range [-80, 0] therefore give heights in the range [0, 50].

    Args:
        db_value: level in dB.

    Returns:
        The corresponding height.
    """
    db_span = 80
    full_scale_height = 50
    return (db_value + db_span) / db_span * full_scale_height
|
72 |
+
|
73 |
+
def infer(title, audio_in, image_in, output_video_path):
    """Render an animated audio-bars video over a background image.

    The audio is analysed with a short-time Fourier transform; for every STFT
    frame a bars image is drawn with ``make_bars_image`` and pasted over the
    resized, titled background. The frames are assembled into a video with the
    original audio, re-timed to 25 fps, and a spectrogram plot is saved as well.

    Args:
        title: text drawn in the lower-left area of the background.
        audio_in: path of the audio file.
        image_in: path of the background image.
        output_video_path: path the final video is written to.

    Returns:
        A tuple ``(output_video_path, 'image_out.jpg')``.

    Raises:
        ValueError: if the audio file contains no samples.

    Note:
        All intermediate files (``resized_background.jpg``, per-frame images,
        ``my_video.mp4``, ``image_out.jpg``) use fixed names in the current
        working directory, so two calls running at the same time overwrite
        each other's files.
    """
    # The file-level import is only ``import librosa``; importing the
    # submodule here makes sure ``librosa.display.specshow`` is available.
    # It must stay the first statement because it binds ``librosa`` locally.
    import librosa.display

    # --- Audio analysis -----------------------------------------------------
    audio_data, sr = librosa.load(audio_in)
    duration = librosa.get_duration(y=audio_data, sr=sr)
    if duration <= 0:
        # Without this the frame-rate computation below divides by zero.
        raise ValueError("The audio file contains no samples.")

    # The whole file is used: from 0 seconds to ``duration`` seconds.
    audio_data = audio_data[:int(duration * sr)]

    hop_length = 1024
    stft = librosa.stft(audio_data, hop_length=hop_length)
    spectrogram = librosa.amplitude_to_db(np.abs(stft), ref=np.max)

    # An STFT has ``n_fft // 2 + 1`` rows, so the FFT size is recovered from
    # the row count instead of passing the row count itself as ``n_fft``
    # (which produced frequency labels that did not match the rows).
    n_bins = stft.shape[0]
    freqs = librosa.fft_frequencies(sr=sr, n_fft=2 * (n_bins - 1))

    # One bar per selected row. Only the lower half of the rows is sampled,
    # which is the same set of rows the visualizer has always displayed.
    n_freqs = 114
    freq_indices = np.linspace(0, n_bins // 2, n_freqs, dtype=int)

    # Debug output: (frequency, dB) pairs of the first time frame.
    print(list(zip(freqs[freq_indices], spectrogram[freq_indices, 0])))

    # Bar heights for every time frame.
    proportional_values = [
        [db_to_height(db) for db in spectrogram[freq_indices, frame_idx]]
        for frame_idx in range(spectrogram.shape[1])
    ]

    print(proportional_values[0])
    print("AUDIO CHUNK: " + str(len(proportional_values)))

    # --- Background ---------------------------------------------------------
    new_width = 1024
    with Image.open(image_in) as background_image:
        bg_width, bg_height = background_image.size
        aspect_ratio = bg_width / bg_height
        new_height = int(new_width / aspect_ratio)
        # H.264 with 4:2:0 chroma needs even frame dimensions; round down.
        new_height = max(2, new_height - new_height % 2)
        # Convert to RGB so images with an alpha channel or a palette can be
        # drawn on with an RGB colour and saved as JPEG below.
        resized_bg = background_image.convert("RGB").resize((new_width, new_height))

    # Dark overlay at the bottom for better visibility of the white text.
    with Image.open('black_cache.png') as cache_src:
        bg_cache = cache_src.resize((new_width, cache_src.height * 2), Image.LANCZOS)
    resized_bg.paste(bg_cache, (0, resized_bg.height - bg_cache.height), mask=bg_cache)

    # Title text.
    draw = ImageDraw.Draw(resized_bg)
    font = ImageFont.truetype("Lato-Regular.ttf", 16)
    text_color = (255, 255, 255)  # white
    x = int(30 * 2)
    y = new_height - (70 * 2)
    draw.text((x, y), title, fill=text_color, font=font)

    resized_bg.save('resized_background.jpg')

    # Read the saved background back once (so every frame starts from the
    # same JPEG-decoded pixels as before) instead of re-opening it per frame.
    with Image.open('resized_background.jpg') as saved_bg:
        base_frame = saved_bg.copy()

    # --- Frames -------------------------------------------------------------
    generated_frames = []
    for i, frame in enumerate(proportional_values):
        bars_path = make_bars_image(frame, i, new_height)
        fresh_bg = base_frame.copy()
        # The bars image is its own alpha mask; close the file once pasted.
        with Image.open(bars_path) as bars_img:
            fresh_bg.paste(bars_img, (0, 0), mask=bars_img)
        frame_path = 'audio_bars_with_bg' + str(i) + '.jpg'
        fresh_bg.save(frame_path)
        generated_frames.append(frame_path)
    print(generated_frames)

    # --- Video --------------------------------------------------------------
    codec = 'libx264'
    audio_codec = 'aac'

    # One image per STFT frame, spread evenly over the audio duration.
    clip = ImageSequenceClip(generated_frames, fps=len(generated_frames) / duration)
    audio_clip = AudioFileClip(audio_in)
    try:
        clip.set_audio(audio_clip).write_videofile(
            "my_video.mp4", codec=codec, audio_codec=audio_codec
        )
    finally:
        audio_clip.close()
        clip.close()

    # Re-encode at a standard frame rate.
    new_fps = 25
    retimed_clip = VideoFileClip("my_video.mp4")
    try:
        retimed_clip.set_fps(new_fps).write_videofile(
            output_video_path, codec=codec, audio_codec=audio_codec
        )
    finally:
        retimed_clip.close()

    # --- Spectrogram image --------------------------------------------------
    output_path = 'image_out.jpg'
    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(spectrogram, sr=sr, x_axis='time', y_axis='log')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Audio Bars Visualization')
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
    finally:
        # Figures stay registered with pyplot until closed; close it so they
        # do not pile up across calls.
        plt.close(fig)

    return output_video_path, output_path
|
207 |
+
|
208 |
+
# Gradio front end: a title box, an audio upload, an image upload and a hidden
# text box that carries the output video path into ``infer``.
input_components = [
    gr.Textbox(placeholder='FIND A GOOD TITLE'),
    gr.Audio(source='upload', type='filepath'),
    gr.Image(source='upload', type='filepath'),
    gr.Textbox(label="Output video path", value="my_final_video.mp4", visible=False),
]

# ``infer`` returns the video path first and the spectrogram image path second.
output_components = [
    gr.Video(label='video result'),
    gr.Image(label='spectrogram image'),
]

demo = gr.Interface(
    fn=infer,
    inputs=input_components,
    outputs=output_components,
    title='Animated Audio Visualizer',
    description='<p style="text-align: center;">Upload an audio file, upload a background image, choose a good title, click submit.</p>',
)
demo.launch()
|
black_cache.png
ADDED
![]() |
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
matplotlib
|
2 |
+
librosa
|
3 |
+
moviepy
|
4 |
+
pillow
|