animated-audio-visualizer-1024

Runtime error

App Files Files Community

reach-vb HF Staff

fffiloni committed on Aug 17, 2023

Commit

6b7f89f

0 Parent(s):

Duplicate from fffiloni/animated-audio-visualizer-1024

Browse files

Co-authored-by: Sylvain Filoni <fffiloni@users.noreply.huggingface.co>

Files changed (6) hide show

.gitattributes +34 -0
Lato-Regular.ttf +0 -0
README.md +13 -0
app.py +214 -0
black_cache.png +0 -0
requirements.txt +4 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

Lato-Regular.ttf ADDED Viewed

Binary file (72.3 kB). View file

README.md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: Animated Audio Visualizer 1024
+emoji: 📊🎶
+colorFrom: green
+colorTo: yellow
+sdk: gradio
+sdk_version: 3.39.0
+app_file: app.py
+pinned: false
+duplicated_from: fffiloni/animated-audio-visualizer-1024
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,214 @@

+import gradio as gr
+import matplotlib.pyplot as plt
+import librosa
+import numpy as np
+from PIL import Image, ImageDraw, ImageFont
+from moviepy.editor import *
+from moviepy.video.io.VideoFileClip import VideoFileClip
+def make_bars_image(height_values, index, new_height):
+    """Render one frame of the bar overlay and save it as a PNG.
+
+    One white bar is drawn per entry of ``height_values`` on a transparent
+    1024 x ``new_height`` canvas, with the row of bars centred horizontally.
+    The canvas is then rotated by 180 degrees and mirrored left/right, which
+    together amount to a vertical flip, before being written to disk.
+
+    Args:
+        height_values: Bar heights in pixels, one per bar, left to right.
+        index: Frame number, used to build the output file name.
+        new_height: Canvas height in pixels.
+
+    Returns:
+        The path of the saved file, ``audio_bars_<index>.png``.
+    """
+    canvas_width = 1024
+    bar_width = 4
+    gap = 4
+    # Distance between the canvas edge and the base of every bar.
+    base_offset = int(80 * 2)
+    canvas = Image.new('RGBA', (canvas_width, new_height), color=(0, 0, 0, 0))
+    pen = ImageDraw.Draw(canvas)
+    bar_count = len(height_values)
+    # Centre the whole row: all bars plus the gaps between them.
+    row_width = bar_count * bar_width + (bar_count - 1) * gap
+    left_edge = int((canvas_width - row_width) / 2)
+    stride = bar_width + gap
+    for position, bar_height in enumerate(height_values):
+        bar_left = left_edge + position * stride
+        pen.rectangle(
+            [bar_left, base_offset, bar_left + bar_width, bar_height + base_offset],
+            fill='white',
+        )
+    # Bars are drawn downwards from base_offset; flipping makes them rise
+    # from the opposite edge of the canvas instead.
+    flipped = canvas.rotate(180).transpose(Image.FLIP_LEFT_RIGHT)
+    out_path = 'audio_bars_' + str(index) + '.png'
+    flipped.save(out_path)
+    return out_path
+def db_to_height(db_value, min_db=-80, max_height=50):
+    """Map a decibel value linearly onto a bar height.
+
+    ``min_db`` maps to a height of 0 and 0 dB maps to ``max_height``. With the
+    defaults the result therefore lies between 0 and 50 for inputs between
+    -80 dB and 0 dB. Values outside that range are not clamped.
+
+    Args:
+        db_value: Level in dB (a number or a NumPy array).
+        min_db: Level that maps to height 0. Must be non-zero.
+        max_height: Height returned for a level of 0 dB.
+
+    Returns:
+        The scaled height, in the same units as ``max_height``.
+    """
+    # Fraction of the way from min_db up to 0 dB.
+    scaled_value = (db_value - min_db) / -min_db
+    return scaled_value * max_height
+def infer(title, audio_in, image_in, output_video_path):
+    """Build an animated audio-bar video over a titled background image.
+
+    The audio is analysed with a short-time Fourier transform; for every STFT
+    frame a row of bars is rendered and composited onto the background, and
+    the resulting image sequence is encoded together with the audio.
+
+    Side effects: writes ``resized_background.jpg``, one
+    ``audio_bars_with_bg<i>.jpg`` per frame (plus whatever files
+    ``make_bars_image`` writes), ``my_video.mp4`` and ``image_out.jpg`` into
+    the current working directory, and reads ``black_cache.png`` and
+    ``Lato-Regular.ttf`` from it.
+
+    Args:
+        title: Text drawn near the bottom-left corner of the background.
+        audio_in: Path of the audio file.
+        image_in: Path of the background image.
+        output_video_path: Path of the final 25 fps video.
+
+    Returns:
+        A tuple ``(output_video_path, 'image_out.jpg')``: the final video and
+        a spectrogram plot of the audio.
+
+    Raises:
+        gr.Error: If an input file is missing or the audio has no samples.
+    """
+    # Imported explicitly: the plot below uses librosa.display, which is not
+    # guaranteed to be loaded by a bare "import librosa".
+    import librosa.display
+    if not audio_in or not image_in:
+        raise gr.Error('Please provide both an audio file and a background image.')
+    # Load the audio file
+    audio_data, sr = librosa.load(audio_in)
+    duration = librosa.get_duration(y=audio_data, sr=sr)
+    if duration <= 0:
+        # Guards the frames-per-second division further down.
+        raise gr.Error('The audio file contains no samples.')
+    # Extract the audio data for the desired time (currently the whole file)
+    start_time = 0 # start time in seconds
+    end_time = duration # end time in seconds
+    audio_data = audio_data[int(start_time * sr):int(end_time * sr)]
+    # Compute the short-time Fourier transform
+    hop_length = 1024
+    stft = librosa.stft(audio_data, hop_length=hop_length)
+    spectrogram = librosa.amplitude_to_db(np.abs(stft), ref=np.max)
+    # NOTE(review): stft.shape[0] is the number of frequency bins (per the
+    # librosa docs 1 + n_fft/2), not n_fft itself, so `freqs` looks shorter
+    # than the spectrogram's frequency axis and freq_indices would only
+    # sample its lower rows. Left as is because changing it alters which
+    # bins are drawn - confirm the intent.
+    freqs = librosa.fft_frequencies(sr=sr, n_fft=stft.shape[0])
+    # Pick n_freqs evenly spaced bins, one per bar
+    n_freqs = 114
+    freq_indices = np.linspace(0, len(freqs) - 1, n_freqs, dtype=int)
+    # (frequency, dB) pairs for every time frame
+    db_values = [
+        list(zip(freqs[freq_indices], spectrogram[freq_indices, i]))
+        for i in range(spectrogram.shape[1])
+    ]
+    print(db_values[0])
+    proportional_values = [
+        [db_to_height(db) for _, db in frame] for frame in db_values
+    ]
+    print(proportional_values[0])
+    print("AUDIO CHUNK: " + str(len(proportional_values)))
+    # Resize the background to a width of 1024 while keeping its aspect ratio
+    with Image.open(image_in) as background_image:
+        bg_width, bg_height = background_image.size
+        new_width = 1024
+        new_height = int(new_width / (bg_width / bg_height))
+        # Convert to RGB first: the frames are saved as JPEG, which cannot
+        # store an alpha channel or a palette (e.g. an uploaded PNG).
+        resized_bg = background_image.convert('RGB').resize((new_width, new_height))
+    # Darken the bottom of the image so the white text stays readable
+    with Image.open('black_cache.png') as cache_src:
+        bg_cache = cache_src.resize((1024, cache_src.size[1] * 2), Image.LANCZOS)
+    resized_bg.paste(bg_cache, (0, resized_bg.height - bg_cache.height), mask=bg_cache)
+    # Draw the title
+    draw = ImageDraw.Draw(resized_bg)
+    font = ImageFont.truetype("Lato-Regular.ttf", 16)
+    text_color = (255, 255, 255) # white color
+    draw.text((int(30 * 2), new_height - (70 * 2)), title, fill=text_color, font=font)
+    resized_bg.save('resized_background.jpg')
+    # Decode the saved background once instead of once per frame; each frame
+    # starts from a copy of the same pixels.
+    with Image.open('resized_background.jpg') as saved_bg:
+        base_frame = saved_bg.copy()
+    generated_frames = []
+    for i, frame in enumerate(proportional_values):
+        fresh_bg = base_frame.copy()
+        with Image.open(make_bars_image(frame, i, new_height)) as bars_img:
+            # Paste the audio bars on top of the background, using their own
+            # alpha channel as the mask
+            fresh_bg.paste(bars_img, (0, 0), mask=bars_img)
+        frame_path = 'audio_bars_with_bg' + str(i) + '.jpg'
+        fresh_bg.save(frame_path)
+        generated_frames.append(frame_path)
+    print(generated_frames)
+    codec = 'libx264'
+    audio_codec = 'aac'
+    # First pass: one image per STFT frame, at whatever rate makes the image
+    # sequence last exactly as long as the audio
+    clip = ImageSequenceClip(generated_frames, fps=len(generated_frames)/(end_time-start_time))
+    audio_clip = AudioFileClip(audio_in)
+    try:
+        clip = clip.set_audio(audio_clip)
+        clip.write_videofile("my_video.mp4", codec=codec, audio_codec=audio_codec)
+    finally:
+        # Release the audio reader held by the clip
+        audio_clip.close()
+    # Second pass: re-encode at a standard frame rate
+    retimed_clip = VideoFileClip("my_video.mp4")
+    try:
+        new_fps = 25
+        new_clip = retimed_clip.set_fps(new_fps)
+        new_clip.write_videofile(output_video_path, codec=codec, audio_codec=audio_codec)
+    finally:
+        retimed_clip.close()
+    # Plot the spectrogram
+    output_path = 'image_out.jpg'
+    fig = plt.figure(figsize=(10, 4))
+    try:
+        # hop_length must match the STFT above, otherwise the time axis is
+        # scaled for specshow's own default hop length
+        librosa.display.specshow(spectrogram, sr=sr, hop_length=hop_length, x_axis='time', y_axis='log')
+        plt.colorbar(format='%+2.0f dB')
+        plt.title('Audio Bars Visualization')
+        plt.savefig(output_path, dpi=300, bbox_inches='tight')
+    finally:
+        # Figures stay alive in pyplot until closed; close it so repeated
+        # requests do not accumulate them
+        plt.close(fig)
+    return output_video_path, output_path
+# Input widgets, in the positional order of infer()'s parameters.
+demo_inputs = [
+    gr.Textbox(placeholder='FIND A GOOD TITLE'),
+    gr.Audio(source='upload', type='filepath'),
+    gr.Image(source='upload', type='filepath'),
+    # Hidden field that supplies the fixed name of the output video.
+    gr.Textbox(label="Output video path", value="my_final_video.mp4", visible=False),
+]
+# Output widgets, matching the (video path, image path) tuple infer() returns.
+demo_outputs = [
+    gr.Video(label='video result'),
+    gr.Image(label='spectrogram image'),
+]
+demo = gr.Interface(
+    fn=infer,
+    inputs=demo_inputs,
+    outputs=demo_outputs,
+    title='Animated Audio Visualizer',
+    description='<p style="text-align: center;">Upload an audio file, upload a background image, choose a good title, click submit.</p>',
+)
+demo.launch()

black_cache.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+matplotlib
+librosa
+# app.py uses moviepy.editor, set_audio and set_fps, which MoviePy 2.0 removed.
+moviepy<2
+pillow