Commit b36d167
Parent(s): 7e40d92

added code

Files changed:
- .gitattributes +1 -0
- .gitignore +1 -0
- __pycache__/helper.cpython-312.pyc +0 -0
- anushka.wav +3 -0
- app.py +96 -0
- helper.py +106 -0
- traffic.wav +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+/.venv
__pycache__/helper.cpython-312.pyc ADDED
Binary file (3.25 kB)
anushka.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79d0cfa2385223555e11093ade0c9dcabe1932171318afcd70479e00176026cb
+size 192780
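Both .wav files in this commit are stored as Git LFS pointers rather than raw audio; that is what the new *.wav rule in .gitattributes accomplishes. As a minimal sketch of the pointer format shown above, a hypothetical parse_lfs_pointer helper (not part of this commit) could read the key/value lines like so:

# Hypothetical helper, not part of this commit: parses the key/value lines
# of a Git LFS pointer file (version, oid, size) as shown above.
def parse_lfs_pointer(path):
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

# parse_lfs_pointer("anushka.wav")
# -> {'version': 'https://git-lfs.github.com/spec/v1',
#     'oid': 'sha256:79d0cfa2...', 'size': '192780'}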
app.py ADDED
@@ -0,0 +1,96 @@
+import gradio as gr
+from helper import process_audio
+import os
+import numpy as np
+
+# Sample audio file paths
+SAMPLE_SPEECH = "anushka.wav"
+SAMPLE_NOISE = "traffic.wav"
+
+def process_audio_files(speech_file, noise_file, alpha, beta):
+    """
+    Process the audio files and return the mixed output
+
+    Args:
+        speech_file (tuple): Speech audio (sample_rate, data)
+        noise_file (tuple): Noise audio (sample_rate, data)
+        alpha (float): First slider value (-30 to +30)
+        beta (float): Second slider value (-30 to +30)
+
+    Returns:
+        tuple: (sample_rate, processed_audio_data)
+    """
+    speech_sr, speech_data = speech_file
+    noise_sr, noise_data = noise_file
+
+    # Process the audio using the helper function
+    output_audio = process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta)
+
+    # Convert AudioSegment to numpy array
+    samples = np.array(output_audio.get_array_of_samples())
+
+    return (output_audio.frame_rate, samples)
+
+
+# Create the Gradio interface
+
+
+with gr.Blocks() as app:
+    gr.Markdown("# Audio Mixing Application")
+
+    with gr.Row():
+        with gr.Column():
+            # Input components
+            speech_input = gr.Audio(
+                label="Speech Audio",
+                type="numpy"
+            )
+            noise_input = gr.Audio(
+                label="Noise Audio",
+                type="numpy"
+            )
+
+            # Sample audio examples
+            gr.Examples(
+                examples=[[SAMPLE_SPEECH, SAMPLE_NOISE]],
+                inputs=[speech_input, noise_input],
+                label="Sample Audio Files"
+            )
+
+            # Slider controls
+            alpha_slider = gr.Slider(
+                minimum=-30,
+                maximum=30,
+                value=0,
+                step=1,
+                label="Alpha (dB)",
+                info="Adjust alpha from -30 to +30 dB"
+            )
+            beta_slider = gr.Slider(
+                minimum=-30,
+                maximum=30,
+                value=0,
+                step=1,
+                label="Beta (dB)",
+                info="Adjust beta from -30 to +30 dB"
+            )
+
+            # Submit button
+            submit_btn = gr.Button("Process Audio")
+
+        with gr.Column():
+            # Output audio player
+            output_audio = gr.Audio(
+                label="Mixed Audio",
+                type="numpy"
+            )
+
+    # Connect the components
+    submit_btn.click(
+        fn=process_audio_files,
+        inputs=[speech_input, noise_input, alpha_slider, beta_slider],
+        outputs=output_audio
+    )
+
+if __name__ == "__main__":
+    app.launch()
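For reference, process_audio_files can be exercised without launching the UI by passing in the same (sample_rate, data) tuples that Gradio's numpy-mode audio components produce. A minimal sketch, assuming scipy is installed, the sample files have been pulled from LFS, and the inputs are mono 16-bit (which the channels=1 conversion in helper.py expects); this test script is not part of the commit:

# Hypothetical smoke test, not part of this commit.
from scipy.io import wavfile
from app import process_audio_files, SAMPLE_SPEECH, SAMPLE_NOISE

speech = wavfile.read(SAMPLE_SPEECH)   # -> (sample_rate, int16 ndarray)
noise = wavfile.read(SAMPLE_NOISE)

sr, mixed = process_audio_files(speech, noise, alpha=0, beta=-5)
wavfile.write("mixed.wav", sr, mixed)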
helper.py ADDED
@@ -0,0 +1,106 @@
+import random
+from pydub import AudioSegment
+# from pydub.effects import normalize
+import numpy as np
+
+
+def get_audio_volume_db(audio):
+    """Estimate the volume in dBFS (decibels relative to full scale) using PyDub."""
+    return audio.dBFS if audio.dBFS != float('-inf') else -50.0  # Default to -50 dB for silence
+
+
+def adjust_volume(audio, volume_change_db):
+    """Adjusts the volume of an AudioSegment."""
+    return audio + volume_change_db
+
+
+# def compress_audio(audio):
+#     """Apply compression to normalize speech volume."""
+#     return normalize(audio)
+
+
+def place_in_stereo(audio, pan_value):
+    """Places audio in stereo field (-1.0 = full left, 1.0 = full right)."""
+    return audio.pan(pan_value)
+
+
+def overlay_audio(speech_audio, noise_audio):
+    """Overlays speech and noise using PyDub."""
+    return speech_audio.overlay(noise_audio)
+
+
+def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
+    """
+    Process speech and noise audio data to create a mixed audio output.
+
+    Args:
+        speech_data (numpy.ndarray): Speech audio data
+        noise_data (numpy.ndarray): Noise audio data
+        speech_sr (int): Speech sample rate
+        noise_sr (int): Noise sample rate
+        alpha (float): Speech volume adjustment
+        beta (float): Noise volume adjustment
+
+    Returns:
+        AudioSegment: Processed audio
+    """
+    # Convert numpy arrays to AudioSegment
+    speech_audio = AudioSegment(
+        speech_data.tobytes(),
+        frame_rate=speech_sr,
+        sample_width=speech_data.dtype.itemsize,
+        channels=1
+    )
+
+    noise_audio = AudioSegment(
+        noise_data.tobytes(),
+        frame_rate=noise_sr,
+        sample_width=noise_data.dtype.itemsize,
+        channels=1
+    )
+
+    # Get speech duration
+    speech_duration = len(speech_audio) / 1000.0  # Convert ms to sec
+
+    # Cut noise segment
+    if len(noise_audio) / 1000.0 <= speech_duration:
+        trimmed_noise = noise_audio
+    else:
+        start_time = random.uniform(0, len(noise_audio) / 1000.0 - speech_duration) * 1000
+        trimmed_noise = noise_audio[start_time:start_time + (speech_duration * 1000)]
+
+    trimmed_noise = trimmed_noise.set_frame_rate(8000)
+
+    # Calculate volumes and adjustments
+    speech_vol = get_audio_volume_db(speech_audio)
+    noise_vol = get_audio_volume_db(trimmed_noise)
+
+    current_snr = speech_vol - noise_vol
+    adjustment_needed = 10 - current_snr  # target_snr hardcoded to 10
+
+    if adjustment_needed > 0:  # Speech too quiet
+        speech_adjust = min(adjustment_needed, 2)
+        noise_adjust = -min(adjustment_needed / 2, 5)
+    else:  # Speech too loud
+        speech_adjust = max(adjustment_needed, -5)
+        noise_adjust = -5 / 2
+
+    # Apply adjustments
+    adjusted_speech = adjust_volume(speech_audio, speech_adjust + alpha)
+    adjusted_noise = adjust_volume(trimmed_noise, noise_adjust + beta)
+
+    final_audio = overlay_audio(adjusted_speech, adjusted_noise)
+
+    return final_audio
+
+
+# final_audio = process_audio("anushka.wav", "traffic.wav")
+# # Single write operation at the end
+# final_audio.export("output-traffic.wav", format="wav")
+
+# print("Processing complete. Check output.wav!")
+
+
+# -18, -20 for office
+# -13 , -20 for market
+# -18, -20 for traffic
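The mixing logic in process_audio aims for a fixed 10 dB speech-to-noise gap but caps the corrections (speech boosted by at most 2 dB, noise cut by at most 5 dB), so the target is not always reached; alpha and beta are then applied on top as manual offsets. A worked example of just that arithmetic, with illustrative dBFS values:

# Illustrative values only: speech at -20 dBFS, noise at -15 dBFS.
speech_vol, noise_vol = -20.0, -15.0
current_snr = speech_vol - noise_vol        # -5 dB
adjustment_needed = 10 - current_snr        # 15 dB below the 10 dB target

if adjustment_needed > 0:                   # speech too quiet
    speech_adjust = min(adjustment_needed, 2)      # +2 dB cap on speech boost
    noise_adjust = -min(adjustment_needed / 2, 5)  # -5 dB cap on noise cut
else:                                       # speech too loud
    speech_adjust = max(adjustment_needed, -5)
    noise_adjust = -5 / 2

print(speech_adjust, noise_adjust)  # prints: 2 -5, so the resulting SNR is -5 + 2 + 5 = 2 dB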
traffic.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fcab96844aaeebbdbd40b0d39df8689edefda2bd05ab6a44b74e5d96e7b852d
+size 24178236