""" |
|
Various functions for finding/manipulating silence in AudioSegments |
|
""" |
|
import itertools |
|
|
|
from .utils import db_to_float |
|
|
|
|
|


def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
    """
    Returns a list of all silent sections [start, end] in milliseconds of audio_segment.
    Inverse of detect_nonsilent()

    audio_segment - the segment to find silence in
    min_silence_len - the minimum length for any silent section, in ms
    silence_thresh - the upper bound for how quiet is silent, in dBFS
    seek_step - step size for iterating over the segment in ms
    """
    seg_len = len(audio_segment)

    # you can't have a silent portion of a sound that is longer than the sound
    if seg_len < min_silence_len:
        return []

    # convert silence threshold to a float value (so it can be compared to rms)
    # e.g. -16 dBFS with 16-bit audio: db_to_float(-16) * 32767 ~= 5194
    silence_thresh = db_to_float(silence_thresh) * audio_segment.max_possible_amplitude

    # collect the start position of every slice quieter than the threshold
    silence_starts = []

    # check successive (1 sec by default) chunks of sound for silence
    # try a chunk at every "seek step" (or every chunk for a seek step == 1)
    last_slice_start = seg_len - min_silence_len
    slice_starts = range(0, last_slice_start + 1, seek_step)

    # guarantee last_slice_start is included in the range
    # to make sure the last portion of the audio is searched
    if last_slice_start % seek_step:
        slice_starts = itertools.chain(slice_starts, [last_slice_start])

    for i in slice_starts:
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms <= silence_thresh:
            silence_starts.append(i)
    # short circuit when there is no silence
    if not silence_starts:
        return []

    # combine the silence we detected into ranges
    silent_ranges = []

    prev_i = silence_starts.pop(0)
    current_range_start = prev_i

    for silence_start_i in silence_starts:
        continuous = (silence_start_i == prev_i + seek_step)

        # sometimes two small blips are enough for one particular slice to be
        # non-silent, despite the silence all running together. Just combine
        # the two overlapping silent ranges.
        silence_has_gap = silence_start_i > (prev_i + min_silence_len)

        if not continuous and silence_has_gap:
            silent_ranges.append([current_range_start,
                                  prev_i + min_silence_len])
            current_range_start = silence_start_i
        prev_i = silence_start_i

    silent_ranges.append([current_range_start,
                          prev_i + min_silence_len])

    return silent_ranges
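

# A minimal usage sketch (illustrative, not part of the library source): a
# fully silent AudioSegment is reported as a single silent range covering it.
#
#     from pydub import AudioSegment
#     from pydub.silence import detect_silence
#
#     quiet = AudioSegment.silent(duration=1500)  # 1.5s of digital silence
#     detect_silence(quiet, min_silence_len=1000, silence_thresh=-16)
#     # -> [[0, 1500]]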


def detect_nonsilent(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
    """
    Returns a list of all nonsilent sections [start, end] in milliseconds of audio_segment.
    Inverse of detect_silence()

    audio_segment - the segment to find silence in
    min_silence_len - the minimum length for any silent section, in ms
    silence_thresh - the upper bound for how quiet is silent, in dBFS
    seek_step - step size for iterating over the segment in ms
    """
    silent_ranges = detect_silence(audio_segment, min_silence_len, silence_thresh, seek_step)
    len_seg = len(audio_segment)

    # if there is no silence, the whole thing is nonsilent
    if not silent_ranges:
        return [[0, len_seg]]

    # short circuit when the whole audio segment is silent
    if silent_ranges[0][0] == 0 and silent_ranges[0][1] == len_seg:
        return []

    # the nonsilent ranges are the gaps between the silent ranges
    prev_end_i = 0
    nonsilent_ranges = []
    for start_i, end_i in silent_ranges:
        nonsilent_ranges.append([prev_end_i, start_i])
        prev_end_i = end_i

    if end_i != len_seg:
        nonsilent_ranges.append([prev_end_i, len_seg])

    # drop the empty range produced when the audio begins with silence
    if nonsilent_ranges[0] == [0, 0]:
        nonsilent_ranges.pop(0)

    return nonsilent_ranges
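

# Illustrative sketch of the inverse relationship (not from the library
# source): a fully silent segment has no nonsilent ranges at all.
#
#     from pydub import AudioSegment
#     from pydub.silence import detect_nonsilent
#
#     quiet = AudioSegment.silent(duration=1500)
#     detect_nonsilent(quiet, min_silence_len=1000, silence_thresh=-16)
#     # -> []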


def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, keep_silence=100,
                     seek_step=1):
    """
    Returns a list of audio segments from splitting audio_segment on silent sections

    audio_segment - original pydub.AudioSegment() object

    min_silence_len - (in ms) minimum length of a silence to be used for
        a split. default: 1000ms

    silence_thresh - (in dBFS) anything quieter than this will be
        considered silence. default: -16dBFS

    keep_silence - (in ms or True/False) leave some silence at the beginning
        and end of the chunks. Keeps the sound from sounding like it
        is abruptly cut off.
        When the length of the silence is less than the keep_silence duration
        it is split evenly between the preceding and following non-silent
        segments.
        If True is specified, all the silence is kept; if False, none is kept.
        default: 100ms

    seek_step - step size for iterating over the segment in ms
    """

    # adjacent-pairs recipe from the itertools documentation
    def pairwise(iterable):
        "s -> (s0,s1), (s1,s2), (s2, s3), ..."
        a, b = itertools.tee(iterable)
        next(b, None)
        return zip(a, b)

    if isinstance(keep_silence, bool):
        keep_silence = len(audio_segment) if keep_silence else 0

    # pad each nonsilent range with keep_silence ms on either side
    output_ranges = [
        [start - keep_silence, end + keep_silence]
        for (start, end)
            in detect_nonsilent(audio_segment, min_silence_len, silence_thresh, seek_step)
    ]

    # if adjacent padded ranges overlap (the silence between them is shorter
    # than 2 * keep_silence), split the overlap evenly at its midpoint
    for range_i, range_ii in pairwise(output_ranges):
        last_end = range_i[1]
        next_start = range_ii[0]
        if next_start < last_end:
            range_i[1] = (last_end + next_start) // 2
            range_ii[0] = range_i[1]

    # clamp each range to the bounds of the original segment
    return [
        audio_segment[max(start, 0):min(end, len(audio_segment))]
        for start, end in output_ranges
    ]
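

# Typical usage sketch (illustrative; "speech.wav" is a hypothetical file):
#
#     from pydub import AudioSegment
#     from pydub.silence import split_on_silence
#
#     recording = AudioSegment.from_file("speech.wav")
#     chunks = split_on_silence(recording,
#                               min_silence_len=500,  # split on 500ms+ of silence
#                               silence_thresh=recording.dBFS - 16,
#                               keep_silence=100)     # keep 100ms at each edge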


def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=10):
    """
    Returns the millisecond/index at which the leading silence ends.

    sound - the segment to find silence in
    silence_threshold - the upper bound for how quiet is silent, in dBFS
    chunk_size - chunk size for iterating over the segment in ms
    """
    trim_ms = 0

    assert chunk_size > 0  # to avoid an infinite loop
    while sound[trim_ms:trim_ms + chunk_size].dBFS < silence_threshold and trim_ms < len(sound):
        trim_ms += chunk_size

    # if the whole segment is silent, return its length rather than overshooting
    return min(trim_ms, len(sound))
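

# Common trimming pattern built on this function (illustrative sketch;
# "clip.wav" is a hypothetical file):
#
#     from pydub import AudioSegment
#
#     sound = AudioSegment.from_file("clip.wav")
#     start = detect_leading_silence(sound)
#     end = len(sound) - detect_leading_silence(sound.reverse())
#     trimmed = sound[start:end]  # silence stripped from both ends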