""" |
|
Various functions for finding/manipulating silence in AudioSegments |
|
""" |
|
import itertools |
|
|
|
from .utils import db_to_float |
|
|
|
|
|


def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
    """
    Returns a list of all silent sections [start, end] in milliseconds of audio_segment.
    Inverse of detect_nonsilent()

    audio_segment - the segment to find silence in
    min_silence_len - the minimum length for any silent section, in ms
    silence_thresh - the upper bound for how quiet is silent, in dBFS
    seek_step - step size for iterating over the segment in ms
    """
    seg_len = len(audio_segment)

    # you can't have a silent portion of a sound that is longer than the sound
    if seg_len < min_silence_len:
        return []

    # convert silence threshold to a float value (so it can be compared to rms)
    # e.g. -16 dBFS with 16-bit audio: db_to_float(-16) * 32767 ~= 5194
    silence_thresh = db_to_float(silence_thresh) * audio_segment.max_possible_amplitude

    # collect the start position of every slice quieter than the threshold
    silence_starts = []

    # check successive (1 sec by default) chunks of sound for silence
    # try a chunk at every "seek step" (or every chunk for a seek step == 1)
    last_slice_start = seg_len - min_silence_len
    slice_starts = range(0, last_slice_start + 1, seek_step)

    # guarantee last_slice_start is included in the range
    # to make sure the last portion of the audio is searched
    if last_slice_start % seek_step:
        slice_starts = itertools.chain(slice_starts, [last_slice_start])

    for i in slice_starts:
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms <= silence_thresh:
            silence_starts.append(i)
    # short circuit when there is no silence
    if not silence_starts:
        return []

    # combine the silence we detected into ranges
    silent_ranges = []

    prev_i = silence_starts.pop(0)
    current_range_start = prev_i

    for silence_start_i in silence_starts:
        continuous = (silence_start_i == prev_i + seek_step)

        # sometimes two small blips are enough for one particular slice to be
        # non-silent, despite the silence all running together. Just combine
        # the two overlapping silent ranges.
        silence_has_gap = silence_start_i > (prev_i + min_silence_len)

        if not continuous and silence_has_gap:
            silent_ranges.append([current_range_start,
                                  prev_i + min_silence_len])
            current_range_start = silence_start_i
        prev_i = silence_start_i

    silent_ranges.append([current_range_start,
                          prev_i + min_silence_len])

    return silent_ranges
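

# A minimal usage sketch (illustrative, not part of the library source): a
# fully silent AudioSegment is reported as a single silent range covering it.
#
#     from pydub import AudioSegment
#     from pydub.silence import detect_silence
#
#     quiet = AudioSegment.silent(duration=1500)  # 1.5s of digital silence
#     detect_silence(quiet, min_silence_len=1000, silence_thresh=-16)
#     # -> [[0, 1500]]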


def detect_nonsilent(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
    """
    Returns a list of all nonsilent sections [start, end] in milliseconds of audio_segment.
    Inverse of detect_silence()

    audio_segment - the segment to find silence in
    min_silence_len - the minimum length for any silent section, in ms
    silence_thresh - the upper bound for how quiet is silent, in dBFS
    seek_step - step size for iterating over the segment in ms
    """
    silent_ranges = detect_silence(audio_segment, min_silence_len, silence_thresh, seek_step)
    len_seg = len(audio_segment)

    # if there is no silence, the whole thing is nonsilent
    if not silent_ranges:
        return [[0, len_seg]]

    # short circuit when the whole audio segment is silent
    if silent_ranges[0][0] == 0 and silent_ranges[0][1] == len_seg:
        return []

    # the nonsilent ranges are the gaps between the silent ranges
    prev_end_i = 0
    nonsilent_ranges = []
    for start_i, end_i in silent_ranges:
        nonsilent_ranges.append([prev_end_i, start_i])
        prev_end_i = end_i

    if end_i != len_seg:
        nonsilent_ranges.append([prev_end_i, len_seg])

    # drop the empty range produced when the audio begins with silence
    if nonsilent_ranges[0] == [0, 0]:
        nonsilent_ranges.pop(0)

    return nonsilent_ranges
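

# Illustrative sketch of the inverse relationship (not from the library
# source): a fully silent segment has no nonsilent ranges at all.
#
#     from pydub import AudioSegment
#     from pydub.silence import detect_nonsilent
#
#     quiet = AudioSegment.silent(duration=1500)
#     detect_nonsilent(quiet, min_silence_len=1000, silence_thresh=-16)
#     # -> []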


def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, keep_silence=100,
                     seek_step=1):
    """
    Returns a list of audio segments from splitting audio_segment on silent sections

    audio_segment - original pydub.AudioSegment() object

    min_silence_len - (in ms) minimum length of a silence to be used for
        a split. default: 1000ms

    silence_thresh - (in dBFS) anything quieter than this will be
        considered silence. default: -16dBFS

    keep_silence - (in ms or True/False) leave some silence at the beginning
        and end of the chunks. Keeps the sound from sounding like it
        is abruptly cut off.
        When the length of the silence is less than the keep_silence duration
        it is split evenly between the preceding and following non-silent
        segments.
        If True is specified, all the silence is kept; if False, none is kept.
        default: 100ms

    seek_step - step size for iterating over the segment in ms
    """

    # adjacent-pairs recipe from the itertools documentation
    def pairwise(iterable):
        "s -> (s0,s1), (s1,s2), (s2, s3), ..."
        a, b = itertools.tee(iterable)
        next(b, None)
        return zip(a, b)

    if isinstance(keep_silence, bool):
        keep_silence = len(audio_segment) if keep_silence else 0

    # pad each nonsilent range with keep_silence ms on either side
    output_ranges = [
        [start - keep_silence, end + keep_silence]
        for (start, end)
            in detect_nonsilent(audio_segment, min_silence_len, silence_thresh, seek_step)
    ]

    # if adjacent padded ranges overlap (the silence between them is shorter
    # than 2 * keep_silence), split the overlap evenly at its midpoint
    for range_i, range_ii in pairwise(output_ranges):
        last_end = range_i[1]
        next_start = range_ii[0]
        if next_start < last_end:
            range_i[1] = (last_end + next_start) // 2
            range_ii[0] = range_i[1]

    # clamp each range to the bounds of the original segment
    return [
        audio_segment[max(start, 0):min(end, len(audio_segment))]
        for start, end in output_ranges
    ]
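

# Typical usage sketch (illustrative; "speech.wav" is a hypothetical file):
#
#     from pydub import AudioSegment
#     from pydub.silence import split_on_silence
#
#     recording = AudioSegment.from_file("speech.wav")
#     chunks = split_on_silence(recording,
#                               min_silence_len=500,  # split on 500ms+ of silence
#                               silence_thresh=recording.dBFS - 16,
#                               keep_silence=100)     # keep 100ms at each edge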


def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=10):
    """
    Returns the millisecond/index at which the leading silence ends.

    sound - the segment to find silence in
    silence_threshold - the upper bound for how quiet is silent, in dBFS
    chunk_size - chunk size for iterating over the segment in ms
    """
    trim_ms = 0

    assert chunk_size > 0  # to avoid an infinite loop
    while sound[trim_ms:trim_ms + chunk_size].dBFS < silence_threshold and trim_ms < len(sound):
        trim_ms += chunk_size

    # if the whole segment is silent, return its length rather than overshooting
    return min(trim_ms, len(sound))
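

# Common trimming pattern built on this function (illustrative sketch;
# "clip.wav" is a hypothetical file):
#
#     from pydub import AudioSegment
#
#     sound = AudioSegment.from_file("clip.wav")
#     start = detect_leading_silence(sound)
#     end = len(sound) - detect_leading_silence(sound.reverse())
#     trimmed = sound[start:end]  # silence stripped from both ends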