# tja-generator / preprocess.py
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
from librosa.filters import mel
from scipy import signal
from scipy.fft import fft
class Audio:
"""
audio class which holds music data and timestamp for notes.
Args:
filename: file name.
stereo: True or False; wether you have Don/Ka streo file or not. normaly True.
Variables:
Example:
>>>from music_processor import *
>>>song = Audio(filename)
>>># to get audio data
>>>song.data
>>># to import .tja files:
>>>song.import_tja(filename)
>>># to get data converted
>>>song.data = (song.data[:,0]+song.data[:,1])/2
>>>fft_and_melscale(song, include_zero_cross=False)
"""
    def __init__(self, data, samplerate, stereo=True):
        self.data = data
        self.samplerate = samplerate
        if not stereo:
            # average the two channels down to mono
            self.data = (self.data[:, 0] + self.data[:, 1]) / 2
        self.timestamp = []
    def plotaudio(self, start_t, stop_t):
        """Plot the first channel between sample indices start_t and stop_t."""
        plt.plot(
            np.linspace(start_t, stop_t, stop_t - start_t), self.data[start_t:stop_t, 0]
        )
        plt.show()
    def save(self, filename, start_t=0, stop_t=None):
        """Write the samples in [start_t, stop_t) to filename."""
        if stop_t is None:
            stop_t = self.data.shape[0]
        sf.write(filename, self.data[start_t:stop_t], self.samplerate)
    def synthesize(self, diff=True, don="./asset/don.wav", ka="./asset/ka.wav"):
        """Mix don/ka hit sounds into the audio at each note timestamp."""
        donsound = sf.read(don)[0]
        donsound = (donsound[:, 0] + donsound[:, 1]) / 2  # downmix to mono
        kasound = sf.read(ka)[0]
        kasound = (kasound[:, 0] + kasound[:, 1]) / 2
        donlen = len(donsound)
        kalen = len(kasound)

        if diff is True:
            # timestamp entries are (seconds, note_type) tuples
            for stamp in self.timestamp:
                timing = int(stamp[0] * self.samplerate)
                try:
                    if stamp[1] in (1, 3, 5, 6, 7):  # don-type notes
                        self.data[timing : timing + donlen] += donsound
                    elif stamp[1] in (2, 4):  # ka-type notes
                        self.data[timing : timing + kalen] += kasound
                except ValueError:
                    # hit sound would run past the end of the track; skip it
                    pass

        elif diff == "don":
            for stamp in self.timestamp:
                # timestamps may be (seconds, note_type) tuples or bare times
                time = stamp[0] if isinstance(stamp, tuple) else stamp
                start = int(time * self.samplerate)
                if start + donlen < self.data.shape[0]:
                    self.data[start : start + donlen] += donsound

        elif diff == "ka":
            for stamp in self.timestamp:
                time = stamp[0] if isinstance(stamp, tuple) else stamp
                start = int(time * self.samplerate)
                if start + kalen < self.data.shape[0]:
                    self.data[start : start + kalen] += kasound
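# Usage sketch for Audio.synthesize (the timestamp values below are
# hypothetical): timestamps are (seconds, note_type) pairs, where note types
# 1/3/5/6/7 trigger the don sound and 2/4 the ka sound:
#
#     song.timestamp = [(1.0, 1), (1.5, 2), (2.0, 1)]
#     song.synthesize(diff=True)
#     song.save("./out/synthesized.wav")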
def make_frame(data, nhop, nfft):
    """
    Helper function for fft_and_melscale.
    To use finely time-sliced segments as training data, returns an array of
    nfft-sized windows, each shifted by nhop (512) samples from the last.
    """
    length = data.shape[0]
    framedata = np.concatenate((data, np.zeros(nfft)))  # zero padding so every frame is full length
    return np.array(
        [framedata[i * nhop : i * nhop + nfft] for i in range(length // nhop)]
    )
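# Shape sketch (hypothetical input): for mono data with 100_000 samples,
# nhop=512 and nfft=1024, make_frame returns an array of shape
# (100_000 // 512, 1024) == (195, 1024).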
# @jit
def fft_and_melscale(
    song,
    nhop=512,
    nffts=(1024, 2048, 4096),
    mel_nband=80,
    mel_freqlo=27.5,
    mel_freqhi=16000.0,
    include_zero_cross=False,
):
    """
    FFT and mel-scale method.
    fft: for each nfft in nffts (1024, 2048, 4096), slices the data into
    windows of that length and runs a fast Fourier transform on each one.
    melscale: reduces the frequency dimension with a mel filter bank and
    takes the log10 of the power.
    """
feat_channels = []
for nfft in nffts:
feats = []
window = signal.windows.blackmanharris(nfft)
filt = mel(
sr=song.samplerate,
n_fft=nfft,
n_mels=mel_nband,
fmin=mel_freqlo,
fmax=mel_freqhi,
)
        # slice the song into overlapping frames
        frame = make_frame(song.data, nhop, nfft)

        # FFT each windowed frame and keep the one-sided spectrum
        processedframe = fft(window * frame)[:, : nfft // 2 + 1]
        # mel-scale the power spectrum, then take the log magnitude
        processedframe = np.dot(filt, np.transpose(np.abs(processedframe) ** 2))
        processedframe = 20 * np.log10(processedframe + 0.1)
        feat_channels.append(processedframe)

    if include_zero_cross:
        # indices where the waveform changes sign
        song.zero_crossing = np.where(np.diff(np.sign(song.data)))[0]
        print(song.zero_crossing)
return np.array(feat_channels)
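

# Minimal end-to-end sketch (the WAV path below is hypothetical; any stereo
# file readable by soundfile works):
if __name__ == "__main__":
    data, samplerate = sf.read("./asset/sample.wav")
    song = Audio(data, samplerate, stereo=False)  # downmix to mono
    feats = fft_and_melscale(song)
    # one channel per nfft: (len(nffts), mel_nband, n_frames)
    print(feats.shape)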