Spaces:

rohitptnk
/

V2V-Translate

Running

V2V-Translate / my_tts.py

Add app.py

d000c57 6 months ago

1.45 kB

	from transformers import BarkModel, AutoProcessor
	import torch
	import scipy

	def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
		"""
		Convert text to speech with the Bark model and save it as a WAV file.

		Args:
			text (str): Text to convert to speech
			voice_preset (str): Voice preset to use for the speech synthesis

		Returns:
			str: Path of the WAV file that was written ("output_audio.wav",
				relative to the current working directory). The file is
				overwritten on every call.
		"""
		# Import the submodule explicitly: a bare `import scipy` does not load
		# scipy.io.wavfile on every SciPy version.
		from scipy.io import wavfile

		# Check if CUDA is available and set device accordingly
		device = "cuda:0" if torch.cuda.is_available() else "cpu"

		# Load the model and processor.
		# NOTE(review): both are re-loaded on every call; the checkpoint and the
		# processor also come from different repos ("suno/bark-small" vs
		# "suno/bark") - confirm this pairing is intentional.
		model = BarkModel.from_pretrained("suno/bark-small").to(device)
		processor = AutoProcessor.from_pretrained("suno/bark")

		# Prepare the inputs once and move each entry to the model's device.
		# Only values are replaced while iterating, so the key set is unchanged.
		inputs = processor(text=text, voice_preset=voice_preset)
		for key, value in inputs.items():
			inputs[key] = value.to(device)

		# Generate speech
		speech_output = model.generate(**inputs)
		sampling_rate = model.generation_config.sample_rate
		path = "output_audio.wav"

		# Save the first generated waveform to a file
		wavfile.write(path, rate=sampling_rate, data=speech_output[0].cpu().numpy())

		return path