Spaces:

Sambhavnoobcoder
/

PDF-text-extractor_sd_1

Runtime error

App Files Files Community

PDF-text-extractor_sd_1 / app.py

Sambhavnoobcoder

put speaker choice 92

58791c2 about 2 years ago

raw

history blame

2.26 kB

	import gradio as gr
	from pdfminer.high_level import extract_text
	import logging
	from typing import cast
	from balacoon_tts import TTS
	from huggingface_hub import hf_hub_download, list_repo_files

	# Process-wide TTS engine. Starts as None; `set_model` rebinds it to a
	# `TTS` instance built from the downloaded model file, and the synthesis
	# callback defined in `main` reads it.
	tts = None

	def read_pdf(file):
	    """Return the text pdfminer extracts from an uploaded PDF.

	    `file` must expose a `.name` attribute holding the path of the PDF on
	    disk; that path is opened in binary mode and handed to `extract_text`.
	    """
	    pdf_path = file.name
	    with open(pdf_path, "rb") as pdf_stream:
	        return extract_text(pdf_stream)

	def set_model(model_name_str):
	    """Download a model file and make it the active TTS engine.

	    Fetches `model_name_str` from the `balacoon/tts` Hub repository,
	    rebinds the module-level `tts` to a `TTS` built from the downloaded
	    file, and asks that engine which speakers it supports.

	    Returns a `(speakers, default_speaker)` pair, where the default is the
	    last entry of the speaker list. Indexing that last entry fails if the
	    engine reports no speakers.
	    """
	    global tts
	    local_addon = hf_hub_download(repo_id="balacoon/tts", filename=model_name_str)
	    tts = TTS(local_addon)
	    supported = tts.get_speakers()
	    return supported, supported[-1]

	def main():
	    """Build and launch the "PDF TO SPEECH CONVERTER" Gradio interface.

	    Lists the files of the `balacoon/tts` Hub repository, keeps those ending
	    in `_cpu.addon` as model choices, and wires a PDF upload, the model
	    dropdown and the speaker dropdown to `synthesize_audio`. The callback's
	    result feeds a numpy-typed audio output.
	    """
	    logging.basicConfig(level=logging.INFO)
	    repo_files = list_repo_files(repo_id="balacoon/tts")
	    model_files = [x for x in repo_files if x.endswith("_cpu.addon")]
	    model_name_dropdown = gr.inputs.Dropdown(label="Model", choices=model_files)
	    speaker_dropdown = gr.inputs.Dropdown(label="Speaker", choices=[92])

	    file_input = gr.inputs.File(label="Select a PDF File", type="file")

	    # Only the first `max_chars` characters of the PDF text are synthesized.
	    max_chars = 1024
	    # Remembers which model the global `tts` currently holds, so the model
	    # is downloaded and loaded only when the selection actually changes.
	    loaded = {"model": None, "speakers": [], "default": None}

	    def synthesize_audio(file, model_name_str, speaker_str):
	        """Turn the text of the uploaded PDF into speech.

	        Args:
	            file: uploaded file object exposing `.name`, or None.
	            model_name_str: model file name chosen in the "Model" dropdown.
	            speaker_str: value chosen in the "Speaker" dropdown.

	        Returns:
	            `(sampling_rate, samples)` as produced by the TTS engine, or
	            None when there is no file, no model selected, or no text
	            could be extracted from the PDF.
	        """
	        if file is None or file.name == "":
	            logging.info("No file selected.")
	            return None
	        if not model_name_str:
	            logging.info("No model selected.")
	            return None

	        # Slicing is a no-op for short texts, so no length check is needed.
	        text_str = read_pdf(file)[:max_chars]
	        if not text_str.strip():
	            logging.info("No text could be extracted from the PDF.")
	            return None

	        # Nothing else ever calls `set_model`, so without this step the
	        # global `tts` would still be None at synthesis time.
	        if tts is None or loaded["model"] != model_name_str:
	            speakers, default_speaker = set_model(model_name_str)
	            loaded.update(
	                model=model_name_str,
	                speakers=list(speakers),
	                default=default_speaker,
	            )

	        # The speaker dropdown is built with the integer choice 92.
	        # NOTE(review): assumes the engine identifies speakers by strings
	        # such as "92" -- confirm against the balacoon_tts API.
	        speaker = str(speaker_str)
	        if speaker not in loaded["speakers"]:
	            logging.info(
	                "Speaker %s is not supported by %s; using %s instead.",
	                speaker,
	                model_name_str,
	                loaded["default"],
	            )
	            speaker = loaded["default"]

	        engine = cast(TTS, tts)
	        samples = engine.synthesize(text_str, speaker)
	        return (engine.get_sampling_rate(), samples)

	    audio = gr.outputs.Audio(label="Generated Audio", type="numpy")

	    # NOTE(review): `layout` and `debug` are passed to `Interface` exactly as
	    # before; confirm that the pinned Gradio version accepts them here
	    # (`debug` may belong on `launch()` instead).
	    iface = gr.Interface(
	        fn=synthesize_audio,
	        inputs=[file_input, model_name_dropdown, speaker_dropdown],
	        outputs=audio,
	        title="PDF TO SPEECH CONVERTER",
	        layout="rows",
	        debug=True
	    )

	    iface.launch()


	# Start the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()