Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -437,7 +437,6 @@ import torch
|
|
437 |
from TTS.api import TTS
|
438 |
from nltk.tokenize import sent_tokenize
|
439 |
from pydub import AudioSegment
|
440 |
-
# Assuming split_long_sentence and wipe_folder are defined elsewhere in your code
|
441 |
|
442 |
default_target_voice_path = "default_voice.wav" # Ensure this is a valid path
|
443 |
default_language_code = "en"
|
@@ -483,18 +482,30 @@ def combine_wav_files(input_directory, output_directory, file_name):
|
|
483 |
print(f"Combined audio saved to {output_file_path}")
|
484 |
|
485 |
# Function to split long strings into parts
|
486 |
-
|
|
|
487 |
"""
|
488 |
Splits a sentence into parts based on length or number of pauses without recursion.
|
489 |
|
490 |
:param sentence: The sentence to split.
|
491 |
-
:param
|
492 |
:param max_pauses: Maximum allowed number of pauses in a sentence.
|
493 |
:return: A list of sentence parts that meet the criteria.
|
494 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
495 |
parts = []
|
496 |
-
while len(sentence) > max_length or sentence.count(
|
497 |
-
possible_splits = [i for i, char in enumerate(sentence) if char in
|
498 |
if possible_splits:
|
499 |
# Find the best place to split the sentence, preferring the last possible split to keep parts longer
|
500 |
split_at = possible_splits[-1] + 1
|
@@ -559,7 +570,7 @@ def convert_chapters_to_audio_custom_model(chapters_dir, output_audio_dir, targe
|
|
559 |
chapter_text = file.read()
|
560 |
sentences = sent_tokenize(chapter_text, language='italian' if language == 'it' else 'english')
|
561 |
for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
|
562 |
-
fragments = split_long_sentence(sentence,
|
563 |
for fragment in fragments:
|
564 |
if fragment != "":
|
565 |
print(f"Generating fragment: {fragment}...")
|
@@ -606,7 +617,7 @@ def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, tar
|
|
606 |
chapter_text = file.read()
|
607 |
sentences = sent_tokenize(chapter_text, language='italian' if language == 'it' else 'english')
|
608 |
for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
|
609 |
-
fragments = split_long_sentence(sentence,
|
610 |
for fragment in fragments:
|
611 |
if fragment != "":
|
612 |
print(f"Generating fragment: {fragment}...")
|
|
|
437 |
from TTS.api import TTS
|
438 |
from nltk.tokenize import sent_tokenize
|
439 |
from pydub import AudioSegment
|
|
|
440 |
|
441 |
default_target_voice_path = "default_voice.wav" # Ensure this is a valid path
|
442 |
default_language_code = "en"
|
|
|
482 |
print(f"Combined audio saved to {output_file_path}")
|
483 |
|
484 |
# Function to split long strings into parts
|
485 |
+
# Language-aware splitting: Chinese and Italian use their own length limit and punctuation set; all other languages use the defaults
|
486 |
+
def split_long_sentence(sentence, language='en', max_pauses=10):
|
487 |
"""
|
488 |
Splits a sentence into parts based on length or number of pauses without recursion.
|
489 |
|
490 |
:param sentence: The sentence to split.
|
491 |
+
:param language: The language of the sentence (default is English).
|
492 |
:param max_pauses: Maximum allowed number of pauses in a sentence.
|
493 |
:return: A list of sentence parts that meet the criteria.
|
494 |
"""
|
495 |
+
# Adjust the max_length and punctuation symbols based on language
|
496 |
+
if language == 'zh-cn':
|
497 |
+
max_length = 84 # Chinese-specific max length
|
498 |
+
punctuation = [',', '。', ';', '!', '?'] # Chinese-specific punctuation
|
499 |
+
elif language == 'it':
|
500 |
+
max_length = 213 # Italian-specific max length
|
501 |
+
punctuation = [',', ';', '.'] # Standard punctuation
|
502 |
+
else:
|
503 |
+
max_length = 249 # Default max length for other languages
|
504 |
+
punctuation = [',', ';', '.'] # Default punctuation
|
505 |
+
|
506 |
parts = []
|
507 |
+
while len(sentence) > max_length or sum(sentence.count(p) for p in punctuation) > max_pauses:
|
508 |
+
possible_splits = [i for i, char in enumerate(sentence) if char in punctuation and i < max_length]
|
509 |
if possible_splits:
|
510 |
# Find the best place to split the sentence, preferring the last possible split to keep parts longer
|
511 |
split_at = possible_splits[-1] + 1
|
|
|
570 |
chapter_text = file.read()
|
571 |
sentences = sent_tokenize(chapter_text, language='italian' if language == 'it' else 'english')
|
572 |
for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
|
573 |
+
fragments = split_long_sentence(sentence, language=language)
|
574 |
for fragment in fragments:
|
575 |
if fragment != "":
|
576 |
print(f"Generating fragment: {fragment}...")
|
|
|
617 |
chapter_text = file.read()
|
618 |
sentences = sent_tokenize(chapter_text, language='italian' if language == 'it' else 'english')
|
619 |
for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
|
620 |
+
fragments = split_long_sentence(sentence, language=language)
|
621 |
for fragment in fragments:
|
622 |
if fragment != "":
|
623 |
print(f"Generating fragment: {fragment}...")
|