Spaces:
Runtime error
Runtime error
added Arabic punctuation to sentence splitting
Browse files
app.py
CHANGED
@@ -537,12 +537,6 @@ def split_long_sentence(sentence, language='en', max_pauses=10):
|
|
537 |
# Get the maximum character length for the selected language, minus 2 (falls back to 250 - 2 = 248 if the language is not in char_limits)
|
538 |
max_length = (char_limits.get(language, 250)-2)
|
539 |
|
540 |
-
## Adjust the max_length and punctuation symbols based on language
|
541 |
-
#if language == 'zh-cn':
|
542 |
-
# punctuation = [',', '。', ';', '!', '?'] # Chinese-specific punctuation
|
543 |
-
#else:
|
544 |
-
# punctuation = [',', ';', '.'] # Default punctuation
|
545 |
-
|
546 |
# Adjust the pause punctuation symbols based on language
|
547 |
if language == 'zh-cn':
|
548 |
punctuation = [',', '。', ';', '?', '!'] # Chinese-specific pause punctuation including sentence-ending marks
|
@@ -550,12 +544,15 @@ def split_long_sentence(sentence, language='en', max_pauses=10):
|
|
550 |
punctuation = ['、', '。', ';', '?', '!'] # Japanese-specific pause punctuation
|
551 |
elif language == 'ko':
|
552 |
punctuation = [',', '。', ';', '?', '!'] # Korean-specific pause punctuation
|
|
|
|
|
553 |
elif language == 'en':
|
554 |
punctuation = [',', ';', '.'] # English-specific pause punctuation
|
555 |
else:
|
556 |
# Default pause punctuation for other languages (es, fr, de, it, pt, pl, cs, ru, nl, tr, hu)
|
557 |
punctuation = [',', '.', ';', ':', '?', '!']
|
558 |
|
|
|
559 |
|
560 |
parts = []
|
561 |
while len(sentence) > max_length or sum(sentence.count(p) for p in punctuation) > max_pauses:
|
|
|
537 |
# Get the maximum character length for the selected language, minus 2 (falls back to 250 - 2 = 248 if the language is not in char_limits)
|
538 |
max_length = (char_limits.get(language, 250)-2)
|
539 |
|
|
|
|
|
|
|
|
|
|
|
|
|
540 |
# Adjust the pause punctuation symbols based on language
|
541 |
if language == 'zh-cn':
|
542 |
punctuation = [',', '。', ';', '?', '!'] # Chinese-specific pause punctuation including sentence-ending marks
|
|
|
544 |
punctuation = ['、', '。', ';', '?', '!'] # Japanese-specific pause punctuation
|
545 |
elif language == 'ko':
|
546 |
punctuation = [',', '。', ';', '?', '!'] # Korean-specific pause punctuation
|
547 |
+
elif language == 'ar':
|
548 |
+
punctuation = ['،', '؛', '؟', '!', '·', '؛', '.'] # Arabic-specific punctuation
|
549 |
elif language == 'en':
|
550 |
punctuation = [',', ';', '.'] # English-specific pause punctuation
|
551 |
else:
|
552 |
# Default pause punctuation for other languages (es, fr, de, it, pt, pl, cs, ru, nl, tr, hu)
|
553 |
punctuation = [',', '.', ';', ':', '?', '!']
|
554 |
|
555 |
+
|
556 |
|
557 |
parts = []
|
558 |
while len(sentence) > max_length or sum(sentence.count(p) for p in punctuation) > max_pauses:
|