gradio-deepl / split_text.py
ffreemt
Update split_text, >5000 chars OK
22ba884
"""Split text to limit chars per chunk.
Converted from splitText.js.
"""
# pylint: disable=invalid-name, broad-except
from typing import Optional
from logzero import logger
# Default maximum number of characters per chunk. split_text falls back to
# this value when the caller passes no limit, a limit that cannot be
# converted to int, or a limit smaller than 1.
limit_ = 4900
def split_text(text: str, limit: Optional[int] = None) -> list:
    """Split text into chunks of at most ``limit`` characters.

    The text is cut on line boundaries (``str.splitlines``) and every line
    is re-terminated with a single ``"\n"``; consecutive lines are packed
    into the same chunk for as long as the chunk, trailing newline included,
    stays within ``limit``. A single line that is longer than ``limit`` is
    hard-split into ``limit``-sized pieces so that no chunk ever exceeds the
    limit. Concatenating the returned chunks yields the newline-normalized
    text.

    Args:
        text: The text to split. A falsy value (``""`` or ``None``) is
            returned unchanged, wrapped in a one-element list.
        limit: Maximum number of characters per chunk. ``None``, a value
            that cannot be converted to ``int``, or a value below 1 falls
            back to the module-level default ``limit_``.

    Returns:
        A list of chunks, each at most ``limit`` characters long.
    """
    if not text:  # handle text=""
        return [text]

    if limit is None:
        limit = limit_
    else:
        try:
            limit = int(limit)
        except Exception as exc:
            # Best effort: a bad limit is logged and replaced by the default
            # instead of failing the whole split.
            logger.error(exc)
            limit = limit_
    if limit < 1:
        limit = limit_

    chunks = []
    buffer = []  # lines collected for the chunk currently being built
    size = 0  # total length of the lines held in buffer

    for paragraph in text.splitlines():
        line = paragraph + "\n"
        if size + len(line) > limit:
            # The line (newline included) does not fit: close the current
            # chunk and start a new one.
            if buffer:
                chunks.append("".join(buffer))
            buffer, size = [], 0
            # A line longer than the limit cannot fit in any chunk; emit
            # full-size pieces and keep the remainder (always non-empty,
            # since it still carries the trailing newline) for the next chunk.
            while len(line) > limit:
                chunks.append(line[:limit])
                line = line[limit:]
        buffer.append(line)
        size += len(line)

    # Add the last chunk; buffer is never empty here because text is
    # non-empty and therefore produced at least one line.
    chunks.append("".join(buffer))
    return chunks