Spaces:
Runtime error
Runtime error
"""Split text to limit chars per chunk. | |
Converted from splitText.js. | |
""" | |
# pylint: disable=invalid-name, broad-except | |
from typing import Optional | |
from logzero import logger | |
# Fallback chunk size, used whenever split_text is given no usable ``limit``.
limit_ = 4900


def split_text(text: str, limit: Optional[int] = None) -> list:
    """Split text into chunks of at most ``limit`` characters, on line breaks.

    The text is cut at the line boundaries recognised by ``str.splitlines``
    and consecutive lines are packed greedily into chunks. Every line is
    re-emitted with a trailing ``"\\n"``, so concatenating the chunks yields
    the input with its line endings normalised to ``"\\n"`` (and a final
    ``"\\n"`` added if it was missing).

    Args:
        text: The text to split. A falsy value (e.g. ``""``) is returned
            unchanged as the only element of the result.
        limit: Maximum chunk length, trailing newline included. ``None``, a
            value that cannot be converted with ``int()``, or a value below 1
            all fall back to the module default ``limit_``.

    Returns:
        A list of chunks (str). A single line that is longer than ``limit``
        is never broken up: it becomes a chunk of its own, which then exceeds
        ``limit``.
    """
    if not text:  # handle text=""
        return [text]

    if limit is None:
        limit = limit_
    else:
        try:
            limit = int(limit)
        except Exception as exc:
            # Deliberately best-effort: log the bad value and use the default.
            logger.error(exc)
            limit = limit_
    if limit < 1:
        limit = limit_

    chunks = []
    pending = []  # lines of the chunk currently being built
    pending_len = 0  # its length, counting one "\n" per line
    for line in text.splitlines():
        # Count the newline that is re-appended to the line; leaving it out
        # let a chunk grow to limit + 1 characters.
        line_len = len(line) + 1
        if pending and pending_len + line_len > limit:
            chunks.append("\n".join(pending) + "\n")
            pending = []
            pending_len = 0
        pending.append(line)
        pending_len += line_len

    # Non-empty text always yields at least one line, so this flushes the
    # final (possibly only) chunk.
    chunks.append("\n".join(pending) + "\n")
    return chunks