File size: 5,882 Bytes
9c6594c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
"""
This module provides functions for justifying Unicode text in a monospaced
display such as a terminal.
We used to have our own implementation here, but now we mostly rely on
the 'wcwidth' library.
"""
from unicodedata import normalize
from wcwidth import wcswidth, wcwidth
from ftfy.fixes import remove_terminal_escapes
def character_width(char: str) -> int:
    r"""
    Report how many terminal cells a single character is expected to occupy.

    This delegates to `wcwidth` and coerces its answer to a plain `int`.
    Printable characters yield 0, 1, or 2. Following the `wcwidth`
    convention, nonprintable and control characters yield -1.

    >>> character_width('車')
    2
    >>> character_width('A')
    1
    >>> character_width('\N{ZERO WIDTH JOINER}')
    0
    >>> character_width('\n')
    -1
    """
    cell_count = wcwidth(char)
    return int(cell_count)
def monospaced_width(text: str) -> int:
    r"""
    Compute the "display width" of a string: the number of character cells
    it is likely to fill in a monospaced, modern, Unicode-aware terminal
    emulator.

    This is handy when laying out text that may include non-spacing
    characters, or CJK characters that span two cells.

    The result is -1 when the string contains a non-printable or control
    character.

    >>> monospaced_width('ちゃぶ台返し')
    12
    >>> len('ちゃぶ台返し')
    6
    >>> monospaced_width('owl\N{SOFT HYPHEN}flavored')
    11
    >>> monospaced_width('example\x80')
    -1

    A more complex example: The Korean word 'ibnida' can be written with 3
    pre-composed characters or 7 jamo. Either way, it *looks* the same and
    takes up 6 character cells.

    >>> monospaced_width('입니다')
    6
    >>> monospaced_width('\u110b\u1175\u11b8\u1102\u1175\u1103\u1161')
    6

    The word "blue" with terminal escapes to make it blue still takes up only
    4 characters, when shown as intended.

    >>> monospaced_width('\x1b[34mblue\x1b[m')
    4
    """
    # Compose to NFC up front so that Hangul jamo sequences need no special
    # handling when they are measured.
    composed = normalize("NFC", text)

    # Terminal escapes occupy no cells when rendered as intended, so they
    # are stripped before measuring.
    visible = remove_terminal_escapes(composed)

    return int(wcswidth(visible))
def display_ljust(text: str, width: int, fillchar: str = " ") -> str:
    """
    Left-justify `text` by appending `fillchar` until the result's display
    width in a monospaced terminal is at least `width` character cells.

    `fillchar` must be a character of display width 1; otherwise a
    `ValueError` is raised. Text whose display width cannot be determined
    (it contains a control character) is returned unpadded.

    "Left" here means toward the beginning of the string, which may actually
    appear on the right in an RTL context. This is similar to the use of the
    word "left" in "left parenthesis".

    >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
    >>> for line in lines:
    ...     print(display_ljust(line, 20, '▒'))
    Table flip▒▒▒▒▒▒▒▒▒▒
    (╯°□°)╯︵ ┻━┻▒▒▒▒▒▒▒
    ちゃぶ台返し▒▒▒▒▒▒▒▒

    This example, and the similar ones that follow, should come out justified
    correctly when viewed in a monospaced terminal. It will probably not look
    correct if you're viewing this code or documentation in a Web browser.
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    occupied = monospaced_width(text)
    if occupied == -1:
        # A control character makes the width unknowable; leave text as-is.
        return text

    missing = width - occupied
    if missing <= 0:
        # Already at least as wide as requested: nothing to append.
        return text
    return text + fillchar * missing
def display_rjust(text: str, width: int, fillchar: str = " ") -> str:
    """
    Right-justify `text` by prepending `fillchar` until the result's display
    width in a monospaced terminal is at least `width` character cells.

    `fillchar` must be a character of display width 1; otherwise a
    `ValueError` is raised. Text whose display width cannot be determined
    (it contains a control character) is returned unpadded.

    "Right" here means toward the end of the string, which may actually be on
    the left in an RTL context. This is similar to the use of the word "right"
    in "right parenthesis".

    >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
    >>> for line in lines:
    ...     print(display_rjust(line, 20, '▒'))
    ▒▒▒▒▒▒▒▒▒▒Table flip
    ▒▒▒▒▒▒▒(╯°□°)╯︵ ┻━┻
    ▒▒▒▒▒▒▒▒ちゃぶ台返し
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    occupied = monospaced_width(text)
    if occupied == -1:
        # A control character makes the width unknowable; leave text as-is.
        return text

    shortfall = width - occupied
    fill_count = shortfall if shortfall > 0 else 0
    return fillchar * fill_count + text
def display_center(text: str, width: int, fillchar: str = " ") -> str:
    """
    Center `text` by surrounding it with `fillchar` until the result's
    display width in a monospaced terminal is at least `width` character
    cells. When the padding cannot be split evenly, the extra fill character
    goes on the right.

    `fillchar` must be a character of display width 1; otherwise a
    `ValueError` is raised. Text whose display width cannot be determined
    (it contains a control character) is returned unpadded.

    >>> lines = ['Table flip', '(╯°□°)╯︵ ┻━┻', 'ちゃぶ台返し']
    >>> for line in lines:
    ...     print(display_center(line, 20, '▒'))
    ▒▒▒▒▒Table flip▒▒▒▒▒
    ▒▒▒(╯°□°)╯︵ ┻━┻▒▒▒▒
    ▒▒▒▒ちゃぶ台返し▒▒▒▒
    """
    if character_width(fillchar) != 1:
        raise ValueError("The padding character must have display width 1")

    occupied = monospaced_width(text)
    if occupied == -1:
        # A control character makes the width unknowable; leave text as-is.
        return text

    total_fill = max(0, width - occupied)
    # The remainder (0 or 1) is given to the right-hand side.
    half, leftover = divmod(total_fill, 2)
    leading = fillchar * half
    trailing = fillchar * (half + leftover)
    return leading + text + trailing
|