32
32
__all__ = ["UTextWrapper", "fill", "wrap"]
34
def _unicode_char_width(uc):
35
"""Return width of character `uc`.
37
:param: uc Single unicode character.
39
# 'A' means the width of the character cannot be determined.
40
# We assume that its width is 2 because a longer wrap may overflow the
41
# terminal width but shorter wrap may be acceptable.
42
return (_eawidth(uc) in 'FWA' and 2) or 1
45
"""Returns width for s.
47
When s is unicode, take care of east asian width.
48
When s is bytes, treat every byte as a single-width character.
50
NOTE: Support for byte strings should be removed with Python 3.
52
assert isinstance(s, unicode)
53
return sum(_unicode_char_width(c) for c in s)
56
"""Returns head and rest of s. (head+rest == s)
58
Head is as large as possible while _width(head) <= width.
60
assert isinstance(s, unicode)
62
charwidth = _unicode_char_width
63
for pos, c in enumerate(s):
66
return s[:pos], s[pos:]
70
34
class UTextWrapper(textwrap.TextWrapper):
72
36
Extend TextWrapper for Unicode.
74
38
This textwrapper handles East Asian double-width characters and splits words
75
39
even if !break_long_words when word contains double width
42
:param ambiguous_width: (keyword argument) width for character when
43
unicodedata.east_asian_width(c) == 'A'
79
47
def __init__(self, width=None, **kwargs):
81
49
width = (osutils.terminal_width() or
82
50
osutils.default_terminal_width) - 1
52
ambi_width = kwargs.pop('ambiguous_width', 2)
54
self._east_asian_doublewidth = 'FW'
56
self._east_asian_doublewidth = 'FWA'
58
raise ValueError("ambiguous_width should be 1 or 2")
83
60
# No drop_whitespace param before Python 2.6; whitespace was always dropped
84
61
if sys.version_info < (2, 6):
85
62
self.drop_whitespace = kwargs.pop("drop_whitespace", True)
87
64
raise ValueError("TextWrapper version must drop whitespace")
88
65
textwrap.TextWrapper.__init__(self, width, **kwargs)
67
def _unicode_char_width(self, uc):
68
"""Return width of character `uc`.
70
:param: uc Single unicode character.
72
# 'A' means the width of the character cannot be determined.
73
# We assume that its width is 2 because a longer wrap may overflow the
74
# terminal width but shorter wrap may be acceptable.
75
return (_eawidth(uc) in self._east_asian_doublewidth and 2) or 1
78
"""Returns width for s.
80
When s is unicode, take care of east asian width.
81
When s is bytes, treat every byte as a single-width character.
83
assert isinstance(s, unicode)
84
charwidth = self._unicode_char_width
85
return sum(charwidth(c) for c in s)
87
def _cut(self, s, width):
88
"""Returns head and rest of s. (head+rest == s)
90
Head is as large as possible while _width(head) <= width.
92
assert isinstance(s, unicode)
94
charwidth = self._unicode_char_width
95
for pos, c in enumerate(s):
98
return s[:pos], s[pos:]
90
101
def _handle_long_word(self, chunks, cur_line, cur_len, width):
91
102
# Figure out when indent is larger than the specified width, and make
92
103
# sure at least one character is stripped off on every pass
94
space_left = chunks[-1] and _width(chunks[-1][0]) or 1
105
space_left = chunks[-1] and self._width(chunks[-1][0]) or 1
96
107
space_left = width - cur_len
98
109
# If we're allowed to break long words, then do so: put as much
99
110
# of the next chunk onto the current line as will fit.
100
111
if self.break_long_words:
101
head, rest = _cut(chunks[-1], space_left)
112
head, rest = self._cut(chunks[-1], space_left)
102
113
cur_line.append(head)
104
115
chunks[-1] = rest
164
174
# The current line is full, and the next chunk is too big to
165
175
# fit on *any* line (not just this one).
166
if chunks and _width(chunks[-1]) > width:
176
if chunks and self._width(chunks[-1]) > width:
167
177
self._handle_long_word(chunks, cur_line, cur_len, width)
169
179
# If the last chunk on this line is all whitespace, drop it.