15
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
# UTextWrapper._handle_long_word, UTextWrapper._wrap_chunks,
19
# wrap and fill are copied from Python's textwrap module
20
# (under PSF license) and modified to support CJK.
20
23
from unicodedata import east_asian_width as _eawidth
25
from bzrlib import osutils
22
27
__all__ = ["UTextWrapper", "fill", "wrap"]
38
43
w += (c in 'FWA' and 2) or 1
41
def _break_cjkword(word, width):
42
"""Split `word` by `width`.
44
Returns a tuple contains 2 strings. First string is head of
45
`word` that's length is less than `width`. Second string is
48
The border of head and rest is next to double width character.
49
Because spaces is not used as word separator on CJK.
51
When ``_width(word) < width``, returns ``(word, '')``.
52
When can't split anywhere, returns ``('', word)``.
47
"""Returns head and rest of s. (head+rest == s).
49
Head is large as long as _width(head) <= width.
51
if isinstance(s, str):
52
return s[:width], s[width:]
53
assert isinstance(s, unicode)
55
for pos, c in enumerate(word):
62
if pos>0 and _width(word[pos]) == 2:
63
# "sssDDD" and pos=3 => "sss", "DDD" (D is double width)
64
return word[:pos], word[pos:]
65
# "DDDssss" and pos=4 => "DDD", "ssss"
66
while pos > 0 and _width(word[pos-1]) != 2:
70
return word[:pos], word[pos:]
55
for pos, c in enumerate(s):
56
w += (_eawidth(c) in 'FWA' and 2) or 1
58
return s[:pos], s[pos:]
73
62
class UTextWrapper(textwrap.TextWrapper):
85
74
textwrap.TextWrapper.__init__(self, width, **kwargs)
87
76
def _handle_long_word(self, chunks, cur_line, cur_len, width):
88
head, rest = _break_cjkword(chunks[-1], width)
77
# Figure out when indent is larger than the specified width, and make
78
# sure at least one character is stripped off on every pass
80
space_left = chunks[-1] and _width(chunks[-1][0]) or 1
82
space_left = width - cur_len
84
# If we're allowed to break long words, then do so: put as much
85
# of the next chunk onto the current line as will fit.
86
if self.break_long_words:
87
head, rest = _cut(chunks[-1], space_left)
95
textwrap.TextWrapper._handle_long_word(
96
self, chunks, cur_line, cur_len, width)
94
# Otherwise, we have to preserve the long word intact. Only add
95
# it to the current line if there's nothing already there --
96
# that minimizes how much we violate the width constraint.
98
cur_line.append(chunks.pop())
100
# If we're not allowed to break long words, and there's already
101
# text on the current line, do nothing. Next time through the
102
# main loop of _wrap_chunks(), we'll wind up here again, but
103
# cur_len will be zero, so the next line will be entirely
104
# devoted to the long word that we can't handle right now.
98
106
def _wrap_chunks(self, chunks):
138
146
# Nope, this line is full.
141
head, rest = _break_cjkword(chunks[-1], width-cur_len)
143
cur_line.append(head)
144
cur_len += _width(head)
149
150
# The current line is full, and the next chunk is too big to
166
def _split(self, text):
167
chunks = textwrap.TextWrapper._split(self, unicode(text))
168
cjk_split_chunks = []
170
assert chunk # TextWrapper._split removes empty chunk
172
for pos, char in enumerate(chunk):
173
# Treats all asian character are line breakable.
174
# But it is not true because line breaking is
175
# prohibited around some characters.
176
# See UAX # 14 "UNICODE LINE BREAKING ALGORITHM"
177
if _eawidth(char) in 'FWA':
179
cjk_split_chunks.append(chunk[prev_pos:pos])
180
cjk_split_chunks.append(char)
182
if prev_pos < len(chunk):
183
cjk_split_chunks.append(chunk[prev_pos:])
184
return cjk_split_chunks
165
186
def wrap(self, text):
    """Wrap `text` using the base ``textwrap.TextWrapper.wrap``.

    `text` is first passed through ``unicode()`` so that the base
    implementation always operates on a unicode string. The return
    value is whatever ``textwrap.TextWrapper.wrap`` returns.
    """
    unicode_text = unicode(text)
    return textwrap.TextWrapper.wrap(self, unicode_text)