~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/utextwrap.py

Committer: INADA Naoki
Date: 2011-05-08 16:11:56 UTC
mto: This revision was merged to the branch mainline in revision 5874.
Revision ID: songofacandy@gmail.com-20110508161156-d962p9m9gdszbdgu

Add keyword parameter 'ambiguous_width' that specifies the width of characters
for which unicodedata.east_asian_width(c) == 'A'.

files modified:
bzrlib/tests/test_utextwrap.py

bzrlib/utextwrap.py

Show diffs side-by-side

added added

removed removed

bzrlib/utextwrap.py

__all__ = ["UTextWrapper", "fill", "wrap"]

def _unicode_char_width(uc):
    """Return the display width (1 or 2 columns) of character `uc`.

    :param uc: Single unicode character.
    """
    # An east asian width of 'A' (ambiguous) means the real width of the
    # character cannot be determined.  It is counted as 2 columns: wrapping
    # a little early is acceptable, whereas a line that ends up longer than
    # the terminal width is not.
    if _eawidth(uc) in 'FWA':
        return 2
    return 1

def _width(s):
    """Return the display width of the unicode string `s`.

    Every character is measured with _unicode_char_width and the results
    are added up, so east asian double width characters are accounted for.

    `s` must be a unicode string; this is asserted, so byte strings are
    not accepted.
    """
    assert isinstance(s, unicode)
    total = 0
    for char in s:
        total += _unicode_char_width(char)
    return total

def _cut(s, width):
    """Split `s` into a (head, rest) pair with head + rest == s.

    head is the longest prefix of `s` whose accumulated character width,
    as measured by _unicode_char_width, does not exceed `width`.  When the
    whole string fits, rest is the empty unicode string.

    `s` must be a unicode string; this is asserted.
    """
    assert isinstance(s, unicode)
    measure = _unicode_char_width
    used = 0
    split_at = 0
    for char in s:
        used += measure(char)
        if used > width:
            # This character is the first one that no longer fits.
            return s[:split_at], s[split_at:]
        split_at += 1
    return s, u''

class UTextWrapper(textwrap.TextWrapper):

"""

Extend TextWrapper for Unicode.

This textwrapper handles east asian double width and split word

even if !break_long_words when word contains double width

characters.

:param ambiguous_width: (keyword argument) width for character when

unicodedata.east_asian_width(c) == 'A'

(default: 2)

"""

def __init__(self, width=None, **kwargs):
    """Initialise the wrapper.

    :param width: Maximum line width.  When None, one less than
        osutils.terminal_width() is used, or one less than
        osutils.default_terminal_width when the former is falsy.
    :param ambiguous_width: (keyword argument) width, 1 or 2, for
        characters when unicodedata.east_asian_width(c) == 'A'
        (default: 2).
    :raises ValueError: if ambiguous_width is neither 1 nor 2, or if
        drop_whitespace is passed as a false value on Python older
        than 2.6.

    All remaining keyword arguments are handed to
    textwrap.TextWrapper.__init__.
    """
    if width is None:
        width = (osutils.terminal_width() or
                 osutils.default_terminal_width) - 1

    # Choose which east asian width classes are counted as double width.
    ambi_width = kwargs.pop('ambiguous_width', 2)
    if ambi_width == 1:
        self._east_asian_doublewidth = 'FW'
    elif ambi_width == 2:
        self._east_asian_doublewidth = 'FWA'
    else:
        raise ValueError("ambiguous_width should be 1 or 2")

    # There is no drop_whitespace parameter before Python 2.6; whitespace
    # was always dropped.  Take the argument out of kwargs so the base
    # class never sees it.
    if sys.version_info < (2, 6):
        self.drop_whitespace = kwargs.pop("drop_whitespace", True)
        # Only a request to keep whitespace is unsupported; the default
        # (True) must still allow the wrapper to be constructed.
        if not self.drop_whitespace:
            raise ValueError("TextWrapper version must drop whitespace")
    textwrap.TextWrapper.__init__(self, width, **kwargs)

def _unicode_char_width(self, uc):
    """Return the display width (1 or 2 columns) of character `uc`.

    :param uc: Single unicode character.
    """
    # self._east_asian_doublewidth holds the east asian width classes that
    # are counted as 2 columns.  Class 'A' (ambiguous) means the real
    # width cannot be determined; counting it as 2 wraps a little early,
    # which is acceptable, whereas counting it as 1 may produce lines
    # longer than the terminal width.
    if _eawidth(uc) in self._east_asian_doublewidth:
        return 2
    return 1

def _width(self, s):
    """Return the display width of the unicode string `s`.

    The result is the sum of self._unicode_char_width(c) over every
    character c of `s`, so east asian double width characters are
    accounted for.

    `s` must be a unicode string; this is asserted, so byte strings are
    not accepted.
    """
    assert isinstance(s, unicode)
    measure = self._unicode_char_width
    total = 0
    for char in s:
        total += measure(char)
    return total

def _cut(self, s, width):
    """Split `s` into a (head, rest) pair with head + rest == s.

    head is the longest prefix of `s` whose accumulated character width,
    as measured by self._unicode_char_width, does not exceed `width`.
    When the whole string fits, rest is the empty unicode string.

    `s` must be a unicode string; this is asserted.
    """
    assert isinstance(s, unicode)
    measure = self._unicode_char_width
    used = 0
    split_at = 0
    for char in s:
        used += measure(char)
        if used > width:
            # This character is the first one that no longer fits.
            return s[:split_at], s[split_at:]
        split_at += 1
    return s, u''

100

101

def _handle_long_word(self, chunks, cur_line, cur_len, width):

102

# Figure out when indent is larger than the specified width, and make

103

# sure at least one character is stripped off on every pass

104

if width < 2:

space_left = chunks[-1] and _width(chunks[-1][0]) or 1

105

space_left = chunks[-1] and self._width(chunks[-1][0]) or 1

106

else:

107

space_left = width - cur_len

108

109

# If we're allowed to break long words, then do so: put as much

110

# of the next chunk onto the current line as will fit.

100

111

if self.break_long_words:

101

head, rest = _cut(chunks[-1], space_left)

112

head, rest = self._cut(chunks[-1], space_left)

102

113

cur_line.append(head)

103

114

if rest:

104

115

chunks[-1] = rest

149

160

150

161

while chunks:

151

162

# Use _width instead of len for east asian width

152

# l = len(chunks[-1])

153

l = _width(chunks[-1])

163

l = self._width(chunks[-1])

154

164

155

165

# Can at least squeeze this chunk onto the current line.

156

166

if cur_len + l <= width:

163

173

164

174

# The current line is full, and the next chunk is too big to

165

175

# fit on *any* line (not just this one).

166

if chunks and _width(chunks[-1]) > width:

176

if chunks and self._width(chunks[-1]) > width:

167

177

self._handle_long_word(chunks, cur_line, cur_len, width)

168

178

169

179

# If the last chunk on this line is all whitespace, drop it.

Older »