~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/utextwrap.py

Committer: Canonical.com Patch Queue Manager
Date: 2011-04-19 01:07:44 UTC
mfrom: (5757.7.11 knitpackrepo-6)
Revision ID: pqm@pqm.ubuntu.com-20110419010744-ns5qnlw97wrrva7s

(jelmer) Split KnitPackRepository-specific bits out of Packer class into
KnitPacker. (Jelmer Vernooij)

files removed:
bzrlib/tests/per_repository_vf/test_check.py

bzrlib/tests/per_repository_vf/test_reconcile.py

bzrlib/tests/test_utextwrap.py

bzrlib/utextwrap.py

files renamed:
bzrlib/tests/per_repository_vf/helpers.py => bzrlib/tests/per_repository/helpers.py

files modified:
bzrlib/__init__.py

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/commands.py

bzrlib/dirstate.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/mutabletree.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitpack_repo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revisiontree.py

bzrlib/smart/branch.py

bzrlib/smart/request.py

bzrlib/testament.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_hooks.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_controldir/test_controldir.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository_vf/__init__.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_import_tariff.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_workingtree.py

bzrlib/transform.py

bzrlib/tree.py

bzrlib/ui/__init__.py

bzrlib/versionedfile.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

doc/developers/bug-handling.txt

doc/developers/contribution-quickstart.txt

doc/en/release-notes/bzr-2.3.txt

doc/en/release-notes/bzr-2.4.txt

doc/en/whats-new/whats-new-in-2.4.txt

Show diffs side-by-side

added added

removed removed

bzrlib/utextwrap.py

# UTextWrapper._handle_long_word, UTextWrapper._wrap_chunks,

# UTextWrapper._fix_sentence_endings, wrap and fill are copied from Python's

# textwrap module (under PSF license) and modified to support CJK.

# Original Copyright for these functions:

# Written by Greg Ward <gward@python.net>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

import sys

import textwrap

from unicodedata import east_asian_width as _eawidth

from bzrlib import osutils

__all__ = ["UTextWrapper", "fill", "wrap"]

class UTextWrapper(textwrap.TextWrapper):
    """Extend TextWrapper for Unicode.

    This text wrapper takes east asian double width characters into
    account, and splits a word that contains double width characters even
    when break_long_words is false.

    :param ambiguous_width: (keyword argument) width used for a character
        when unicodedata.east_asian_width(c) == 'A'.  Must be 1 or 2.
        (default: 1)

    Limitations:
    * expand_tabs is not fixed.  It uses len() for calculating the width
      of the string on the left of a TAB.
    * Handles one code unit as a single character having width 1 or 2.
      This is not correct when there are surrogate pairs, combining
      characters or zero-width characters.
    * Treats every double width character as a possible line break
      position.  That is not strictly true because line breaking is
      prohibited around some characters.  (For example, breaking before
      a punctuation mark is prohibited.)
      See UAX # 14 "UNICODE LINE BREAKING ALGORITHM"
    """

    def __init__(self, width=None, **kwargs):
        """Create a wrapper.

        :param width: maximum line width in columns.  When None, one less
            than the terminal width reported by osutils is used (falling
            back to osutils.default_terminal_width).
        :param kwargs: 'ambiguous_width' (1 or 2) is consumed here; all
            other keyword arguments are passed to textwrap.TextWrapper.
        :raises ValueError: if ambiguous_width is neither 1 nor 2, or if
            drop_whitespace is false on a Python older than 2.6.
        """
        if width is None:
            width = (osutils.terminal_width() or
                     osutils.default_terminal_width) - 1

        # East asian width categories that count as two columns.
        ambi_width = kwargs.pop('ambiguous_width', 1)
        if ambi_width == 1:
            self._east_asian_doublewidth = 'FW'
        elif ambi_width == 2:
            self._east_asian_doublewidth = 'FWA'
        else:
            raise ValueError("ambiguous_width should be 1 or 2")

        # No drop_whitespace param before Python 2.6 it was always dropped
        if sys.version_info < (2, 6):
            self.drop_whitespace = kwargs.pop("drop_whitespace", True)
            if not self.drop_whitespace:
                raise ValueError("TextWrapper version must drop whitespace")
        textwrap.TextWrapper.__init__(self, width, **kwargs)

    def _unicode_char_width(self, uc):
        """Return width of character `uc`.

        :param uc: Single unicode character.
        :return: 2 for a double width character, otherwise 1.
        """
        # 'A' means the width of the character is ambiguous.  It is only
        # counted as double width when the wrapper was created with
        # ambiguous_width=2; assuming the wider value wraps earlier but
        # never runs over the terminal width.
        if _eawidth(uc) in self._east_asian_doublewidth:
            return 2
        return 1

    def _width(self, s):
        """Return the display width of s in columns.

        When s is unicode, take care of east asian width.
        When s is bytes, treat every byte as a single width character.
        """
        # type(u'') is the unicode text type on every Python version;
        # east_asian_width() only accepts unicode characters, so any
        # other string type is measured with len().
        if not isinstance(s, type(u'')):
            return len(s)
        charwidth = self._unicode_char_width
        return sum(charwidth(c) for c in s)

    def _cut(self, s, width):
        """Return head and rest of s. (head+rest == s)

        Head is as large as possible while _width(head) <= width.
        """
        w = 0
        charwidth = self._unicode_char_width
        for pos, c in enumerate(s):
            w += charwidth(c)
            if w > width:
                return s[:pos], s[pos:]
        return s, u''

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place.

        Note: This function is copied from textwrap.TextWrapper and
        modified to use unicode always.
        """
        i = 0
        last = len(chunks) - 1
        patsearch = self.sentence_end_re.search
        while i < last:
            if chunks[i+1] == u" " and patsearch(chunks[i]):
                # Two spaces after the end of a sentence.
                chunks[i+1] = u"  "
                i += 2
            else:
                i += 1

    def _handle_long_word(self, chunks, cur_line, cur_len, width):
        """Handle a chunk that is too wide to fit on any line.

        :param chunks: reversed list of remaining chunks; chunks[-1] is
            the over-long chunk.  Modified in place.
        :param cur_line: list of chunks already placed on the current
            line.  Modified in place.
        :param cur_len: display width of cur_line.
        :param width: maximum display width for the current line.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 2:
            if chunks[-1]:
                space_left = self._width(chunks[-1][0])
            else:
                space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            head, rest = self._cut(chunks[-1], space_left)
            cur_line.append(head)
            if rest:
                chunks[-1] = rest
            else:
                del chunks[-1]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """Wrap a sequence of chunks into a list of lines.

        :param chunks: list of unicode chunks as produced by _split().
            The list is reversed and consumed in place.
        :return: list of lines, each prefixed with the applicable indent.
        :raises ValueError: if self.width is not positive.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is the display width of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.  Use _width instead of len so
            # that an indent containing double width characters does not
            # push the line past self.width.
            width = self.width - self._width(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                # Use _width instead of len for east asian width
                chunk_width = self._width(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_width <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_width

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and self._width(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and not cur_line[-1].strip():
                del cur_line[-1]

            # Convert current line back to a string and store it in list
            # of all lines (return value).
            if cur_line:
                lines.append(indent + u''.join(cur_line))

        return lines

    def _split(self, text):
        """Split text into chunks, isolating every double width character.

        The chunks returned by textwrap.TextWrapper._split() are split
        further so that each double width character becomes a chunk of
        its own; this is what allows a line break next to such characters.
        """
        chunks = textwrap.TextWrapper._split(self, unicode(text))
        cjk_split_chunks = []
        for chunk in chunks:
            prev_pos = 0
            for pos, char in enumerate(chunk):
                if self._unicode_char_width(char) == 2:
                    # Flush the single width run before this character.
                    if prev_pos < pos:
                        cjk_split_chunks.append(chunk[prev_pos:pos])
                    cjk_split_chunks.append(char)
                    prev_pos = pos+1
            # Trailing single width run, if any.
            if prev_pos < len(chunk):
                cjk_split_chunks.append(chunk[prev_pos:])
        return cjk_split_chunks

    def wrap(self, text):
        """Wrap text and return a list of unicode lines."""
        # ensure text is unicode
        return textwrap.TextWrapper.wrap(self, unicode(text))

239

240

# -- Convenience interface ---------------------------------------------

241

242

def wrap(text, width=None, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    The paragraph in 'text' is reformatted so that it fits in lines of no
    more than 'width' columns, and the wrapped lines are returned as a
    list.  By default, tabs in 'text' are expanded with
    string.expandtabs(), and all other whitespace characters (including
    newline) are converted to space.  See the UTextWrapper class for the
    keyword args that customize wrapping behaviour.
    """
    wrapper = UTextWrapper(width=width, **kwargs)
    return wrapper.wrap(text)

253

254

def fill(text, width=None, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    The paragraph in 'text' is reformatted to fit in lines of no more
    than 'width' columns, and the whole wrapped paragraph is returned as
    one new string.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See the UTextWrapper class
    for the keyword args that customize wrapping behaviour.
    """
    wrapper = UTextWrapper(width=width, **kwargs)
    return wrapper.fill(text)

264

Older »