2052.3.2
by John Arbash Meinel
Change Copyright .. by Canonical to Copyright ... Canonical |
1 |
# Copyright (C) 2005 Canonical Ltd
|
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
2 |
#
|
2052.3.1
by John Arbash Meinel
Add tests to cleanup the copyright of all source files |
3 |
# This program is free software; you can redistribute it and/or modify
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
1553.5.6
by Martin Pool
Clean up comments |
16 |
|
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
17 |
# \subsection{\emph{rio} - simple text metaformat}
|
18 |
#
|
|
19 |
# \emph{r} stands for `restricted', `reproducible', or `rfc822-like'.
|
|
20 |
#
|
|
21 |
# The stored data consists of a series of \emph{stanzas}, each of which contains
|
|
22 |
# \emph{fields} identified by an ascii name, with Unicode or string contents.
|
|
23 |
# The field tag is constrained to alphanumeric characters.
|
|
24 |
# There may be more than one field in a stanza with the same name.
|
|
25 |
#
|
|
26 |
# The format itself does not deal with character encoding issues, though
|
|
27 |
# the result will normally be written in Unicode.
|
|
28 |
#
|
|
29 |
# The format is intended to be simple enough that there is exactly one character
|
|
30 |
# stream representation of an object and vice versa, and that this relation
|
|
31 |
# will continue to hold for future versions of bzr.
|
|
32 |
||
33 |
import re |
|
34 |
||
1534.10.2
by Aaron Bentley
Implemented rio_file to produce a light file object from stanzas |
35 |
from bzrlib.iterablefile import IterableFile |
36 |
||
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
37 |
# XXX: there is some redundancy in allowing stanzas to be written in isolation as well as
|
38 |
# through a writer object.
|
|
39 |
||
40 |
class RioWriter(object):
    """Write a series of stanzas to one file, separated by blank lines."""

    def __init__(self, to_file):
        """Remember the destination; nothing is written yet.

        :param to_file: Object with a write() method.  It is also passed to
            each stanza's own write() method.
        """
        self._to_file = to_file
        # Becomes True after the first stanza, meaning the next stanza has
        # to be preceded by a separating newline.
        self._soft_nl = False

    def write_stanza(self, stanza):
        """Write stanza, preceded by a newline unless it is the first one."""
        out = self._to_file
        if self._soft_nl:
            out.write('\n')
        stanza.write(out)
        self._soft_nl = True
|
50 |
||
51 |
||
52 |
class RioReader(object):
    """Read stanzas from a file as a sequence.

    from_file can be anything that can be enumerated as a sequence of
    lines (with newlines.)
    """

    def __init__(self, from_file):
        self._from_file = from_file

    def __iter__(self):
        """Yield stanzas until read_stanza returns None."""
        stanza = read_stanza(self._from_file)
        while stanza is not None:
            yield stanza
            stanza = read_stanza(self._from_file)
|
68 |
||
1534.10.2
by Aaron Bentley
Implemented rio_file to produce a light file object from stanzas |
69 |
|
70 |
def rio_file(stanzas, header=None):
    """Produce a rio IterableFile from an iterable of stanzas.

    :param stanzas: Iterable of objects providing to_lines().
    :param header: Optional string; when not None it is emitted first,
        followed by a newline.
    :return: An IterableFile wrapping the generated lines.  Consecutive
        stanzas are separated by a single newline line.
    """
    def str_iter():
        if header is not None:
            yield header + '\n'
        for position, stanza in enumerate(stanzas):
            # Every stanza except the first is preceded by a blank line.
            if position:
                yield '\n'
            for line in stanza.to_lines():
                yield line
    return IterableFile(str_iter())
|
83 |
||
84 |
||
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
85 |
def read_stanzas(from_file):
    """Generate each stanza read from from_file until read_stanza gives None."""
    stanza = read_stanza(from_file)
    while stanza is not None:
        yield stanza
        stanza = read_stanza(from_file)
|
92 |
||
93 |
class Stanza(object):
    """One stanza for rio: an ordered list of named fields.

    Names are checked with valid_tag when a field is added.  Names can be
    repeated within a stanza and are compared case-sensitively.  The
    ordering of fields is preserved.

    Field values must be str or unicode objects; add() raises TypeError for
    anything else.
    """

    __slots__ = ['items']

    def __init__(self, **kwargs):
        """Construct a new Stanza.

        The keyword arguments, if any, are added in sorted order to the stanza.
        """
        self.items = []
        for tag in sorted(kwargs):
            self.add(tag, kwargs[tag])

    def add(self, tag, value):
        """Append a name and value to the stanza.

        :raises ValueError: if valid_tag rejects the tag.
        :raises TypeError: if value is neither str nor unicode.
        """
        if not valid_tag(tag):
            raise ValueError("invalid tag %r" % (tag,))
        if not isinstance(value, (str, unicode)):
            raise TypeError("invalid type for rio value: %r of type %s"
                            % (value, type(value)))
        if isinstance(value, str):
            # Values are stored as unicode objects.
            value = unicode(value)
        self.items.append((tag, value))

    def __contains__(self, find_tag):
        """True if there is any field in this stanza with the given tag."""
        return any(tag == find_tag for tag, _ in self.items)

    def __len__(self):
        """Return number of pairs in the stanza."""
        return len(self.items)

    def __eq__(self, other):
        """Stanzas are equal when their (tag, value) lists are equal."""
        return isinstance(other, Stanza) and self.items == other.items

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "Stanza(%r)" % self.items

    def iter_pairs(self):
        """Return iterator of tag, value pairs."""
        return iter(self.items)

    def to_lines(self):
        """Generate sequence of lines for external version of this file.

        The lines are always utf-8 encoded strings.
        """
        if not self.items:
            return []
        lines = []
        for tag, value in self.items:
            if value == '':
                lines.append(tag + ': \n')
                continue
            # Plain split, not splitlines: empty lines inside the value must
            # be kept.  A value without a newline yields a single piece.
            pieces = value.split('\n')
            lines.append(tag + ': ' + pieces[0].encode('utf-8') + '\n')
            for extra in pieces[1:]:
                # Continuation lines are marked by a leading tab.
                lines.append('\t' + extra.encode('utf-8') + '\n')
        return lines

    def to_string(self):
        """Return stanza as a single string"""
        return ''.join(self.to_lines())

    def to_unicode(self):
        """Return stanza as a single Unicode string.

        This is most useful when adding a Stanza to a parent Stanza
        """
        pieces = []
        for tag, value in self.items:
            # Plain split, not splitlines: empty lines inside the value must
            # be kept.  An empty value yields one empty piece.
            value_lines = value.split('\n')
            pieces.append(tag + ': ' + value_lines[0] + '\n')
            for extra in value_lines[1:]:
                pieces.append('\t' + extra + '\n')
        return u''.join(pieces)

    def write(self, to_file):
        """Write stanza to a file"""
        to_file.writelines(self.to_lines())

    def get(self, tag):
        """Return the value for a field with the given tag.

        If there is more than one value, only the first is returned.  If the
        tag is not present, KeyError is raised.
        """
        for name, value in self.items:
            if name == tag:
                return value
        raise KeyError(tag)

    __getitem__ = get

    def get_all(self, tag):
        """Return a list of the values of every field with the given tag."""
        return [value for name, value in self.items if name == tag]

    def as_dict(self):
        """Return a dict mapping each tag to a value from the stanza.

        When a tag is repeated, the value of its last field is the one kept.
        """
        return dict(self.items)
|
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
238 |
|
239 |
# Anchored with \Z rather than $: '$' also matches just before a trailing
# newline, which would let a tag such as 'foo\n' through.
_tag_re = re.compile(r'^[-a-zA-Z0-9_]+\Z')


def valid_tag(tag):
    """Return True if tag is a legal rio field name.

    A legal name is non-empty and consists only of ascii letters, digits,
    '-' and '_'.
    """
    return bool(_tag_re.match(tag))
|
242 |
||
243 |
||
244 |
def read_stanza(line_iter):
    """Return new Stanza read from list of lines or a file.

    The raw lines must be in utf-8 encoding.  Each line is decoded and the
    decoded lines are passed to read_stanza_unicode; whatever that returns
    (a Stanza, or None when no field was read) is returned unchanged.

    If a blank line follows the stanza, it is consumed.  It's not an error
    for there to be no blank line at end of file.

    Only the stanza lines and the trailing blank (if any) are consumed
    from the line_iter.
    """
    decoded_lines = (raw.decode('utf-8') for raw in line_iter)
    return read_stanza_unicode(decoded_lines)
|
259 |
||
260 |
||
261 |
def read_stanza_unicode(unicode_iter):
    """Read a Stanza from a list of lines or a file.

    The lines should already be in unicode form. This returns a single
    stanza that was read. If there is a blank line at the end of the Stanza,
    it is consumed. It is not an error for there to be no blank line at
    the end of the iterable. If there is a blank line at the beginning,
    this is treated as an empty Stanza and None is returned.

    Only the stanza lines and the trailing blank (if any) are consumed
    from the unicode_iter

    :param unicode_iter: An iterable, yielding Unicode strings. See
        read_stanza if you have a utf-8 encoded string.
    :return: A Stanza object if at least one field was read, None otherwise.
    :raises ValueError: on a continuation line with no preceding field, a
        line without a ': ' separator, or an invalid tag.
    """
    stanza = Stanza()
    tag = None
    accum_value = None

    # TODO: jam 20060922 This code should raise a more specific error than
    #       ValueError when the input is malformed.

    for line in unicode_iter:
        if line is None or line == '':
            break       # end of file
        if line == '\n':
            break       # end of stanza
        # Lines normally end with a newline, but the last line of the input
        # may not.  Only strip a newline that is really there, so the final
        # character of an unterminated last line is not lost.
        if line.endswith('\n'):
            content = line[:-1]
        else:
            content = line
        if content[0] == '\t':  # continues previous value
            if tag is None:
                raise ValueError('invalid continuation line %r' % line)
            accum_value += '\n' + content[1:]
        else:  # new tag:value line
            if tag is not None:
                # The previous field is complete now that a new one starts.
                stanza.add(tag, accum_value)
            try:
                colon_index = content.index(': ')
            except ValueError:
                raise ValueError('tag/value separator not found in line %r'
                                 % line)
            # Tags are kept as plain str objects.
            tag = str(content[:colon_index])
            if not valid_tag(tag):
                raise ValueError("invalid rio tag %r" % (tag,))
            accum_value = content[colon_index + 2:]

    if tag is not None:  # add last tag-value
        stanza.add(tag, accum_value)
        return stanza
    else:  # didn't see any content
        return None
|
1551.12.1
by Aaron Bentley
Basic RIO patch-compatible format is working |
314 |
|
1551.12.38
by Aaron Bentley
Add docs for MergeDirective and RIO-patch functions |
315 |
|
1551.12.10
by Aaron Bentley
Reduce max width to 72 |
316 |
def to_patch_lines(stanza, max_width=72):
    """Convert a stanza into RIO-Patch format lines.

    RIO-Patch is a RIO variant designed to be e-mailed as part of a patch.
    It resists common forms of damage such as newline conversion or the removal
    of trailing whitespace, yet is also reasonably easy to read.

    Every output line starts with '# '.  A logical line that does not fit is
    split over several physical lines: each unfinished physical line ends
    with a backslash, and the text carried over to the next physical line is
    indented by two spaces.

    :param stanza: Object whose to_lines() gives the plain rio lines.
    :param max_width: The maximum number of characters per physical line.
    :return: a list of lines
    :raises ValueError: if max_width is 6 or less.
    """
    if max_width <= 6:
        raise ValueError(max_width)
    # Leave room for the '# ' prefix, a trailing backslash and the newline.
    max_rio_width = max_width - 4
    lines = []
    for pline in stanza.to_lines():
        for line in pline.split('\n')[:-1]:
            # Escape literal backslashes first, so that the escapes added
            # below stay unambiguous.
            line = line.replace('\\', '\\\\')
            while line:
                partline = line[:max_rio_width]
                line = line[max_rio_width:]
                # NOTE: the original condition also compared line[0] with the
                # list [' '], which can never be equal; the break point is
                # therefore adjusted whenever any text is left over.
                if line:
                    # Prefer to break at a space, then after a '-', then
                    # before a '/', looking only at the last 20 characters.
                    break_index = partline.rfind(' ', -20)
                    if break_index < 3:
                        break_index = partline.rfind('-', -20)
                        break_index += 1
                    if break_index < 3:
                        break_index = partline.rfind('/', -20)
                    if break_index >= 3:
                        line = partline[break_index:] + line
                        partline = partline[:break_index]
                if line:
                    # Continuation text is indented by exactly two
                    # characters: _patch_stanza_iter strips two characters
                    # from every continuation line, so any other indent
                    # corrupts the value on the way back in.
                    line = '  ' + line
                partline = partline.replace('\r', '\\r')
                blank_line = False
                if line:
                    partline += '\\'
                elif partline.endswith(' '):
                    # Protect trailing whitespace from being stripped by
                    # ending the line with a backslash and terminating the
                    # logical line on a separate, otherwise empty line.
                    partline += '\\'
                    blank_line = True
                lines.append('# ' + partline + '\n')
                if blank_line:
                    # Indented like any other continuation line; the reader
                    # strips the two spaces again.
                    lines.append('#   \n')
    return lines
360 |
||
1551.12.23
by Aaron Bentley
Indent line continuations in Patch-RIO, to improve readability |
361 |
|
1551.12.1
by Aaron Bentley
Basic RIO patch-compatible format is working |
362 |
def _patch_stanza_iter(line_iter): |
363 |
map = {'\\\\': '\\', |
|
1551.12.9
by Aaron Bentley
force patch-rio to 76 characters |
364 |
'\\r' : '\r', |
365 |
'\\\n': ''} |
|
1551.12.1
by Aaron Bentley
Basic RIO patch-compatible format is working |
366 |
def mapget(match): |
367 |
return map[match.group(0)] |
|
368 |
||
1551.12.9
by Aaron Bentley
force patch-rio to 76 characters |
369 |
last_line = None |
1551.12.1
by Aaron Bentley
Basic RIO patch-compatible format is working |
370 |
for line in line_iter: |
1551.12.22
by Aaron Bentley
Fix handling of whitespace-stripping without newline munging |
371 |
if line.startswith('# '): |
372 |
line = line[2:] |
|
3376.2.4
by Martin Pool
Remove every assert statement from bzrlib! |
373 |
elif line.startswith('#'): |
1551.12.22
by Aaron Bentley
Fix handling of whitespace-stripping without newline munging |
374 |
line = line[1:] |
3376.2.4
by Martin Pool
Remove every assert statement from bzrlib! |
375 |
else: |
376 |
raise ValueError("bad line %r" % (line,)) |
|
1551.12.23
by Aaron Bentley
Indent line continuations in Patch-RIO, to improve readability |
377 |
if last_line is not None and len(line) > 2: |
378 |
line = line[2:] |
|
1551.12.1
by Aaron Bentley
Basic RIO patch-compatible format is working |
379 |
line = re.sub('\r', '', line) |
1551.12.9
by Aaron Bentley
force patch-rio to 76 characters |
380 |
line = re.sub('\\\\(.|\n)', mapget, line) |
381 |
if last_line is None: |
|
382 |
last_line = line |
|
383 |
else: |
|
384 |
last_line += line |
|
385 |
if last_line[-1] == '\n': |
|
386 |
yield last_line |
|
387 |
last_line = None |
|
388 |
if last_line is not None: |
|
389 |
yield last_line |
|
1551.12.1
by Aaron Bentley
Basic RIO patch-compatible format is working |
390 |
|
1551.12.23
by Aaron Bentley
Indent line continuations in Patch-RIO, to improve readability |
391 |
|
1551.12.1
by Aaron Bentley
Basic RIO patch-compatible format is working |
392 |
def read_patch_stanza(line_iter):
    """Convert an iterable of RIO-Patch format lines into a Stanza.

    RIO-Patch is a RIO variant designed to be e-mailed as part of a patch.
    It resists common forms of damage such as newline conversion or the removal
    of trailing whitespace, yet is also reasonably easy to read.

    The lines are first converted back to plain rio lines with
    _patch_stanza_iter and then parsed by read_stanza.

    :return: whatever read_stanza returns for the converted lines
    """
    rio_lines = _patch_stanza_iter(line_iter)
    return read_stanza(rio_lines)