2052.3.2
by John Arbash Meinel
Change Copyright .. by Canonical to Copyright ... Canonical |
1 |
# Copyright (C) 2005 Canonical Ltd
|
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
2 |
#
|
2052.3.1
by John Arbash Meinel
Add tests to cleanup the copyright of all source files |
3 |
# This program is free software; you can redistribute it and/or modify
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
1553.5.6
by Martin Pool
Clean up comments |
16 |
|
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
17 |
# \subsection{\emph{rio} - simple text metaformat}
|
18 |
#
|
|
19 |
# \emph{r} stands for `restricted', `reproducible', or `rfc822-like'.
|
|
20 |
#
|
|
21 |
# The stored data consists of a series of \emph{stanzas}, each of which contains
|
|
22 |
# \emph{fields} identified by an ascii name, with Unicode or string contents.
|
|
23 |
# The field tag is constrained to alphanumeric characters.
|
|
24 |
# There may be more than one field in a stanza with the same name.
|
|
25 |
#
|
|
26 |
# The format itself does not deal with character encoding issues, though
|
|
27 |
# the result will normally be written in Unicode.
|
|
28 |
#
|
|
29 |
# The format is intended to be simple enough that there is exactly one character
|
|
30 |
# stream representation of an object and vice versa, and that this relation
|
|
31 |
# will continue to hold for future versions of bzr.
|
|
32 |
||
33 |
import re |
|
34 |
||
1534.10.2
by Aaron Bentley
Implemented rio_file to produce a light file object from stanzas |
35 |
from bzrlib.iterablefile import IterableFile |
36 |
||
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
37 |
# XXX: there is some redundancy in allowing stanzas to be written in isolation as well as
|
38 |
# through a writer object.
|
|
39 |
||
40 |
class RioWriter(object):
    """Write a series of stanzas to a file, separated by blank lines.

    :param to_file: Object the stanzas are written to.  It is passed to each
        stanza's write() method and must itself provide write().
    """

    def __init__(self, to_file):
        # True once a stanza has been written, i.e. a blank separator line
        # must be emitted before the next one.
        self._soft_nl = False
        self._to_file = to_file

    def write_stanza(self, stanza):
        """Write one stanza, preceded by a blank line if it is not the first.

        :param stanza: Object with a write(to_file) method.
        """
        if self._soft_nl:
            # Emit the separator with an explicit write rather than the
            # 'print >>file' statement: same output, but it does not depend
            # on print-statement syntax or its softspace bookkeeping.
            self._to_file.write('\n')
        stanza.write(self._to_file)
        self._soft_nl = True
|
50 |
||
51 |
||
52 |
class RioReader(object):
    """Read the stanzas of a file as a sequence.

    from_file is handed unchanged to read_stanza, so it can be anything that
    can be enumerated as a sequence of lines (with newlines).
    """

    def __init__(self, from_file):
        self._from_file = from_file

    def __iter__(self):
        """Yield stanzas until read_stanza returns None."""
        while True:
            stanza = read_stanza(self._from_file)
            if stanza is None:
                # read_stanza found no fields: stop iterating.
                return
            yield stanza
|
68 |
||
1534.10.2
by Aaron Bentley
Implemented rio_file to produce a light file object from stanzas |
69 |
|
70 |
def rio_file(stanzas, header=None):
    """Wrap an iterable of stanzas in an IterableFile of rio text.

    :param stanzas: Iterable of objects providing to_lines().
    :param header: Optional string; when not None it is emitted first,
        followed by a newline.
    :return: An IterableFile built over a lazy generator of the lines.
    """
    def str_iter():
        if header is not None:
            yield header + '\n'
        # A blank line goes between consecutive stanzas, never before the
        # first one.
        need_separator = False
        for stanza in stanzas:
            if need_separator:
                yield '\n'
            for text_line in stanza.to_lines():
                yield text_line
            need_separator = True
    return IterableFile(str_iter())
|
83 |
||
84 |
||
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
85 |
def read_stanzas(from_file):
    """Generate the stanzas read from from_file, one at a time.

    read_stanza is called repeatedly on from_file; the generator finishes the
    first time it returns None.
    """
    stanza = read_stanza(from_file)
    while stanza is not None:
        yield stanza
        stanza = read_stanza(from_file)
|
92 |
||
93 |
class Stanza(object):
    """One stanza for rio: an ordered collection of tagged fields.

    Field tags must be accepted by valid_tag.  A tag may occur more than once
    within a stanza, tags are case-sensitive, and fields keep the order in
    which they were added.

    Field values must be str or unicode; they are stored as unicode.
    """

    __slots__ = ['items']

    def __init__(self, **kwargs):
        """Construct a new Stanza.

        The keyword arguments, if any, are added as fields in sorted order.
        """
        self.items = []
        # Keyword names are unique, so ordering by name alone gives the same
        # sequence as sorting the (name, value) pairs.
        for tag in sorted(kwargs):
            self.add(tag, kwargs[tag])

    def add(self, tag, value):
        """Append a (tag, value) field to the stanza.

        A str value is converted with unicode(); a unicode value is stored
        as is.  Any other type raises TypeError.  The tag is checked with an
        assertion against valid_tag.
        """
        assert valid_tag(tag), \
            ("invalid tag %r" % tag)
        if isinstance(value, str):
            value = unicode(value)
        elif not isinstance(value, unicode):
            raise TypeError("invalid type for rio value: %r of type %s"
                            % (value, type(value)))
        self.items.append((tag, value))

    def __contains__(self, find_tag):
        """True if there is any field in this stanza with the given tag."""
        return find_tag in [tag for tag, value in self.items]

    def __len__(self):
        """Return the number of fields in the stanza."""
        return len(self.items)

    def __eq__(self, other):
        """Stanzas are equal when both hold the same fields in the same order."""
        return isinstance(other, Stanza) and self.items == other.items

    def __ne__(self, other):
        equal = self.__eq__(other)
        return not equal

    def __repr__(self):
        return "Stanza(%r)" % (self.items,)

    def iter_pairs(self):
        """Return an iterator over the (tag, value) pairs."""
        return iter(self.items)

    def to_lines(self):
        """Generate the list of lines for the external form of this stanza.

        The lines are always utf-8 encoded strings.  The first line of a
        value follows 'tag: '; every further line of a multi-line value is
        written as a continuation line starting with a tab.
        """
        lines = []
        for tag, value in self.items:
            assert isinstance(tag, str), type(tag)
            assert isinstance(value, unicode)
            # split('\n') rather than splitlines(): empty lines inside the
            # value must be kept.  An empty or single-line value yields
            # exactly one piece and therefore no continuation lines.
            pieces = value.split('\n')
            lines.append(tag + ': ' + pieces[0].encode('utf-8') + '\n')
            for piece in pieces[1:]:
                lines.append('\t' + piece.encode('utf-8') + '\n')
        return lines

    def to_string(self):
        """Return the stanza as a single string."""
        return ''.join(self.to_lines())

    def to_unicode(self):
        """Return the stanza as a single Unicode string.

        This is most useful when adding a Stanza to a parent Stanza.
        """
        parts = []
        for tag, value in self.items:
            # Same layout as to_lines, but the text is left undecoded.
            pieces = value.split('\n')
            parts.append(tag + ': ' + pieces[0] + '\n')
            parts.extend(['\t' + piece + '\n' for piece in pieces[1:]])
        return u''.join(parts)

    def write(self, to_file):
        """Write the lines of the stanza to a file."""
        to_file.writelines(self.to_lines())

    def get(self, tag):
        """Return the value of the field with the given tag.

        If there is more than one such field, only the first value is
        returned.  If the tag is not present, KeyError is raised.
        """
        for field_tag, field_value in self.items:
            if field_tag == tag:
                return field_value
        raise KeyError(tag)

    __getitem__ = get

    def get_all(self, tag):
        """Return a list of the values of every field with the given tag."""
        return [v for t, v in self.items if t == tag]

    def as_dict(self):
        """Return a dict mapping each tag to its value.

        Tags are asserted to be unique within the stanza.
        """
        mapping = {}
        for tag, value in self.items:
            assert tag not in mapping
            mapping[tag] = value
        return mapping
|
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
241 |
|
242 |
# A tag is one or more ascii letters, digits, '-' or '_'.  The pattern is
# anchored with \Z rather than $, because $ also matches just before a
# trailing newline and would let a tag such as 'foo\n' through.
_tag_re = re.compile(r'^[-a-zA-Z0-9_]+\Z')


def valid_tag(tag):
    """Return True if tag is an acceptable rio field name, else False."""
    return bool(_tag_re.match(tag))
|
245 |
||
246 |
||
247 |
def read_stanza(line_iter):
    """Return a new Stanza read from a list of lines or a file.

    The raw lines must be in utf-8 encoding; each one is decoded as it is
    needed and the result is parsed by read_stanza_unicode.

    Returns the Stanza that was read, or None if the input ended or a blank
    line was found before any field.  If a blank line follows the stanza, it
    is consumed.  It's not an error for there to be no blank line at the end
    of the file.

    Only the stanza lines and the trailing blank (if any) are consumed from
    line_iter.
    """
    def decoded_lines():
        # Decode lazily so that nothing beyond the stanza is consumed.
        for raw_line in line_iter:
            yield raw_line.decode('utf-8')
    return read_stanza_unicode(decoded_lines())
|
262 |
||
263 |
||
264 |
def read_stanza_unicode(unicode_iter):
    """Read a Stanza from a list of lines or a file.

    The lines should already be in unicode form.  This returns the single
    stanza that was read.  If there is a blank line at the end of the stanza,
    it is consumed.  It is not an error for there to be no blank line at the
    end of the iterable, but every stanza line must end with a newline.

    Only the stanza lines and the trailing blank (if any) are consumed
    from the unicode_iter.

    :param unicode_iter: An iterable, yielding Unicode strings.  See
        read_stanza if you have a utf-8 encoded string.
    :return: A Stanza object if at least one field was read.  None if the
        input ended, or a blank line was found, before any field.
    :raises ValueError: if a line does not end with a newline, a continuation
        line appears before any field, a line has no ': ' separator, or a
        tag is rejected by valid_tag.
    """
    stanza = Stanza()
    tag = None
    # Pieces of the value of the field currently being read.  They are joined
    # once the field is complete, so a long multi-line value is not rebuilt
    # for every continuation line.
    accum_value = None

    for line in unicode_iter:
        if line is None or line == '':
            break       # end of file
        if line == '\n':
            break       # end of stanza
        # The final character of every line is sliced off below as its
        # newline, so a line without one would silently lose real data.
        # Raised explicitly because an assert disappears under -O.
        if not line.endswith('\n'):
            raise ValueError('missing trailing newline on line %r' % (line,))
        if line[0] == '\t':     # continues previous value
            if tag is None:
                raise ValueError('invalid continuation line %r' % (line,))
            accum_value.append('\n' + line[1:-1])
        else:                   # new tag:value line
            if tag is not None:
                stanza.add(tag, ''.join(accum_value))
            try:
                colon_index = line.index(': ')
            except ValueError:
                raise ValueError('tag/value separator not found in line %r'
                                 % (line,))
            tag = str(line[:colon_index])
            if not valid_tag(tag):
                raise ValueError("invalid rio tag %r" % (tag,))
            accum_value = [line[colon_index+2:-1]]

    if tag is not None:     # add last tag-value
        stanza.add(tag, ''.join(accum_value))
        return stanza
    else:                   # didn't see any content
        return None