1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
1 |
# Copyright (C) 2005 by Canonical Ltd
|
2 |
#
|
|
3 |
# Distributed under the GNU General Public Licence v2
|
|
1553.5.6
by Martin Pool
Clean up comments |
4 |
|
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
5 |
# \subsection{\emph{rio} - simple text metaformat}
|
6 |
#
|
|
7 |
# \emph{r} stands for `restricted', `reproducible', or `rfc822-like'.
|
|
8 |
#
|
|
9 |
# The stored data consists of a series of \emph{stanzas}, each of which contains
|
|
10 |
# \emph{fields} identified by an ascii name, with Unicode or string contents.
|
|
11 |
# The field tag is constrained to alphanumeric characters.
|
|
12 |
# There may be more than one field in a stanza with the same name.
|
|
13 |
#
|
|
14 |
# The format itself does not deal with character encoding issues, though
|
|
15 |
# the result will normally be written in Unicode.
|
|
16 |
#
|
|
17 |
# The format is intended to be simple enough that there is exactly one character
|
|
18 |
# stream representation of an object and vice versa, and that this relation
|
|
19 |
# will continue to hold for future versions of bzr.
|
|
20 |
||
21 |
import re |
|
22 |
||
1534.10.2
by Aaron Bentley
Implemented rio_file to produce a light file object from stanzas |
23 |
from bzrlib.iterablefile import IterableFile |
24 |
||
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
25 |
# XXX: There is some redundancy in allowing stanzas to be written in
# isolation as well as through a writer object.
|
|
27 |
||
28 |
class RioWriter(object):
    """Write a sequence of stanzas to a file-like object.

    Consecutive stanzas are separated by a single blank line; no blank
    line is written before the first stanza or after the last one.
    """

    def __init__(self, to_file):
        """Create a writer that emits stanzas to to_file.

        to_file must provide write() (used here for the separator) and
        whatever Stanza.write() needs from it.
        """
        # True once at least one stanza has been written, meaning the next
        # stanza must be preceded by a separating blank line.
        self._soft_nl = False
        self._to_file = to_file

    def write_stanza(self, stanza):
        """Write stanza to the underlying file, preceded by a blank line
        if another stanza has already been written.
        """
        if self._soft_nl:
            # Explicit write() rather than the ``print >>file`` statement:
            # same output, but it does not depend on Python-2-only syntax
            # and matches how Stanza.write() talks to the file.
            self._to_file.write('\n')
        stanza.write(self._to_file)
        self._soft_nl = True
|
38 |
||
39 |
||
40 |
class RioReader(object):
    """Read stanzas from a file as a sequence.

    from_file can be anything that can be enumerated as a sequence of
    lines (with newlines): an open file, or a list of lines.
    """

    def __init__(self, from_file):
        """Remember from_file; nothing is read until iteration starts."""
        self._from_file = from_file

    def __iter__(self):
        """Yield each Stanza in turn until read_stanza() reports the end
        of input by returning None.
        """
        # Obtain a single iterator up front.  read_stanza() loops over its
        # argument with ``for``; handing it a re-iterable object such as a
        # list on every call would restart from the first line each time
        # and yield the first stanza forever.  For file objects iter()
        # returns the file itself, so their behaviour is unchanged.
        line_iter = iter(self._from_file)
        while True:
            stanza = read_stanza(line_iter)
            if stanza is None:
                break
            yield stanza
|
56 |
||
1534.10.2
by Aaron Bentley
Implemented rio_file to produce a light file object from stanzas |
57 |
|
58 |
def rio_file(stanzas, header=None):
    """Wrap an iterable of stanzas in an IterableFile of rio text.

    If header is not None it is emitted first, followed by a newline.
    Each stanza contributes the lines from its to_lines() method, and a
    single blank line is placed between consecutive stanzas.
    """
    def _generate_lines():
        if header is not None:
            yield header + '\n'
        need_separator = False
        for stanza in stanzas:
            # A blank line goes before every stanza except the first.
            if need_separator:
                yield '\n'
            need_separator = True
            for text in stanza.to_lines():
                yield text

    return IterableFile(_generate_lines())
|
71 |
||
72 |
||
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
73 |
def read_stanzas(from_file):
    """Generate each Stanza read from from_file, in order.

    from_file may be any iterable of lines (with newlines), such as an
    open file or a list of lines.  Iteration stops when read_stanza()
    returns None.
    """
    # Take one iterator for the whole run.  read_stanza() iterates its
    # argument with ``for``, so passing a list (or any other re-iterable)
    # afresh on every call would restart at the first line and produce the
    # first stanza endlessly.  Iterators such as file objects return
    # themselves from iter(), so they behave exactly as before.
    line_iter = iter(from_file)
    while True:
        stanza = read_stanza(line_iter)
        if stanza is None:
            break
        yield stanza
|
80 |
||
81 |
class Stanza(object):
    """One rio stanza: an ordered list of (tag, value) fields.

    Tags must satisfy valid_tag(): non-empty and made only of ascii
    letters, digits, '-' and '_'.  Tags are case-sensitive and the same
    tag may appear several times in one stanza.  Fields are kept in the
    order in which they were added.

    Values are stored as unicode strings.  A plain str passed to add() is
    converted to unicode; any other type is rejected with TypeError.
    """

    __slots__ = ['items']

    def __init__(self, **kwargs):
        """Create a stanza, optionally filled from keyword arguments.

        Keyword arguments are added as fields in sorted order of name.
        """
        self.items = []
        for name, val in sorted(kwargs.items()):
            self.add(name, val)

    def add(self, tag, value):
        """Append a (tag, value) field to the end of the stanza.

        Raises TypeError if value is neither str nor unicode.
        """
        assert valid_tag(tag), "invalid tag %r" % tag
        if isinstance(value, str):
            value = unicode(value)
        elif not isinstance(value, unicode):
            raise TypeError("invalid type for rio value: %r of type %s"
                            % (value, type(value)))
        self.items.append((tag, value))

    def __contains__(self, find_tag):
        """Return True if at least one field carries the tag find_tag."""
        for name, _unused in self.items:
            if name == find_tag:
                return True
        return False

    def __len__(self):
        """Return the number of fields (repeated tags count each time)."""
        return len(self.items)

    def __eq__(self, other):
        """Stanzas are equal when their field lists are equal, in order.

        Anything that is not a Stanza compares unequal.
        """
        return isinstance(other, Stanza) and self.items == other.items

    def __ne__(self, other):
        """Inverse of __eq__."""
        same = self.__eq__(other)
        return not same

    def __repr__(self):
        """Return a debugging representation showing the field list."""
        return "Stanza(%r)" % (self.items,)

    def iter_pairs(self):
        """Return an iterator over the (tag, value) pairs, in order."""
        return iter(self.items)

    def to_lines(self):
        """Return the external form of this stanza as a list of lines.

        Each field becomes a 'tag: value' line.  A value containing
        newlines is spread over several lines, every line after the first
        starting with a tab.  Values are encoded to utf-8, so the
        resulting lines are byte strings.  An empty stanza gives [].
        """
        lines = []
        for tag, value in self.items:
            assert isinstance(tag, str), type(tag)
            assert isinstance(value, unicode)
            # split('\n') rather than splitlines(): empty lines inside the
            # value must be preserved.  A value with no newline (including
            # the empty value) gives exactly one piece.
            pieces = value.split('\n')
            lines.append(tag + ': ' + pieces[0].encode('utf-8') + '\n')
            for extra in pieces[1:]:
                lines.append('\t' + extra.encode('utf-8') + '\n')
        return lines

    def to_string(self):
        """Return the external form of the stanza as one string."""
        return ''.join(self.to_lines())

    def write(self, to_file):
        """Write the external form of the stanza to to_file."""
        to_file.writelines(self.to_lines())

    def get(self, tag):
        """Return the value of the first field with the given tag.

        Later fields with the same tag are ignored.  Raises KeyError if
        no field has that tag.
        """
        for name, val in self.items:
            if name == tag:
                return val
        raise KeyError(tag)

    __getitem__ = get

    def get_all(self, tag):
        """Return a list of the values of every field with the given tag.

        The list is in stanza order and is empty if the tag is absent.
        """
        return [val for name, val in self.items if name == tag]

    def as_dict(self):
        """Return a dict mapping each tag to its value.

        Every tag is expected to occur only once; a repeated tag trips an
        assertion.
        """
        mapping = {}
        for name, val in self.items:
            assert name not in mapping
            mapping[name] = val
        return mapping
|
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
207 |
|
208 |
# Anchored with \Z rather than $: in Python's re module, $ also matches
# just before a trailing newline, which would let a tag such as 'foo\n'
# through and corrupt the line-oriented output format.
_tag_re = re.compile(r'^[-a-zA-Z0-9_]+\Z')


def valid_tag(tag):
    """Return True if tag is a legal rio field name.

    A legal name is non-empty and consists only of ascii letters, digits,
    '-' and '_'.
    """
    return bool(_tag_re.match(tag))
|
211 |
||
212 |
||
213 |
def read_stanza(line_iter):
    """Return a new Stanza read from a list of lines or a file.

    Lines are consumed from line_iter until a blank line or the end of
    the input is reached.  A blank line that ends the stanza is consumed.
    It is not an error for the input to end without a blank line, nor for
    the final line to lack its trailing newline.

    When line_iter is an iterator (for example an open file), only the
    stanza's lines and the trailing blank line (if any) are consumed, so
    the function can be called repeatedly to read successive stanzas.

    The raw lines must be in utf-8 encoding.

    Returns the Stanza, or None if no field was read before the end of
    the input or a blank line.

    Raises ValueError if a continuation line appears before any field, if
    a line has no ': ' separator, or if a tag is not accepted by
    valid_tag().
    """
    stanza = Stanza()
    tag = None
    # Pieces of the value of the field currently being read; joined when
    # the field is complete so multi-line values are not built up with
    # repeated string concatenation.
    value_parts = None
    for raw_line in line_iter:
        if raw_line is None or raw_line == '':
            break       # end of file
        if raw_line == '\n':
            break       # end of stanza
        # Keep the full decoded line for use in error messages.
        real_l = raw_line.decode('utf-8')
        line = real_l
        # Drop the line terminator only if it is really there, so a final
        # line without a newline does not lose its last character.
        if line.endswith('\n'):
            line = line[:-1]
        if line[0] == '\t':     # continues previous value
            if tag is None:
                raise ValueError('invalid continuation line %r' % real_l)
            value_parts.append('\n' + line[1:])
        else:                   # new tag:value line
            if tag is not None:
                stanza.add(tag, u''.join(value_parts))
            try:
                colon_index = line.index(': ')
            except ValueError:
                raise ValueError('tag/value separator not found in line %r' % real_l)
            tag = str(line[:colon_index])
            # Raised explicitly rather than asserted: the input is external
            # data, and assert statements are removed when running with -O.
            if not valid_tag(tag):
                raise ValueError("invalid rio tag %r" % (tag,))
            value_parts = [line[colon_index + 2:]]
    if tag is None:     # didn't see any content
        return None
    # add last tag-value
    stanza.add(tag, u''.join(value_parts))
    return stanza