1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
1 |
# Copyright (C) 2005 by Canonical Ltd
|
2 |
#
|
|
3 |
# Distributed under the GNU General Public Licence v2
|
|
1553.5.6
by Martin Pool
Clean up comments |
4 |
|
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
5 |
# \subsection{\emph{rio} - simple text metaformat}
|
6 |
#
|
|
7 |
# \emph{r} stands for `restricted', `reproducible', or `rfc822-like'.
|
|
8 |
#
|
|
9 |
# The stored data consists of a series of \emph{stanzas}, each of which contains
|
|
10 |
# \emph{fields} identified by an ascii name, with Unicode or string contents.
|
|
11 |
# The field tag is constrained to alphanumeric characters.
|
|
12 |
# There may be more than one field in a stanza with the same name.
|
|
13 |
#
|
|
14 |
# The format itself does not deal with character encoding issues, though
|
|
15 |
# the result will normally be written in Unicode.
|
|
16 |
#
|
|
17 |
# The format is intended to be simple enough that there is exactly one character
|
|
18 |
# stream representation of an object and vice versa, and that this relation
|
|
19 |
# will continue to hold for future versions of bzr.
|
|
20 |
||
21 |
import re |
|
22 |
||
23 |
# XXX: there is some redundancy in allowing stanzas to be written in
# isolation as well as through a writer object.
|
|
25 |
||
26 |
class RioWriter(object):
    """Write a series of stanzas to a file, separated by blank lines.

    to_file must provide ``write``; it is also handed to each stanza's own
    ``write`` method, which decides what else it needs from the file.
    """

    def __init__(self, to_file):
        # Becomes True once a stanza has been written, meaning the next
        # stanza must be preceded by a blank separator line.
        self._soft_nl = False
        self._to_file = to_file

    def write_stanza(self, stanza):
        """Write one stanza, preceded by a blank line unless it is the first.

        stanza is any object with a ``write(to_file)`` method.
        """
        if self._soft_nl:
            # Emits the same single newline as ``print >>self._to_file``
            # did, without relying on the Python 2 print statement.
            self._to_file.write('\n')
        stanza.write(self._to_file)
        self._soft_nl = True
|
36 |
||
37 |
||
38 |
class RioReader(object):
    """Read stanzas from a file as a sequence.

    from_file can be anything that can be enumerated as a sequence of
    lines (with newlines.)
    """

    def __init__(self, from_file):
        self._from_file = from_file

    def __iter__(self):
        """Yield each stanza in turn until read_stanza returns None."""
        # Obtain a single iterator up front.  read_stanza loops with
        # ``for line in ...``; handing it a re-iterable container such as
        # a list on every pass would restart from the first line each time
        # and yield the first stanza forever.  For objects that are their
        # own iterator, iter() returns the object itself.
        line_iter = iter(self._from_file)
        while True:
            stanza = read_stanza(line_iter)
            if stanza is None:
                break
            yield stanza
|
54 |
||
55 |
def read_stanzas(from_file):
    """Generate each stanza found in from_file.

    from_file is any iterable of lines (with newlines).  Stanzas are
    produced by calling read_stanza repeatedly; iteration stops the first
    time it returns None.
    """
    # Obtain a single iterator up front.  read_stanza loops with
    # ``for line in ...``; handing it a re-iterable container such as a
    # list on every pass would restart from the first line each time and
    # yield the first stanza forever.
    line_iter = iter(from_file)
    while True:
        stanza = read_stanza(line_iter)
        if stanza is None:
            break
        yield stanza
|
62 |
||
63 |
class Stanza(object):
    """One stanza for rio.

    Each stanza contains a set of named fields.

    Names must be non-empty and pass valid_tag (ascii letters, digits,
    '_' and '-').  Names can be repeated within a stanza.  Names are
    case-sensitive.  The ordering of fields is preserved.

    Each field value must be a str or unicode string; values are held as
    unicode.  Other types, including ints, are rejected by add().
    """

    # The only instance attribute: a list of (tag, value) pairs in the
    # order they were added.
    __slots__ = ['items']

    def __init__(self, **kwargs):
        """Construct a new Stanza.

        The keyword arguments, if any, are added in sorted order to the stanza.
        """
        self.items = []
        for tag, value in sorted(kwargs.items()):
            self.add(tag, value)

    def add(self, tag, value):
        """Append a name and value to the stanza.

        A str value is converted with unicode(); a unicode value is stored
        unchanged.

        Raises ValueError if tag is not accepted by valid_tag, and
        TypeError if value is neither str nor unicode.
        """
        # Raised explicitly rather than asserted so that the check still
        # runs when Python is started with -O.
        if not valid_tag(tag):
            raise ValueError("invalid tag %r" % (tag,))
        if isinstance(value, str):
            value = unicode(value)
        elif not isinstance(value, unicode):
            raise TypeError("invalid type for rio value: %r of type %s"
                            % (value, type(value)))
        self.items.append((tag, value))

    def __contains__(self, find_tag):
        """True if there is any field in this stanza with the given tag."""
        for tag, value in self.items:
            if tag == find_tag:
                return True
        return False

    def __len__(self):
        """Return number of pairs in the stanza."""
        return len(self.items)

    def __eq__(self, other):
        """Stanzas are equal when they hold the same pairs in the same order."""
        if not isinstance(other, Stanza):
            return False
        return self.items == other.items

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "Stanza(%r)" % self.items

    def iter_pairs(self):
        """Return iterator of tag, value pairs."""
        return iter(self.items)

    def to_lines(self):
        """Generate sequence of lines for external version of this file.

        The lines are always utf-8 encoded strings.  Each field produces a
        'tag: first line' line; any further lines of a multi-line value
        follow as continuation lines that start with a tab.  An empty
        stanza produces an empty list.
        """
        result = []
        for tag, value in self.items:
            assert isinstance(tag, str)
            assert isinstance(value, unicode)
            # split('\n') rather than splitlines(): empty lines inside the
            # value, and a completely empty value, must each still give
            # one output line.
            val_lines = value.split('\n')
            result.append(tag + ': ' + val_lines[0].encode('utf-8') + '\n')
            for line in val_lines[1:]:
                result.append('\t' + line.encode('utf-8') + '\n')
        return result

    def to_string(self):
        """Return stanza as a single string"""
        return ''.join(self.to_lines())

    def write(self, to_file):
        """Write stanza to a file"""
        to_file.writelines(self.to_lines())

    def get(self, tag):
        """Return the value for a field with given tag.

        If there is more than one value, only the first is returned.  If the
        tag is not present, KeyError is raised.
        """
        for t, v in self.items:
            if t == tag:
                return v
        raise KeyError(tag)

    __getitem__ = get

    def get_all(self, tag):
        """Return a list of every value stored under tag, in stanza order.

        The list is empty if the tag is not present.
        """
        r = []
        for t, v in self.items:
            if t == tag:
                r.append(v)
        return r

    def as_dict(self):
        """Return a dict containing the unique values of the stanza.

        Every tag is expected to occur only once; a repeated tag trips
        the assertion below.
        """
        d = {}
        for tag, value in self.items:
            assert tag not in d
            d[tag] = value
        return d
|
1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
189 |
|
190 |
# A tag is one or more ascii letters, digits, '_' or '-'.  The end is
# anchored with \Z rather than $, because $ also matches just before a
# trailing newline and would let a tag such as 'foo\n' through.
_tag_re = re.compile(r'^[-a-zA-Z0-9_]+\Z')


def valid_tag(tag):
    """Return True if tag is an acceptable rio field name, else False."""
    return bool(_tag_re.match(tag))
|
193 |
||
194 |
||
195 |
def read_stanza(line_iter):
    """Return new Stanza read from list of lines or a file

    Returns one Stanza that was read, or returns None at end of file.  If a
    blank line follows the stanza, it is consumed.  It's not an error for
    there to be no blank at end of file.  A blank line met before any
    field has been read also gives None, so it cannot be told apart from
    end of file.

    Only the stanza lines and the trailing blank (if any) are consumed
    from the line_iter.  This relies on line_iter being an iterator; a
    plain list is read from its first line on every call.

    The raw lines must be in utf-8 encoding.

    Raises ValueError if a line does not end with a newline, if a
    continuation line comes before any field, if a line has no ': '
    separator, or if a tag is not accepted by valid_tag.
    """
    stanza = Stanza()
    tag = None
    accum_value = None
    for line in line_iter:
        if line is None or line == '':
            break       # end of file
        if line == '\n':
            break       # end of stanza
        line = line.decode('utf-8')
        # The slices below drop the final character as the newline, so a
        # line without one would silently lose its last character.
        if line[-1] != '\n':
            raise ValueError('line not terminated by newline: %r' % line)
        if line[0] == '\t':     # continues previous value
            if tag is None:
                raise ValueError('invalid continuation line %r' % line)
            accum_value += '\n' + line[1:-1]
        else:                   # new tag:value line
            if tag is not None:
                stanza.add(tag, accum_value)
            try:
                colon_index = line.index(': ')
            except ValueError:
                raise ValueError('tag/value separator not found in line %r'
                                 % line)
            tag = line[:colon_index]
            # Raised explicitly rather than asserted so that the check
            # still runs when Python is started with -O.
            if not valid_tag(tag):
                raise ValueError("invalid rio tag %r" % (tag,))
            # Decoding the line made the tag unicode; Stanza.to_lines
            # asserts that tags are plain str, so convert it back.  This
            # cannot fail because a valid tag is pure ascii.
            tag = str(tag)
            accum_value = line[colon_index+2:-1]
    if tag is not None:     # add last tag-value
        stanza.add(tag, accum_value)
        return stanza
    else:                   # didn't see any content
        return None