1185.47.1
by Martin Pool
[broken] start converting basic_io to more rfc822-like format |
1 |
# Copyright (C) 2005 by Canonical Ltd
|
2 |
#
|
|
3 |
# Distributed under the GNU General Public Licence v2
|
|
4 |
#
|
|
5 |
# \subsection{\emph{rio} - simple text metaformat}
|
|
6 |
#
|
|
7 |
# \emph{r} stands for `restricted', `reproducible', or `rfc822-like'.
|
|
8 |
#
|
|
9 |
# The stored data consists of a series of \emph{stanzas}, each of which contains
|
|
10 |
# \emph{fields} identified by an ascii name, with Unicode or string contents.
|
|
11 |
# The field tag is constrained to ascii alphanumeric characters, plus hyphen and underscore.
|
|
12 |
# There may be more than one field in a stanza with the same name.
|
|
13 |
#
|
|
14 |
# The format itself does not deal with character encoding issues, though
|
|
15 |
# the result will normally be written in Unicode.
|
|
16 |
#
|
|
17 |
# The format is intended to be simple enough that there is exactly one character
|
|
18 |
# stream representation of an object and vice versa, and that this relation
|
|
19 |
# will continue to hold for future versions of bzr.
|
|
20 |
||
21 |
# In comments, $\min(1,10)$
|
|
22 |
||
23 |
min(1,10) |
|
24 |
||
25 |
import re |
|
26 |
||
27 |
# XXX: there is some redundancy in allowing stanzas to be written in isolation as well as
|
|
28 |
# through a writer object.
|
|
29 |
||
30 |
class RioWriter(object):
    """Write a series of stanzas to a file, separated by blank lines.

    The separator is emitted lazily: nothing is written before the first
    stanza, and a single newline is written before each later one.
    """

    def __init__(self, to_file):
        """Remember the output file; no stanza has been written yet."""
        self._to_file = to_file
        # True once a stanza has been written, i.e. the next one needs a
        # separating blank line in front of it.
        self._soft_nl = False

    def write_stanza(self, stanza):
        """Write one stanza, preceded by a blank line if it is not the first.

        stanza must provide a write(to_file) method.
        """
        out = self._to_file
        if self._soft_nl:
            out.write('\n')
        stanza.write(out)
        self._soft_nl = True
|
40 |
||
41 |
||
42 |
class RioReader(object):
    """Read stanzas from a file as a sequence.

    from_file can be anything that can be enumerated as a sequence of
    lines (with newlines.)
    """

    def __init__(self, from_file):
        """Remember the line source that stanzas will be read from."""
        self._from_file = from_file

    def __iter__(self):
        """Yield stanzas one at a time until read_stanza returns None."""
        stanza = read_stanza(self._from_file)
        while stanza is not None:
            yield stanza
            stanza = read_stanza(self._from_file)
|
58 |
||
59 |
def read_stanzas(from_file):
    """Generate every stanza that can be read from from_file.

    Stanzas are read one after another with read_stanza; iteration stops
    the first time read_stanza returns None.
    """
    stanza = read_stanza(from_file)
    while stanza is not None:
        yield stanza
        stanza = read_stanza(from_file)
|
66 |
||
67 |
# Types accepted as field values.  The file targets Python 2, where both str
# and unicode are valid; fall back to str alone where unicode does not exist.
try:
    _string_types = (str, unicode)
except NameError:
    _string_types = (str,)


class Stanza(object):
    """One stanza for rio.

    Each stanza contains an ordered list of named fields.

    Names must be accepted by valid_tag().  Names can be repeated within a
    stanza.  Names are case-sensitive.  The ordering of fields is preserved.

    Each field value must be a string (str or unicode); other types,
    including ints, are rejected by add().
    """

    __slots__ = ['items']

    def __init__(self, **kwargs):
        """Construct a new Stanza.

        The keyword arguments, if any, are added in sorted order to the stanza.
        """
        # List of (tag, value) pairs in insertion order.
        self.items = []
        for tag, value in sorted(kwargs.items()):
            self.add(tag, value)

    def add(self, tag, value):
        """Append a name and value to the stanza.

        Raises ValueError if the tag is not accepted by valid_tag() or if
        the value is not a string.
        """
        # Raise explicitly rather than assert: asserts vanish under -O and
        # an invalid tag would then be written out as a corrupt stanza.
        if not valid_tag(tag):
            raise ValueError("invalid tag %r" % (tag,))
        if not isinstance(value, _string_types):
            raise ValueError("invalid value %r" % (value,))
        self.items.append((tag, value))

    def __contains__(self, find_tag):
        """True if there is any field in this stanza with the given tag."""
        for tag, value in self.items:
            if tag == find_tag:
                return True
        return False

    def __len__(self):
        """Return number of pairs in the stanza."""
        return len(self.items)

    def __eq__(self, other):
        """Stanzas are equal when they hold the same pairs in the same order."""
        if not isinstance(other, Stanza):
            return False
        return self.items == other.items

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "Stanza(%r)" % (self.items,)

    def iter_pairs(self):
        """Return iterator of tag, value pairs."""
        return iter(self.items)

    def to_lines(self):
        """Generate sequence of lines for external version of this file.

        Each field becomes a 'tag: value' line.  A value containing newlines
        is spread over several lines, every line after the first being
        prefixed with a tab.  An empty stanza gives an empty list.
        """
        result = []
        for tag, value in self.items:
            assert isinstance(value, _string_types)
            # split('\n') rather than splitlines(): empty lines inside the
            # value, and an empty value itself, must each still produce a line.
            val_lines = value.split('\n')
            result.append(tag + ': ' + val_lines[0] + '\n')
            for line in val_lines[1:]:
                result.append('\t' + line + '\n')
        return result

    def to_string(self):
        """Return stanza as a single string"""
        return ''.join(self.to_lines())

    def write(self, to_file):
        """Write stanza to a file"""
        to_file.writelines(self.to_lines())

    def get(self, tag):
        """Return the value for a field with given tag.

        If there is more than one value, only the first is returned.  If the
        tag is not present, KeyError is raised.
        """
        for t, v in self.items:
            if t == tag:
                return v
        raise KeyError(tag)

    __getitem__ = get

    def get_all(self, tag):
        """Return a list of the values of every field with the given tag.

        Values are in stanza order; the list is empty if the tag is absent.
        """
        return [v for t, v in self.items if t == tag]
|
177 |
||
178 |
# A tag is one or more ascii letters, digits, '-' or '_'.  The pattern is
# anchored with \Z rather than $ because $ also matches just before a
# trailing newline, which would let a tag such as 'foo\n' through.
_tag_re = re.compile(r'^[-a-zA-Z0-9_]+\Z')


def valid_tag(tag):
    """Return True if tag is a legal field name, False otherwise."""
    return bool(_tag_re.match(tag))
|
181 |
||
182 |
||
183 |
def read_stanza(line_iter):
    """Return new Stanza read from list of lines or a file

    Returns one Stanza that was read, or returns None if no field was seen
    before the end of the input or a blank line.  If a blank line follows
    the stanza, it is consumed.  It's not an error for there to be no blank
    at end of file.

    Only the stanza lines and the trailing blank (if any) are consumed
    from the line_iter.

    Raises ValueError if a line is not newline-terminated, if a continuation
    line appears before any field, if a line has no ': ' separator, or if a
    tag is not accepted by valid_tag().
    """
    # Completed (tag, value) fields in input order.  The Stanza itself is only
    # built once we know there is content to return.
    pairs = []
    tag = None
    accum_value = None
    for line in line_iter:
        if line is None or line == '':
            break       # end of file
        if line == '\n':
            break       # end of stanza
        # The slices below drop the last character assuming it is the newline;
        # check explicitly so an unterminated line is never silently truncated.
        if line[-1] != '\n':
            raise ValueError('line is not newline-terminated: %r' % (line,))
        if line[0] == '\t':     # continues previous value
            if tag is None:
                raise ValueError('invalid continuation line %r' % (line,))
            accum_value += '\n' + line[1:-1]
        else:                   # new tag:value line
            if tag is not None:
                pairs.append((tag, accum_value))
            try:
                colon_index = line.index(': ')
            except ValueError:
                raise ValueError('tag/value separator not found in line %r'
                                 % (line,))
            tag = line[:colon_index]
            if not valid_tag(tag):
                raise ValueError("invalid rio tag %r" % (tag,))
            accum_value = line[colon_index+2:-1]
    if tag is None:     # didn't see any content
        return None
    pairs.append((tag, accum_value))    # add last tag-value
    stanza = Stanza()
    for field_tag, field_value in pairs:
        stanza.add(field_tag, field_value)
    return stanza