# Copyright (C) 2005 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# \subsection{\emph{rio} - simple text metaformat}
#
# \emph{r} stands for `restricted', `reproducible', or `rfc822-like'.
#
# The stored data consists of a series of \emph{stanzas}, each of which contains
# \emph{fields} identified by an ascii name, with Unicode or string contents.
# The field tag is constrained to alphanumeric characters.
# There may be more than one field in a stanza with the same name.
#
# The format itself does not deal with character encoding issues, though
# the result will normally be written in Unicode.
#
# The format is intended to be simple enough that there is exactly one character
# stream representation of an object and vice versa, and that this relation
# will continue to hold for future versions of bzr.
32
33
import re
34
1534.10.2 by Aaron Bentley
Implemented rio_file to produce a light file object from stanzas
35
from bzrlib.iterablefile import IterableFile
36
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
37
# XXX: there is some redundancy in allowing stanzas to be written in
# isolation as well as through a writer object.
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
39
40
class RioWriter(object):
    """Write a sequence of stanzas to a file-like object.

    A single newline is written before every stanza except the first, so
    consecutive stanzas end up separated by one blank line.
    """

    def __init__(self, to_file):
        """Create a writer.

        :param to_file: object with a write() method; it is also handed to
            each stanza's own write() method.
        """
        # Becomes True once a stanza has been written, meaning the next
        # stanza must be preceded by a separator.
        self._soft_nl = False
        self._to_file = to_file

    def write_stanza(self, stanza):
        """Write one stanza, preceded by a separator if it is not the first.

        :param stanza: object providing write(to_file).
        """
        if self._soft_nl:
            self._to_file.write('\n')
        stanza.write(self._to_file)
        self._soft_nl = True
50
51
52
class RioReader(object):
    """Read stanzas from a file as a sequence.

    from_file can be anything that can be enumerated as a sequence of
    lines (with newlines.)
    """

    def __init__(self, from_file):
        """Create a reader over from_file; nothing is read until iteration."""
        self._from_file = from_file

    def __iter__(self):
        """Yield stanzas from the file until read_stanza returns None."""
        while True:
            stanza = read_stanza(self._from_file)
            if stanza is None:
                return
            yield stanza
68
1534.10.2 by Aaron Bentley
Implemented rio_file to produce a light file object from stanzas
69
70
def rio_file(stanzas, header=None):
    """Produce a rio IterableFile from an iterable of stanzas.

    :param stanzas: iterable of objects providing to_lines().
    :param header: optional first line, given without its trailing newline;
        a newline is appended when it is emitted.
    :return: an IterableFile wrapping a generator of lines.  A single
        newline is emitted between consecutive stanzas.
    """
    def str_iter():
        if header is not None:
            yield header + '\n'
        first_stanza = True
        for stanza in stanzas:
            # Separator goes before every stanza except the first.
            if not first_stanza:
                yield '\n'
            for line in stanza.to_lines():
                yield line
            first_stanza = False
    return IterableFile(str_iter())
83
84
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
85
def read_stanzas(from_file):
    """Generate the stanzas read from from_file.

    read_stanza is called repeatedly on from_file; iteration stops the first
    time it returns None.

    :param from_file: line source accepted by read_stanza.
    """
    while True:
        stanza = read_stanza(from_file)
        if stanza is None:
            return
        yield stanza
92
93
class Stanza(object):
    """One stanza for rio.

    Each stanza contains a set of named fields.

    Names must be non-empty ascii alphanumeric plus _.  Names can be repeated
    within a stanza.  Names are case-sensitive.  The ordering of fields is
    preserved.

    Each field value must be a string (str or unicode).  add() stores values
    as unicode and raises TypeError for any other type, including ints.
    """

    # 'items' is the ordered sequence of (tag, value) pairs.
    __slots__ = ['items']

    def __init__(self, **kwargs):
        """Construct a new Stanza.

        The keyword arguments, if any, are added in sorted order to the stanza.
        """
        self.items = []
        for tag, value in sorted(kwargs.items()):
            self.add(tag, value)

    def add(self, tag, value):
        """Append a name and value to the stanza.

        :raises ValueError: if valid_tag rejects the tag.
        :raises TypeError: if value is neither str nor unicode.
        """
        if not valid_tag(tag):
            raise ValueError("invalid tag %r" % (tag,))
        if isinstance(value, str):
            value = unicode(value)
        elif not isinstance(value, unicode):
            # Numbers are not converted implicitly; callers must pass text.
            raise TypeError("invalid type for rio value: %r of type %s"
                            % (value, type(value)))
        self.items.append((tag, value))

    @classmethod
    def from_pairs(cls, pairs):
        """Build a stanza directly from a sequence of (tag, value) pairs.

        The sequence is stored as given: it is not copied, and the pairs are
        not passed through add(), so no tag or type checking happens here.
        """
        ret = cls()
        ret.items = pairs
        return ret

    def __contains__(self, find_tag):
        """True if there is any field in this stanza with the given tag."""
        for tag, value in self.items:
            if tag == find_tag:
                return True
        return False

    def __len__(self):
        """Return number of pairs in the stanza."""
        return len(self.items)

    def __eq__(self, other):
        """Stanzas are equal when their item sequences compare equal."""
        if not isinstance(other, Stanza):
            return False
        return self.items == other.items

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        # Wrap in a 1-tuple: from_pairs may have stored a tuple, which the
        # % operator would otherwise unpack as the format arguments.
        return "Stanza(%r)" % (self.items,)

    def iter_pairs(self):
        """Return iterator of tag, value pairs."""
        return iter(self.items)

    def to_lines(self):
        """Generate sequence of lines for external version of this file.

        The lines are always utf-8 encoded strings.  An empty stanza gives
        an empty list.
        """
        result = []
        for tag, value in self.items:
            if value == '':
                result.append(tag + ': \n')
            elif '\n' in value:
                # don't want splitlines behaviour on empty lines
                val_lines = value.split('\n')
                result.append(tag + ': ' + val_lines[0].encode('utf-8') + '\n')
                # Continuation lines of a multi-line value start with a tab.
                for line in val_lines[1:]:
                    result.append('\t' + line.encode('utf-8') + '\n')
            else:
                result.append(tag + ': ' + value.encode('utf-8') + '\n')
        return result

    def to_string(self):
        """Return stanza as a single string"""
        return ''.join(self.to_lines())

    def to_unicode(self):
        """Return stanza as a single Unicode string.

        This is most useful when adding a Stanza to a parent Stanza.  The
        layout matches to_lines(), but nothing is encoded.
        """
        result = []
        for tag, value in self.items:
            if value == u'':
                result.append(tag + u': \n')
            elif u'\n' in value:
                # don't want splitlines behaviour on empty lines
                val_lines = value.split(u'\n')
                result.append(tag + u': ' + val_lines[0] + u'\n')
                for line in val_lines[1:]:
                    result.append(u'\t' + line + u'\n')
            else:
                result.append(tag + u': ' + value + u'\n')
        return u''.join(result)

    def write(self, to_file):
        """Write stanza to a file"""
        to_file.writelines(self.to_lines())

    def get(self, tag):
        """Return the value for a field with given tag.

        If there is more than one value, only the first is returned.  If the
        tag is not present, KeyError is raised.
        """
        for t, v in self.items:
            if t == tag:
                return v
        raise KeyError(tag)

    __getitem__ = get

    def get_all(self, tag):
        """Return a list of every value stored under tag, in stanza order.

        The list is empty if the tag is not present.
        """
        return [v for t, v in self.items if t == tag]

    def as_dict(self):
        """Return a dict mapping each tag to a value of the stanza.

        When a tag is repeated, the last value in the stanza is the one kept.
        """
        d = {}
        for tag, value in self.items:
            d[tag] = value
        return d
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
244
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
245
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
246
def valid_tag(tag):
    """Return a true value if tag may be used as a rio field name.

    The check itself is delegated to the _valid_tag implementation imported
    at the bottom of this module.
    """
    return _valid_tag(tag)
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
248
249
250
def read_stanza(line_iter):
    """Return new Stanza read from list of lines or a file

    Returns one Stanza that was read, or returns None at end of file.  If a
    blank line follows the stanza, it is consumed.  It's not an error for
    there to be no blank at end of file.  If there is a blank line at the
    start of the input this is really an empty stanza and that is returned.

    Only the stanza lines and the trailing blank (if any) are consumed
    from the line_iter.

    The raw lines must be in utf-8 encoding.

    The parsing is delegated to the _read_stanza_utf8 implementation
    imported at the bottom of this module.
    """
    return _read_stanza_utf8(line_iter)
2030.1.5 by John Arbash Meinel
Create a 'read_stanza_unicode' to handle unicode processing
264
265
266
def read_stanza_unicode(unicode_iter):
    """Read a Stanza from a list of lines or a file.

    The lines should already be in unicode form. This returns a single
    stanza that was read. If there is a blank line at the end of the Stanza,
    it is consumed. It is not an error for there to be no blank line at
    the end of the iterable. If there is a blank line at the beginning,
    this is treated as an empty Stanza and None is returned.

    Only the stanza lines and the trailing blank (if any) are consumed
    from the unicode_iter.

    The parsing is delegated to the _read_stanza_unicode implementation
    imported at the bottom of this module.

    :param unicode_iter: An iterable, yielding Unicode strings. See read_stanza
        if you have a utf-8 encoded string.
    :return: A Stanza object if there are any lines in the file.
        None otherwise
    """
    return _read_stanza_unicode(unicode_iter)
1551.12.38 by Aaron Bentley
Add docs for MergeDirective and RIO-patch functions
284
285
1551.12.10 by Aaron Bentley
Reduce max width to 72
286
def to_patch_lines(stanza, max_width=72):
    """Convert a stanza into RIO-Patch format lines.

    RIO-Patch is a RIO variant designed to be e-mailed as part of a patch.
    It resists common forms of damage such as newline conversion or the removal
    of trailing whitespace, yet is also reasonably easy to read.

    Every output line starts with '# '.  Backslashes are doubled and carriage
    returns are written as the two characters backslash-r.  A logical line
    that is too long is split into several physical lines: each unfinished
    piece ends with a backslash and each continuation is indented by two
    spaces.  A logical line ending in a space gets a trailing backslash
    followed by an otherwise empty continuation line, so the space survives
    trailing-whitespace stripping.

    :param stanza: object providing to_lines().
    :param max_width: The maximum number of characters per physical line.
    :return: a list of lines
    :raises ValueError: if max_width is 6 or less.
    """
    if max_width <= 6:
        raise ValueError(max_width)
    # Presumably reserves room for the '# ' prefix, a trailing backslash and
    # the newline.
    max_rio_width = max_width - 4
    lines = []
    for pline in stanza.to_lines():
        # One stanza line may carry several '\n'-terminated pieces; the last
        # element of the split is the empty string after the final newline.
        for line in pline.split('\n')[:-1]:
            line = line.replace('\\', '\\\\')
            while len(line) > 0:
                partline = line[:max_rio_width]
                line = line[max_rio_width:]
                # If the remainder already starts at a space the cut is on a
                # word boundary; otherwise look for a nicer place to break
                # within the last 20 characters of this piece.
                if len(line) > 0 and not line.startswith(' '):
                    break_index = partline.rfind(' ', -20)
                    if break_index < 3:
                        # Break just after a hyphen.
                        break_index = partline.rfind('-', -20)
                        break_index += 1
                    if break_index < 3:
                        # Break just before a slash.
                        break_index = partline.rfind('/', -20)
                    if break_index >= 3:
                        line = partline[break_index:] + line
                        partline = partline[:break_index]
                if len(line) > 0:
                    # Indent the continuation for readability.
                    line = '  ' + line
                partline = partline.replace('\r', '\\r')
                blank_line = False
                if len(line) > 0:
                    partline += '\\'
                elif partline.endswith(' '):
                    # Protect the trailing space from being stripped.
                    partline += '\\'
                    blank_line = True
                lines.append('# ' + partline + '\n')
                if blank_line:
                    lines.append('#   \n')
    return lines
330
1551.12.23 by Aaron Bentley
Indent line continuations in Patch-RIO, to improve readability
331
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
332
def _patch_stanza_iter(line_iter):
333
    map = {'\\\\': '\\',
1551.12.9 by Aaron Bentley
force patch-rio to 76 characters
334
           '\\r' : '\r',
335
           '\\\n': ''}
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
336
    def mapget(match):
337
        return map[match.group(0)]
338
1551.12.9 by Aaron Bentley
force patch-rio to 76 characters
339
    last_line = None
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
340
    for line in line_iter:
1551.12.22 by Aaron Bentley
Fix handling of whitespace-stripping without newline munging
341
        if line.startswith('# '):
342
            line = line[2:]
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
343
        elif line.startswith('#'):
1551.12.22 by Aaron Bentley
Fix handling of whitespace-stripping without newline munging
344
            line = line[1:]
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
345
        else:
346
            raise ValueError("bad line %r" % (line,))
1551.12.23 by Aaron Bentley
Indent line continuations in Patch-RIO, to improve readability
347
        if last_line is not None and len(line) > 2:
348
            line = line[2:]
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
349
        line = re.sub('\r', '', line)
1551.12.9 by Aaron Bentley
force patch-rio to 76 characters
350
        line = re.sub('\\\\(.|\n)', mapget, line)
351
        if last_line is None:
352
            last_line = line
353
        else:
354
            last_line += line
355
        if last_line[-1] == '\n':
356
            yield last_line
357
            last_line = None
358
    if last_line is not None:
359
        yield last_line
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
360
1551.12.23 by Aaron Bentley
Indent line continuations in Patch-RIO, to improve readability
361
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
362
def read_patch_stanza(line_iter):
    """Convert an iterable of RIO-Patch format lines into a Stanza.

    RIO-Patch is a RIO variant designed to be e-mailed as part of a patch.
    It resists common forms of damage such as newline conversion or the removal
    of trailing whitespace, yet is also reasonably easy to read.

    The lines are first decoded by _patch_stanza_iter and the result is
    handed to read_stanza.

    :param line_iter: iterable of RIO-Patch lines, each starting with '#'.
    :return: a Stanza
    """
    return read_stanza(_patch_stanza_iter(line_iter))
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
372
373
374
# Use the _rio_pyx implementation of the parsing helpers when it can be
# imported (presumably the compiled extension -- confirm); otherwise fall
# back to the _rio_py module, which provides the same three names.
try:
    from bzrlib._rio_pyx import (
        _read_stanza_utf8,
        _read_stanza_unicode,
        _valid_tag,
        )
except ImportError:
    from bzrlib._rio_py import (
        _read_stanza_utf8,
        _read_stanza_unicode,
        _valid_tag,
        )