~bzr-pqm/bzr/bzr.dev

1773.4.1 by Martin Pool
Add pyflakes makefile target; fix many warnings
1
# Copyright (C) 2005, 2006 Canonical Ltd
2
#
1189 by Martin Pool
- BROKEN: partial support for commit into weave
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
7
#
1189 by Martin Pool
- BROKEN: partial support for commit into weave
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
12
#
1189 by Martin Pool
- BROKEN: partial support for commit into weave
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
1934.1.3 by John Arbash Meinel
[merge] robert's custom XML serializer, and cleanup for benchmarks and iter_entries() differences
17
import cStringIO
1934.1.4 by John Arbash Meinel
rewrite escaper to use xml numerical entities, rather than using encode('utf8')
18
import re
1189 by Martin Pool
- BROKEN: partial support for commit into weave
19
1911.2.6 by John Arbash Meinel
Cache revision ids and file ids as part of xml processing. A custom xml parser could just call decode/encode directly.
20
from bzrlib import (
21
    cache_utf8,
1934.1.3 by John Arbash Meinel
[merge] robert's custom XML serializer, and cleanup for benchmarks and iter_entries() differences
22
    inventory,
1911.2.6 by John Arbash Meinel
Cache revision ids and file ids as part of xml processing. A custom xml parser could just call decode/encode directly.
23
    )
1773.4.1 by Martin Pool
Add pyflakes makefile target; fix many warnings
24
from bzrlib.xml_serializer import SubElement, Element, Serializer
1189 by Martin Pool
- BROKEN: partial support for commit into weave
25
from bzrlib.inventory import ROOT_ID, Inventory, InventoryEntry
1773.4.1 by Martin Pool
Add pyflakes makefile target; fix many warnings
26
from bzrlib.revision import Revision
1189 by Martin Pool
- BROKEN: partial support for commit into weave
27
from bzrlib.errors import BzrError
28
29
1934.1.4 by John Arbash Meinel
rewrite escaper to use xml numerical entities, rather than using encode('utf8')
30
_utf8_re = None
31
_utf8_escape_map = {
32
    "&":'&',
33
    "'":"'", # FIXME: overkill
34
    "\"":""",
35
    "<":"&lt;",
36
    ">":"&gt;",
37
    }
38
39
40
def _ensure_utf8_re():
41
    """Make sure the _utf8_re regex has been compiled"""
42
    global _utf8_re
43
    if _utf8_re is not None:
44
        return
45
    _utf8_re = re.compile(u'[&<>\'\"\u0080-\uffff]')
46
47
48
def _utf8_escape_replace(match, _map=_utf8_escape_map):
    """Return the escaped replacement for one matched character.

    Used as the replacement callback for _utf8_re.sub().  Characters
    present in _map (the standard XML escapes for <>"' and &) are replaced
    by their mapped text; anything else is written as a decimal numeric
    entity such as &#233;.

    Non-ascii characters are escaped because ElementTree did so, which
    keeps the output compatible with older versions of bzr. We may change
    our policy in the future, though.

    :param match: a regex match object whose group() is a single character.
    :param _map: lookup table of character => escape text.
    :return: the replacement string for the matched character.
    """
    # TODO: jam 20060816 Benchmark this, is it better to use try/except or
    #       to use _map.get() and check for None.
    #       Or still further, it might be better to pre-generate all
    #       possible conversions. However, the occurrence of unicode
    #       characters is quite low, so an initial guess is that this
    #       is the most efficient method
    #       Also need to benchmark whether it is better to have a regex
    #       which matches multiple characters, or if it is better to
    #       only match a single character and call this function multiple
    #       times. The chance that we actually need multiple escapes
    #       is probably very low for our expected usage
    char = match.group()
    try:
        return _map[char]
    except KeyError:
        # Not one of the named escapes: fall back to a numeric entity.
        return "&#%d;" % ord(char)
71
72
1934.1.5 by John Arbash Meinel
Cache the entity escaping cuts us down to 450ms
73
_unicode_to_escaped_map = {}
74
1934.1.4 by John Arbash Meinel
rewrite escaper to use xml numerical entities, rather than using encode('utf8')
75
def _encode_and_escape(unicode_str, _map=_unicode_to_escaped_map):
76
    """Encode the string into utf8, and escape invalid XML characters"""
1934.1.5 by John Arbash Meinel
Cache the entity escaping cuts us down to 450ms
77
    text = _map.get(unicode_str)
78
    if text is None:
1934.1.4 by John Arbash Meinel
rewrite escaper to use xml numerical entities, rather than using encode('utf8')
79
        # The alternative policy is to do a regular UTF8 encoding
80
        # and then escape only XML meta characters. This could take
81
        # advantage of cache_utf8 since a lot of the revision ids
82
        # and file ids would already be cached.
83
        text = str(_utf8_re.sub(_utf8_escape_replace, unicode_str))
84
        _map[unicode_str] = text
1934.1.5 by John Arbash Meinel
Cache the entity escaping cuts us down to 450ms
85
    return text
86
87
88
def _clear_cache():
89
    """Clean out the unicode => escaped map"""
90
    _unicode_to_escaped_map.clear()
1934.1.4 by John Arbash Meinel
rewrite escaper to use xml numerical entities, rather than using encode('utf8')
91
92
1189 by Martin Pool
- BROKEN: partial support for commit into weave
93
class Serializer_v5(Serializer):
    """Version 5 serializer.

    Packs inventories and revisions into XML and unpacks them again.

    Inventories can be written two ways: write_inventory() emits the XML
    text directly by appending string fragments to a list, while
    _pack_inventory() builds an Element tree.  The direct writer emits the
    attribute names that _pack_entry() sets and _unpack_entry() reads.
    """

    __slots__ = []

    def write_inventory_to_string(self, inv):
        """Serialize inv with write_inventory() and return the text.

        :param inv: the inventory to write.
        :return: everything written to an in-memory cStringIO buffer.
        """
        sio = cStringIO.StringIO()
        self.write_inventory(inv, sio)
        return sio.getvalue()

    def write_inventory(self, inv, f):
        """Write inventory to a file.

        :param inv: the inventory to write.
        :param f: the file to write; only its writelines() method is used.
        """
        _ensure_utf8_re()
        output = []
        self._append_inventory_root(output, inv)
        entries = inv.iter_entries()
        # Skip the root: the first item yielded is not written as an entry.
        entries.next()
        for path, ie in entries:
            self._append_entry(output, ie)
        output.append('</inventory>\n')
        f.writelines(output)
        # Just to keep the cache from growing without bounds
        # but we may actually not want to do clear the cache
        _clear_cache()

    def _append_inventory_root(self, output, inv):
        """Append the opening <inventory ...> tag for inv to output.

        The root file_id is written only when it is neither None nor
        ROOT_ID, and revision_id only when it is set.

        :param output: list of string fragments being accumulated.
        :param inv: the inventory whose root is being written.
        """
        output.append('<inventory')
        if inv.root.file_id not in (None, ROOT_ID):
            output.append(' file_id="')
            self._append_utf8_escaped(output, inv.root.file_id)
        output.append(' format="5"')
        if inv.revision_id is not None:
            output.append(' revision_id="')
            self._append_utf8_escaped(output, inv.revision_id)
        output.append('>\n')

    def _append_entry(self, output, ie):
        """Convert InventoryEntry to XML element and append to output.

        Optional attributes (executable, parent_id, revision,
        symlink_target, text_sha1, text_size) are written only when set;
        parent_id is omitted when it equals ROOT_ID.

        :param output: list of string fragments being accumulated.
        :param ie: the inventory entry to write.
        """
        # TODO: should just be a plain assertion
        assert InventoryEntry.versionable_kind(ie.kind), \
            'unsupported entry kind %s' % ie.kind

        output.append("<")
        output.append(ie.kind)
        if ie.executable:
            output.append(' executable="yes"')
        output.append(' file_id="')
        self._append_utf8_escaped(output, ie.file_id)
        output.append(' name="')
        self._append_utf8_escaped(output, ie.name)
        if ie.parent_id != ROOT_ID:
            assert isinstance(ie.parent_id, basestring)
            output.append(' parent_id="')
            self._append_utf8_escaped(output, ie.parent_id)
        if ie.revision is not None:
            output.append(' revision="')
            self._append_utf8_escaped(output, ie.revision)
        if ie.symlink_target is not None:
            output.append(' symlink_target="')
            self._append_utf8_escaped(output, ie.symlink_target)
        if ie.text_sha1 is not None:
            # Must be named text_sha1: that is the attribute _pack_entry
            # writes and _unpack_entry reads.  The value is appended
            # without escaping.
            output.append(' text_sha1="')
            output.append(ie.text_sha1)
            output.append('"')
        if ie.text_size is not None:
            output.append(' text_size="%d"' % ie.text_size)
        output.append(" />\n")

    def _append_utf8_escaped(self, output, a_string):
        """Append a_string, escaped, plus a closing double quote.

        Callers append the opening ' name="' fragment themselves; this
        method supplies the escaped value and the terminating quote.

        :param output: list of string fragments being accumulated.
        :param a_string: the attribute value to escape and append.
        """
        output.append(_encode_and_escape(a_string))
        output.append('"')

    def _pack_inventory(self, inv):
        """Convert to XML Element

        :param inv: the inventory to convert.
        :return: an 'inventory' Element with one child per non-root entry.
        """
        entries = inv.iter_entries()
        e = Element('inventory',
                    format='5')
        e.text = '\n'
        # The first item yielded is the root; it becomes attributes of the
        # <inventory> element rather than a child element.
        path, root = entries.next()
        if root.file_id not in (None, ROOT_ID):
            e.set('file_id', root.file_id)
        if inv.revision_id is not None:
            e.set('revision_id', inv.revision_id)
        for path, ie in entries:
            e.append(self._pack_entry(ie))
        return e

    def _pack_entry(self, ie):
        """Convert InventoryEntry to XML element

        :param ie: the inventory entry to convert.
        :return: an Element whose tag is the entry kind.
        :raises AssertionError: if the entry kind is not versionable.
        """
        # TODO: should just be a plain assertion
        if not InventoryEntry.versionable_kind(ie.kind):
            raise AssertionError('unsupported entry kind %s' % ie.kind)
        e = Element(ie.kind)
        e.set('name', ie.name)
        e.set('file_id', ie.file_id)

        if ie.text_size is not None:
            e.set('text_size', '%d' % ie.text_size)

        for f in ['text_sha1', 'revision', 'symlink_target']:
            v = getattr(ie, f)
            if v is not None:
                e.set(f, v)

        if ie.executable:
            e.set('executable', 'yes')

        # to be conservative, we don't externalize the root pointers
        # for now, leaving them as null in the xml form.  in a future
        # version it will be implied by nested elements.
        if ie.parent_id != ROOT_ID:
            assert isinstance(ie.parent_id, basestring)
            e.set('parent_id', ie.parent_id)
        e.tail = '\n'
        return e

    def _pack_revision(self, rev):
        """Revision object -> xml tree

        The timezone attribute is written only when rev.timezone is not
        None; the <parents> and <properties> children are written only
        when the revision has any.

        :param rev: the Revision to convert.
        :return: the 'revision' root Element.
        """
        root = Element('revision',
                       committer=rev.committer,
                       timestamp='%.9f' % rev.timestamp,
                       revision_id=rev.revision_id,
                       inventory_sha1=rev.inventory_sha1,
                       format='5',
                       )
        if rev.timezone is not None:
            root.set('timezone', str(rev.timezone))
        root.text = '\n'
        msg = SubElement(root, 'message')
        msg.text = rev.message
        msg.tail = '\n'
        if rev.parent_ids:
            pelts = SubElement(root, 'parents')
            pelts.tail = pelts.text = '\n'
            for parent_id in rev.parent_ids:
                assert isinstance(parent_id, basestring)
                p = SubElement(pelts, 'revision_ref')
                p.tail = '\n'
                p.set('revision_id', parent_id)
        if rev.properties:
            self._pack_revision_properties(rev, root)
        return root

    def _pack_revision_properties(self, rev, under_element):
        """Add a <properties> child holding rev.properties.

        Properties are written in sorted order, one <property> element
        each, with the name as an attribute and the value as text.

        :param rev: the Revision whose properties are written.
        :param under_element: the Element to attach <properties> to.
        """
        top_elt = SubElement(under_element, 'properties')
        for prop_name, prop_value in sorted(rev.properties.items()):
            assert isinstance(prop_name, basestring)
            assert isinstance(prop_value, basestring)
            prop_elt = SubElement(top_elt, 'property')
            prop_elt.set('name', prop_name)
            prop_elt.text = prop_value
            prop_elt.tail = '\n'
        top_elt.tail = '\n'

    def _unpack_inventory(self, elt):
        """Construct from XML Element

        :param elt: an 'inventory' Element.
        :return: a new Inventory holding every child entry of elt.
        :raises BzrError: if a format attribute is present and is not '5'.
        """
        assert elt.tag == 'inventory'
        root_id = elt.get('file_id') or ROOT_ID
        format = elt.get('format')
        if format is not None:
            if format != '5':
                raise BzrError("invalid format version %r on inventory"
                                % format)
        revision_id = elt.get('revision_id')
        if revision_id is not None:
            revision_id = cache_utf8.get_cached_unicode(revision_id)
        inv = Inventory(root_id, revision_id=revision_id)
        for e in elt:
            ie = self._unpack_entry(e)
            # Entries without an explicit parent unpack with ROOT_ID;
            # attach them to this inventory's actual root id.
            if ie.parent_id == ROOT_ID:
                ie.parent_id = root_id
            inv.add(ie)
        return inv

    def _unpack_entry(self, elt):
        """Construct an inventory entry from an XML Element.

        The element tag selects the entry class (directory, file or
        symlink).  A missing parent_id attribute is read as ROOT_ID.

        :param elt: the Element describing one entry.
        :return: the new inventory entry.
        :raises AssertionError: if the tag is not a versionable kind.
        :raises BzrError: if the kind is not one handled here.
        """
        kind = elt.tag
        if not InventoryEntry.versionable_kind(kind):
            raise AssertionError('unsupported entry kind %s' % kind)

        get_cached = cache_utf8.get_cached_unicode

        parent_id = elt.get('parent_id')
        if parent_id is None:
            parent_id = ROOT_ID
        parent_id = get_cached(parent_id)
        file_id = get_cached(elt.get('file_id'))

        if kind == 'directory':
            ie = inventory.InventoryDirectory(file_id,
                                              elt.get('name'),
                                              parent_id)
        elif kind == 'file':
            ie = inventory.InventoryFile(file_id,
                                         elt.get('name'),
                                         parent_id)
            ie.text_sha1 = elt.get('text_sha1')
            if elt.get('executable') == 'yes':
                ie.executable = True
            v = elt.get('text_size')
            # A missing (or empty) attribute is stored as-is, not as int.
            ie.text_size = v and int(v)
        elif kind == 'symlink':
            ie = inventory.InventoryLink(file_id,
                                         elt.get('name'),
                                         parent_id)
            ie.symlink_target = elt.get('symlink_target')
        else:
            raise BzrError("unknown kind %r" % kind)
        revision = elt.get('revision')
        if revision is not None:
            revision = get_cached(revision)
        ie.revision = revision

        return ie

    def _unpack_revision(self, elt):
        """XML Element -> Revision object

        A missing timezone attribute is read as 0.

        :param elt: a 'revision' Element.
        :return: the new Revision.
        :raises BzrError: if a format attribute is present and is not '5'.
        """
        assert elt.tag == 'revision'
        format = elt.get('format')
        if format is not None:
            if format != '5':
                raise BzrError("invalid format version %r on revision"
                                % format)
        get_cached = cache_utf8.get_cached_unicode
        rev = Revision(committer=elt.get('committer'),
                       timestamp=float(elt.get('timestamp')),
                       revision_id=get_cached(elt.get('revision_id')),
                       inventory_sha1=elt.get('inventory_sha1')
                       )
        # Test against None explicitly: an Element with no children is
        # false in a boolean context.
        parents = elt.find('parents')
        if parents is not None:
            for p in parents:
                assert p.tag == 'revision_ref', \
                       "bad parent node tag %r" % p.tag
                rev.parent_ids.append(get_cached(p.get('revision_id')))
        self._unpack_revision_properties(elt, rev)
        v = elt.get('timezone')
        if v is None:
            rev.timezone = 0
        else:
            rev.timezone = int(v)
        rev.message = elt.findtext('message') # text of <message>
        return rev

    def _unpack_revision_properties(self, elt, rev):
        """Unpack properties onto a revision.

        :param elt: the 'revision' Element that may hold <properties>.
        :param rev: the Revision to update; it must have no properties yet.
        """
        props_elt = elt.find('properties')
        assert len(rev.properties) == 0
        # Nothing to do when there is no <properties> element; an empty
        # one simply yields no iterations below.
        if props_elt is None:
            return
        for prop_elt in props_elt:
            assert prop_elt.tag == 'property', \
                "bad tag under properties list: %r" % prop_elt.tag
            name = prop_elt.get('name')
            value = prop_elt.text
            # If a property had an empty value ('') cElementTree reads
            # that back as None, convert it back to '', so that all
            # properties have string values
            if value is None:
                value = ''
            assert name not in rev.properties, \
                "repeated property %r" % name
            rev.properties[name] = value
372
373
1189 by Martin Pool
- BROKEN: partial support for commit into weave
374
# Module-level instance of the version 5 serializer.
serializer_v5 = Serializer_v5()