1
# Copyright (C) 2005, 2006 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
5
5
# the Free Software Foundation; either version 2 of the License, or
6
6
# (at your option) any later version.
8
8
# This program is distributed in the hope that it will be useful,
9
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
11
# GNU General Public License for more details.
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
from bzrlib.xml import ElementTree, SubElement, Element, Serializer
25
from bzrlib.xml_serializer import SubElement, Element, Serializer
19
26
from bzrlib.inventory import ROOT_ID, Inventory, InventoryEntry
20
from bzrlib.revision import Revision, RevisionReference
27
from bzrlib.revision import Revision
21
28
from bzrlib.errors import BzrError
34
"'":"'", # FIXME: overkill
41
def _ensure_utf8_re():
42
"""Make sure the _utf8_re regex has been compiled"""
44
if _utf8_re is not None:
46
_utf8_re = re.compile(u'[&<>\'\"\u0080-\uffff]')
49
def _utf8_escape_replace(match, _map=_utf8_escape_map):
50
"""Replace a string of non-ascii, non XML safe characters with their escape
52
This will escape both Standard XML escapes, like <>"', etc.
53
As well as escaping non ascii characters, because ElementTree did.
54
This helps us remain compatible with older versions of bzr. We may change
55
our policy in the future, though.
57
# jam 20060816 Benchmarks show that try/KeyError is faster if you
58
# expect the entity to rarely miss. There is about a 10% difference
59
# in overall time. But if you miss frequently, then if None is much
60
# faster. For our use case, we *rarely* have a revision id, file id
61
# or path name that is unicode. So use try/KeyError.
63
return _map[match.group()]
65
return "&#%d;" % ord(match.group())
68
# Cache keyed by unicode string; _encode_and_escape looks entries up here and
# stores the escaped text (including its trailing double quote) for reuse.
_unicode_to_escaped_map = {}
70
def _encode_and_escape(unicode_str, _map=_unicode_to_escaped_map):
71
"""Encode the string into utf8, and escape invalid XML characters"""
72
# We frequently get entities we have not seen before, so it is better
73
# to check if None, rather than try/KeyError
74
text = _map.get(unicode_str)
76
# The alternative policy is to do a regular UTF8 encoding
77
# and then escape only XML meta characters.
78
# Performance is equivalent once you use cache_utf8. *However*
79
# this makes the serialized texts incompatible with old versions
80
# of bzr. So no net gain. (Perhaps the read code would handle utf8
81
# better than entity escapes, but cElementTree seems to do just fine
83
text = str(_utf8_re.sub(_utf8_escape_replace, unicode_str)) + '"'
84
_map[unicode_str] = text
89
"""Clean out the unicode => escaped map"""
90
_unicode_to_escaped_map.clear()
27
93
class Serializer_v5(Serializer):
35
def _pack_inventory(self, inv):
36
"""Convert to XML Element"""
37
e = Element('inventory')
101
support_altered_by_hack = True
102
# This format supports the altered-by hack that reads file ids directly out
103
# of the versionedfile, without doing XML parsing.
105
supported_kinds = set(['file', 'directory', 'symlink'])
107
def write_inventory_to_string(self, inv):
    """Return what write_inventory emits for ``inv`` as one string.

    :param inv: the inventory handed through to write_inventory.
    :return: the full contents of the in-memory buffer that
        write_inventory wrote into.
    """
    buf = cStringIO.StringIO()
    self.write_inventory(inv, buf)
    return buf.getvalue()
113
def write_inventory(self, inv, f):
114
"""Write inventory to a file.
116
:param inv: the inventory to write.
117
:param f: the file to write.
121
append = output.append
122
self._append_inventory_root(append, inv)
123
entries = inv.iter_entries()
125
root_path, root_ie = entries.next()
126
for path, ie in entries:
127
self._append_entry(append, ie)
128
append('</inventory>\n')
130
# Just to keep the cache from growing without bounds
131
# but we may not actually want to clear the cache
134
def _append_inventory_root(self, append, inv):
135
"""Append the inventory root to output."""
39
137
if inv.root.file_id not in (None, ROOT_ID):
40
e.set('file_id', inv.root.file_id)
41
for path, ie in inv.iter_entries():
42
e.append(self._pack_entry(ie))
46
def _pack_entry(self, ie):
47
"""Convert InventoryEntry to XML element"""
48
assert ie.kind == 'directory' or ie.kind == 'file'
50
e.set('name', ie.name)
51
e.set('file_id', ie.file_id)
53
if ie.text_size != None:
54
e.set('text_size', '%d' % ie.text_size)
56
for f in ['text_version', 'text_sha1', 'entry_version']:
61
# to be conservative, we don't externalize the root pointers
62
# for now, leaving them as null in the xml form. in a future
63
# version it will be implied by nested elements.
64
if ie.parent_id != ROOT_ID:
139
append(_encode_and_escape(inv.root.file_id))
140
append(' format="5"')
141
if inv.revision_id is not None:
142
append(' revision_id="')
143
append(_encode_and_escape(inv.revision_id))
146
def _append_entry(self, append, ie):
147
"""Convert InventoryEntry to XML element and append to output."""
148
# TODO: should just be a plain assertion
149
if ie.kind not in self.supported_kinds:
150
raise errors.UnsupportedInventoryKind(ie.kind)
155
append(' executable="yes"')
157
append(_encode_and_escape(ie.file_id))
159
append(_encode_and_escape(ie.name))
160
if self._parent_condition(ie):
65
161
assert isinstance(ie.parent_id, basestring)
66
e.set('parent_id', ie.parent_id)
162
append(' parent_id="')
163
append(_encode_and_escape(ie.parent_id))
164
if ie.revision is not None:
165
append(' revision="')
166
append(_encode_and_escape(ie.revision))
167
if ie.symlink_target is not None:
168
append(' symlink_target="')
169
append(_encode_and_escape(ie.symlink_target))
170
if ie.text_sha1 is not None:
171
append(' text_sha1="')
174
if ie.text_size is not None:
175
append(' text_size="%d"' % ie.text_size)
176
if getattr(ie, 'reference_revision', None) is not None:
177
append(' reference_revision="')
178
append(_encode_and_escape(ie.reference_revision))
182
def _parent_condition(self, ie):
    """Tell whether ``ie`` has a parent other than ROOT_ID.

    :param ie: entry whose ``parent_id`` attribute is compared to ROOT_ID.
    :return: the result of ``ie.parent_id != ROOT_ID``.
    """
    parent_is_not_root = (ie.parent_id != ROOT_ID)
    return parent_is_not_root
73
185
def _pack_revision(self, rev):
74
186
"""Revision object -> xml tree"""
75
187
root = Element('revision',
76
188
committer = rev.committer,
77
timestamp = '%.9f' % rev.timestamp,
189
timestamp = '%.3f' % rev.timestamp,
78
190
revision_id = rev.revision_id,
79
191
inventory_sha1 = rev.inventory_sha1,
194
if rev.timezone is not None:
82
195
root.set('timezone', str(rev.timezone))
85
197
msg = SubElement(root, 'message')
86
198
msg.text = rev.message
90
201
pelts = SubElement(root, 'parents')
91
202
pelts.tail = pelts.text = '\n'
92
for rr in rev.parents:
93
assert isinstance(rr, RevisionReference)
203
for parent_id in rev.parent_ids:
204
assert isinstance(parent_id, basestring)
94
205
p = SubElement(pelts, 'revision_ref')
97
p.set('revision_id', rr.revision_id)
207
p.set('revision_id', parent_id)
209
self._pack_revision_properties(rev, root)
212
def _pack_revision_properties(self, rev, under_element):
213
top_elt = SubElement(under_element, 'properties')
214
for prop_name, prop_value in sorted(rev.properties.items()):
215
assert isinstance(prop_name, basestring)
216
assert isinstance(prop_value, basestring)
217
prop_elt = SubElement(top_elt, 'property')
218
prop_elt.set('name', prop_name)
219
prop_elt.text = prop_value
103
223
def _unpack_inventory(self, elt):
104
224
"""Construct from XML Element
106
226
assert elt.tag == 'inventory'
107
227
root_id = elt.get('file_id') or ROOT_ID
108
inv = Inventory(root_id)
228
format = elt.get('format')
229
if format is not None:
231
raise BzrError("invalid format version %r on inventory"
233
revision_id = elt.get('revision_id')
234
if revision_id is not None:
235
revision_id = cache_utf8.get_cached_unicode(revision_id)
236
inv = Inventory(root_id, revision_id=revision_id)
110
238
ie = self._unpack_entry(e)
111
239
if ie.parent_id == ROOT_ID:
117
def _unpack_entry(self, elt):
244
def _unpack_entry(self, elt, none_parents=False):
119
assert kind == 'directory' or kind == 'file'
246
if not InventoryEntry.versionable_kind(kind):
247
raise AssertionError('unsupported entry kind %s' % kind)
249
get_cached = cache_utf8.get_cached_unicode
121
251
parent_id = elt.get('parent_id')
122
if parent_id == None:
252
if parent_id is None and not none_parents:
123
253
parent_id = ROOT_ID
254
# TODO: jam 20060817 At present, caching file ids costs us too
255
# much time. It slows down overall read performance from
256
# approx 500ms to 700ms. And doesn't improve future reads.
257
# it might be because revision ids and file ids are mixing.
258
# Consider caching *just* the file ids, for a limited period
260
#parent_id = get_cached(parent_id)
261
#file_id = get_cached(elt.get('file_id'))
262
file_id = elt.get('file_id')
125
ie = InventoryEntry(elt.get('file_id'),
129
ie.text_version = elt.get('text_version')
130
ie.entry_version = elt.get('entry_version')
131
ie.text_sha1 = elt.get('text_sha1')
132
v = elt.get('text_size')
133
ie.text_size = v and int(v)
264
if kind == 'directory':
265
ie = inventory.InventoryDirectory(file_id,
269
ie = inventory.InventoryFile(file_id,
272
ie.text_sha1 = elt.get('text_sha1')
273
if elt.get('executable') == 'yes':
275
v = elt.get('text_size')
276
ie.text_size = v and int(v)
277
elif kind == 'symlink':
278
ie = inventory.InventoryLink(file_id,
281
ie.symlink_target = elt.get('symlink_target')
283
raise errors.UnsupportedInventoryKind(kind)
284
revision = elt.get('revision')
285
if revision is not None:
286
revision = get_cached(revision)
287
ie.revision = revision
138
291
def _unpack_revision(self, elt):
139
292
"""XML Element -> Revision object"""
140
293
assert elt.tag == 'revision'
294
format = elt.get('format')
295
if format is not None:
297
raise BzrError("invalid format version %r on inventory"
299
get_cached = cache_utf8.get_cached_unicode
142
300
rev = Revision(committer = elt.get('committer'),
143
301
timestamp = float(elt.get('timestamp')),
144
revision_id = elt.get('revision_id'),
302
revision_id = get_cached(elt.get('revision_id')),
145
303
inventory_sha1 = elt.get('inventory_sha1')
148
305
parents = elt.find('parents') or []
149
306
for p in parents:
150
307
assert p.tag == 'revision_ref', \
151
308
"bad parent node tag %r" % p.tag
152
rev_ref = RevisionReference(p.get('revision_id'))
153
rev.parents.append(rev_ref)
309
rev.parent_ids.append(get_cached(p.get('revision_id')))
310
self._unpack_revision_properties(elt, rev)
155
311
v = elt.get('timezone')
156
rev.timezone = v and int(v)
315
rev.timezone = int(v)
158
316
rev.message = elt.findtext('message') # text of <message>
319
def _unpack_revision_properties(self, elt, rev):
320
"""Unpack properties onto a revision."""
321
props_elt = elt.find('properties')
322
assert len(rev.properties) == 0
325
for prop_elt in props_elt:
326
assert prop_elt.tag == 'property', \
327
"bad tag under properties list: %r" % prop_elt.tag
328
name = prop_elt.get('name')
329
value = prop_elt.text
330
# If a property had an empty value ('') cElementTree reads
331
# that back as None, convert it back to '', so that all
332
# properties have string values
335
assert name not in rev.properties, \
336
"repeated property %r" % name
337
rev.properties[name] = value
163
340
# Module-level Serializer_v5 instance.
serializer_v5 = Serializer_v5()