# Copyright (C) 2005, 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24
from bzrlib.xml_serializer import SubElement, Element, Serializer
25
from bzrlib.inventory import ROOT_ID, Inventory, InventoryEntry
26
from bzrlib.revision import Revision
27
from bzrlib.errors import BzrError
30
# Compiled lazily by _ensure_utf8_re(); None until the first call.
_utf8_re = None

# Cache of already-escaped strings, keyed by the original unicode string.
_unicode_to_escaped_map = {}

# Named entities for the XML meta characters matched by _utf8_re.  Any other
# matched character is written as a numeric character reference instead.
# NOTE(review): only the apostrophe entry survived in the listing this was
# restored from; the other entries follow the characters in the regex and
# should be confirmed against upstream.
_utf8_escape_map = {
    "&": '&amp;',
    "'": "&apos;", # FIXME: overkill
    "\"": "&quot;",
    "<": "&lt;",
    ">": "&gt;",
    }
42
def _ensure_utf8_re():
    """Make sure the _utf8_re regex has been compiled.

    The pattern matches a single character that needs escaping: one of the
    XML meta characters & < > ' " or any character in the range
    U+0080-U+FFFF.  The compiled pattern is stored in the module-level
    ``_utf8_re`` so later calls return immediately.
    """
    # Without the global declaration the assignment below would make
    # _utf8_re a local name and the test would raise UnboundLocalError.
    global _utf8_re
    if _utf8_re is not None:
        return
    _utf8_re = re.compile(u'[&<>\'\"\u0080-\uffff]')
50
def _utf8_escape_replace(match, _map=_utf8_escape_map):
    """Replace a string of non-ascii, non XML safe characters with their escape

    This will escape both Standard XML escapes, like <>"', etc.
    As well as escaping non ascii characters, because ElementTree did.
    This helps us remain compatible to older versions of bzr. We may change
    our policy in the future, though.

    :param match: a regex match object whose matched text is the single
        character to escape.
    :param _map: mapping from a character to its named entity; bound as a
        default argument so the lookup is a local name.
    :return: the named entity from _map if there is one, otherwise a
        decimal numeric character reference such as ``&#233;``.
    """
    # TODO: jam 20060816 Benchmark this, is it better to use try/except or
    #       to use _map.get() and check for None.
    #       Or still further, it might be better to pre-generate all
    #       possible conversions. However, the occurrence of unicode
    #       characters is quite low, so an initial guess is that this
    #       is the most efficient method
    #       Also need to benchmark whether it is better to have a regex
    #       which matches multiple characters, or if it is better to
    #       only match a single character and call this function multiple
    #       times. The chance that we actually need multiple escapes
    #       is probably very low for our expected usage
    try:
        return _map[match.group()]
    except KeyError:
        # Not one of the named entities: fall back to a numeric reference.
        return "&#%d;" % ord(match.group())
75
def _encode_and_escape(unicode_str, _map=_unicode_to_escaped_map):
    """Encode the string into utf8, and escape invalid XML characters

    Results are memoized in _map, so escaping the same string again is a
    single dictionary lookup.

    :param unicode_str: the text to escape.
    :param _map: cache from input string to escaped output; bound as a
        default argument so the module-level cache is shared between calls.
    :return: the escaped text as a ``str``.
    """
    try:
        return _map[unicode_str]
    except KeyError:
        # The regex is compiled lazily; make sure it exists before first
        # use instead of relying on every caller to have done so.
        _ensure_utf8_re()
        # The alternative policy is to do a regular UTF8 encoding
        # and then escape only XML meta characters. This could take
        # advantage of cache_utf8 since a lot of the revision ids
        # and file ids would already be cached.
        text = str(_utf8_re.sub(_utf8_escape_replace, unicode_str))
        _map[unicode_str] = text
    return text
89
class Serializer_v5(Serializer):
    """Version 5 serializer

    Packs objects into XML and vice versa.

    Inventories can be serialized two ways: by building an element tree
    (the ``_pack_*`` methods) or by appending pre-escaped string fragments
    to a list (``write_inventory`` and the ``_append_*`` methods).

    NOTE(review): this class was restored from a listing that had lost its
    indentation and a number of lines.  Statements that had to be inferred
    (``return`` statements, ``else`` branches and the whitespace-only
    ``text``/``tail`` assignments) should be confirmed against upstream.
    """

    def write_inventory_to_string(self, inv):
        """Just call write_inventory with a StringIO and return the value"""
        sio = cStringIO.StringIO()
        self.write_inventory(inv, sio)
        return sio.getvalue()

    def write_inventory(self, inv, f):
        """Write inventory to a file.

        :param inv: the inventory to write.
        :param f: the file to write.
        """
        # The _append_* helpers use the module-level regex directly, so it
        # has to be compiled before any of them run.
        _ensure_utf8_re()
        output = []
        self._append_inventory_root(output, inv)
        entries = inv.iter_entries()
        # The first entry is the root, which has already been written as
        # the <inventory> element itself.
        root_path, root_ie = entries.next()
        for path, ie in entries:
            self._append_entry(output, ie)
        output.append('</inventory>\n')
        f.writelines(output)

    def _append_inventory_root(self, output, inv):
        """Append the inventory root to output.

        :param output: list of string fragments being accumulated.
        :param inv: the inventory whose opening <inventory> tag is written.
        """
        output.append('<inventory')
        if inv.root.file_id not in (None, ROOT_ID):
            output.append(' file_id="')
            self._append_utf8_escaped(output, inv.root.file_id)
        output.append(' format="5"')
        if inv.revision_id is not None:
            output.append(' revision_id="')
            self._append_utf8_escaped(output, inv.revision_id)
        output.append('>\n')

    def _append_entry(self, output, ie):
        """Convert InventoryEntry to XML element and append to output.

        Attributes are appended in alphabetical order.
        NOTE(review): presumably this mirrors the attribute order produced
        by the element-tree path -- confirm.

        :param output: list of string fragments being accumulated.
        :param ie: the inventory entry to write.
        """
        # TODO: should just be a plain assertion
        assert InventoryEntry.versionable_kind(ie.kind), \
            'unsupported entry kind %s' % ie.kind

        output.append("<")
        output.append(ie.kind)
        if ie.executable:
            output.append(' executable="yes"')
        output.append(' file_id="')
        self._append_utf8_escaped(output, ie.file_id)
        output.append(' name="')
        self._append_utf8_escaped(output, ie.name)
        if ie.parent_id != ROOT_ID:
            assert isinstance(ie.parent_id, basestring)
            output.append(' parent_id="')
            self._append_utf8_escaped(output, ie.parent_id)
        if ie.revision is not None:
            output.append(' revision="')
            self._append_utf8_escaped(output, ie.revision)
        if ie.symlink_target is not None:
            output.append(' symlink_target="')
            self._append_utf8_escaped(output, ie.symlink_target)
        if ie.text_sha1 is not None:
            # The attribute must be text_sha1: that is the name read back by
            # _unpack_entry, and text_size is written separately below.
            output.append(' text_sha1="')
            # Appended without escaping, unlike the other attributes.
            output.append(ie.text_sha1)
            output.append('"')
        if ie.text_size is not None:
            output.append(' text_size="%d"' % ie.text_size)
        output.append(" />\n")

    def _append_utf8_escaped(self, output, a_string):
        """Append a_string to output as utf8.

        The escaped text is followed by a closing double quote, so callers
        only append the ``name="`` prefix of an attribute.  Requires
        _ensure_utf8_re() to have been called already.

        :param output: list of string fragments being accumulated.
        :param a_string: the attribute value to escape and append.
        """
        #output.append(_encode_and_escape(a_string))
        text = str(_utf8_re.sub(_utf8_escape_replace, a_string))
        output.append(text)
        output.append('"')

    def _pack_inventory(self, inv):
        """Convert to XML Element"""
        entries = inv.iter_entries()
        e = Element('inventory',
                    format='5')
        e.text = '\n'
        # The first entry is the root; it is represented by the <inventory>
        # element itself rather than by a child element.
        path, root = entries.next()
        if root.file_id not in (None, ROOT_ID):
            e.set('file_id', root.file_id)
        if inv.revision_id is not None:
            e.set('revision_id', inv.revision_id)
        for path, ie in entries:
            e.append(self._pack_entry(ie))
        return e

    def _pack_entry(self, ie):
        """Convert InventoryEntry to XML element"""
        # TODO: should just be a plain assertion
        if not InventoryEntry.versionable_kind(ie.kind):
            raise AssertionError('unsupported entry kind %s' % ie.kind)
        e = Element(ie.kind)
        e.set('name', ie.name)
        e.set('file_id', ie.file_id)

        if ie.text_size is not None:
            e.set('text_size', '%d' % ie.text_size)

        # Optional attributes are only written when they have a value.
        for f in ['text_sha1', 'revision', 'symlink_target']:
            v = getattr(ie, f)
            if v is not None:
                e.set(f, v)

        if ie.executable:
            e.set('executable', 'yes')

        # to be conservative, we don't externalize the root pointers
        # for now, leaving them as null in the xml form. in a future
        # version it will be implied by nested elements.
        if ie.parent_id != ROOT_ID:
            assert isinstance(ie.parent_id, basestring)
            e.set('parent_id', ie.parent_id)
        e.tail = '\n'
        return e

    def _pack_revision(self, rev):
        """Revision object -> xml tree"""
        root = Element('revision',
                       committer = rev.committer,
                       timestamp = '%.9f' % rev.timestamp,
                       revision_id = rev.revision_id,
                       inventory_sha1 = rev.inventory_sha1,
                       format='5',
                       )
        if rev.timezone is not None:
            root.set('timezone', str(rev.timezone))
        root.text = '\n'
        msg = SubElement(root, 'message')
        msg.text = rev.message
        msg.tail = '\n'
        if rev.parent_ids:
            pelts = SubElement(root, 'parents')
            pelts.tail = pelts.text = '\n'
            for parent_id in rev.parent_ids:
                assert isinstance(parent_id, basestring)
                p = SubElement(pelts, 'revision_ref')
                p.tail = '\n'
                p.set('revision_id', parent_id)
        if rev.properties:
            self._pack_revision_properties(rev, root)
        return root

    def _pack_revision_properties(self, rev, under_element):
        """Add a <properties> element for rev's properties to under_element.

        Properties are written sorted by name.
        """
        top_elt = SubElement(under_element, 'properties')
        for prop_name, prop_value in sorted(rev.properties.items()):
            assert isinstance(prop_name, basestring)
            assert isinstance(prop_value, basestring)
            prop_elt = SubElement(top_elt, 'property')
            prop_elt.set('name', prop_name)
            prop_elt.text = prop_value
            prop_elt.tail = '\n'
        top_elt.tail = '\n'

    def _unpack_inventory(self, elt):
        """Construct from XML Element

        :param elt: the <inventory> element.
        :return: the Inventory built from elt's child entries.
        :raises BzrError: if a format attribute is present and is not '5'.
        """
        assert elt.tag == 'inventory'
        root_id = elt.get('file_id') or ROOT_ID
        format = elt.get('format')
        if format is not None:
            if format != '5':
                raise BzrError("invalid format version %r on inventory"
                               % format)
        revision_id = elt.get('revision_id')
        if revision_id is not None:
            revision_id = cache_utf8.get_cached_unicode(revision_id)
        inv = Inventory(root_id, revision_id=revision_id)
        for e in elt:
            ie = self._unpack_entry(e)
            # Entries directly under the root are stored without a
            # parent_id; point them at this inventory's actual root.
            if ie.parent_id == ROOT_ID:
                ie.parent_id = root_id
            inv.add(ie)
        return inv

    def _unpack_entry(self, elt):
        """Construct an inventory entry from its XML element.

        :param elt: element whose tag is the entry kind.
        :return: an InventoryDirectory, InventoryFile or InventoryLink.
        :raises AssertionError: if the kind is not versionable.
        :raises BzrError: if the kind is versionable but not handled here.
        """
        kind = elt.tag
        if not InventoryEntry.versionable_kind(kind):
            raise AssertionError('unsupported entry kind %s' % kind)

        get_cached = cache_utf8.get_cached_unicode

        parent_id = elt.get('parent_id')
        if parent_id is None:
            parent_id = ROOT_ID
        parent_id = get_cached(parent_id)
        file_id = get_cached(elt.get('file_id'))

        if kind == 'directory':
            ie = inventory.InventoryDirectory(file_id,
                                              elt.get('name'),
                                              parent_id)
        elif kind == 'file':
            ie = inventory.InventoryFile(file_id,
                                         elt.get('name'),
                                         parent_id)
            ie.text_sha1 = elt.get('text_sha1')
            if elt.get('executable') == 'yes':
                ie.executable = True
            v = elt.get('text_size')
            # Leaves text_size as None when the attribute is absent.
            ie.text_size = v and int(v)
        elif kind == 'symlink':
            ie = inventory.InventoryLink(file_id,
                                         elt.get('name'),
                                         parent_id)
            ie.symlink_target = elt.get('symlink_target')
        else:
            raise BzrError("unknown kind %r" % kind)
        revision = elt.get('revision')
        if revision is not None:
            revision = get_cached(revision)
        ie.revision = revision

        return ie

    def _unpack_revision(self, elt):
        """XML Element -> Revision object"""
        assert elt.tag == 'revision'
        format = elt.get('format')
        if format is not None:
            if format != '5':
                raise BzrError("invalid format version %r on revision"
                               % format)
        get_cached = cache_utf8.get_cached_unicode
        rev = Revision(committer = elt.get('committer'),
                       timestamp = float(elt.get('timestamp')),
                       revision_id = get_cached(elt.get('revision_id')),
                       inventory_sha1 = elt.get('inventory_sha1')
                       )
        # Test for None explicitly rather than relying on element
        # truthiness; an empty <parents> element simply yields no parents.
        parents = elt.find('parents')
        if parents is None:
            parents = []
        for p in parents:
            assert p.tag == 'revision_ref', \
                   "bad parent node tag %r" % p.tag
            rev.parent_ids.append(get_cached(p.get('revision_id')))
        self._unpack_revision_properties(elt, rev)
        v = elt.get('timezone')
        if v is None:
            # NOTE(review): the default of 0 for a missing timezone was
            # reconstructed -- confirm against upstream.
            rev.timezone = 0
        else:
            rev.timezone = int(v)
        rev.message = elt.findtext('message') # text of <message>
        return rev

    def _unpack_revision_properties(self, elt, rev):
        """Unpack properties onto a revision."""
        props_elt = elt.find('properties')
        assert len(rev.properties) == 0
        if props_elt is None:
            return
        for prop_elt in props_elt:
            assert prop_elt.tag == 'property', \
                "bad tag under properties list: %r" % prop_elt.tag
            name = prop_elt.get('name')
            value = prop_elt.text
            # If a property had an empty value ('') cElementTree reads
            # that back as None, convert it back to '', so that all
            # properties have string values
            if value is None:
                value = ''
            assert name not in rev.properties, \
                "repeated property %r" % name
            rev.properties[name] = value
369
# Module-level instance of Serializer_v5.
serializer_v5 = Serializer_v5()