1
# Copyright (C) 2008, 2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Inventory delta serialisation.
19
See doc/developers/inventory.txt for the description of the format.
21
In this module the interesting classes are:
22
- InventoryDeltaSerializer - object to read/write inventory deltas.
25
from __future__ import absolute_import
27
__all__ = ['InventoryDeltaSerializer']
29
from bzrlib import errors
30
from bzrlib.osutils import basename
31
from bzrlib import inventory
32
from bzrlib.revision import NULL_REVISION
34
# Format marker: emitted in the "format: " header line by delta_to_lines and
# required as the first line of the text by parse_text_bytes.
FORMAT_1 = 'bzr inventory delta v1 (bzr 1.14)'
37
class InventoryDeltaError(errors.BzrError):
    """An error when serializing or deserializing an inventory delta."""

    # Most errors when serializing and deserializing are due to bugs, although
    # damaged input (i.e. a bug in a different process) could cause
    # deserialization errors too.
    # NOTE(review): the statement this comment introduces is not visible in
    # this copy of the file.  internal_error = True is assumed from the
    # comment and from IncompatibleInventoryDelta, which sets the same
    # attribute to False -- confirm against the canonical source.
    internal_error = True
46
class IncompatibleInventoryDelta(errors.BzrError):
    """The delta could not be deserialised because its contents conflict with
    the allow_versioned_root or allow_tree_references flags of the
    deserializer.
    """

    # The input is well formed but not acceptable to this deserializer, so
    # this is flagged as a non-internal error.
    internal_error = False
54
def _directory_content(entry):
55
"""Serialize the content component of entry which is a directory.
57
:param entry: An InventoryDirectory.
62
def _file_content(entry):
63
"""Serialize the content component of entry which is a file.
65
:param entry: An InventoryFile.
71
size_exec_sha = (entry.text_size, exec_bytes, entry.text_sha1)
72
if None in size_exec_sha:
73
raise InventoryDeltaError('Missing size or sha for %s' % entry.file_id)
74
return "file\x00%d\x00%s\x00%s" % size_exec_sha
77
def _link_content(entry):
78
"""Serialize the content component of entry which is a symlink.
80
:param entry: An InventoryLink.
82
target = entry.symlink_target
84
raise InventoryDeltaError('Missing target for %s' % entry.file_id)
85
return "link\x00%s" % target.encode('utf8')
88
def _reference_content(entry):
89
"""Serialize the content component of entry which is a tree-reference.
91
:param entry: A TreeReference.
93
tree_revision = entry.reference_revision
94
if tree_revision is None:
95
raise InventoryDeltaError(
96
'Missing reference revision for %s' % entry.file_id)
97
return "tree\x00%s" % tree_revision
100
def _dir_to_entry(content, name, parent_id, file_id, last_modified,
                  _type=inventory.InventoryDirectory):
    """Convert a dir content record to an InventoryDirectory.

    :param content: The split content fields; not read for directories.
    :param name: The entry's name.
    :param parent_id: The file id of the parent, or None.
    :param file_id: The entry's file id.
    :param last_modified: Stored as the entry's revision.
    :param _type: The entry class to instantiate, bound as a default
        argument.
    :return: The new entry.
    """
    result = _type(file_id, name, parent_id)
    result.revision = last_modified
    return result
108
def _file_to_entry(content, name, parent_id, file_id, last_modified,
                   _type=inventory.InventoryFile):
    """Convert a file content record to an InventoryFile.

    :param content: The split content fields: ("file", size, executable
        marker, sha1), the order in which _file_content writes them.
    :param name: The entry's name.
    :param parent_id: The file id of the parent, or None.
    :param file_id: The entry's file id.
    :param last_modified: Stored as the entry's revision.
    :param _type: The entry class to instantiate, bound as a default
        argument.
    :return: The new entry.
    """
    result = _type(file_id, name, parent_id)
    result.revision = last_modified
    result.text_size = int(content[1])
    result.text_sha1 = content[3]
    # Field 2 is the executable marker: any non-empty value means executable.
    if content[2]:
        result.executable = True
    else:
        result.executable = False
    return result
122
def _link_to_entry(content, name, parent_id, file_id, last_modified,
                   _type=inventory.InventoryLink):
    """Convert a link content record to an InventoryLink.

    :param content: The split content fields: ("link", UTF-8 encoded target).
    :param name: The entry's name.
    :param parent_id: The file id of the parent, or None.
    :param file_id: The entry's file id.
    :param last_modified: Stored as the entry's revision.
    :param _type: The entry class to instantiate, bound as a default
        argument.
    :return: The new entry, with its symlink target decoded from UTF-8.
    """
    result = _type(file_id, name, parent_id)
    result.revision = last_modified
    result.symlink_target = content[1].decode('utf8')
    return result
131
def _tree_to_entry(content, name, parent_id, file_id, last_modified,
                   _type=inventory.TreeReference):
    """Convert a tree content record to a TreeReference.

    :param content: The split content fields: ("tree", reference revision).
    :param name: The entry's name.
    :param parent_id: The file id of the parent, or None.
    :param file_id: The entry's file id.
    :param last_modified: Stored as the entry's revision.
    :param _type: The entry class to instantiate, bound as a default
        argument.
    :return: The new entry.
    """
    result = _type(file_id, name, parent_id)
    result.revision = last_modified
    result.reference_revision = content[1]
    return result
140
class InventoryDeltaSerializer(object):
    """Serialize inventory deltas."""

    def __init__(self, versioned_root, tree_references):
        """Create an InventoryDeltaSerializer.

        :param versioned_root: If True, any root entry that is seen is expected
            to be versioned, and root entries can have any fileid.
        :param tree_references: If True support tree-reference entries.
        """
        self._versioned_root = versioned_root
        self._tree_references = tree_references
        # Maps an entry's kind to the function serialising its content.
        self._entry_to_content = {
            'directory': _directory_content,
            'file': _file_content,
            'symlink': _link_content,
        }
        # Without tree_references, a tree-reference entry has no serialiser
        # and fails with KeyError in _delta_item_to_line.
        if tree_references:
            self._entry_to_content['tree-reference'] = _reference_content

    def delta_to_lines(self, old_name, new_name, delta_to_new):
        """Return a line sequence for delta_to_new.

        The versioned_root and tree_references flags given to the constructor
        are recorded in the header lines.

        :param old_name: A UTF8 revision id for the old inventory. May be
            NULL_REVISION if there is no older inventory and delta_to_new
            includes the entire inventory contents.
        :param new_name: The version name of the inventory we create with this
            delta.
        :param delta_to_new: An inventory delta such as Inventory.apply_delta
            takes.
        :return: The serialized delta as lines.
        :raises TypeError: If old_name or new_name is not exactly a str.
        :raises InventoryDeltaError: If an item cannot be serialised.
        """
        if type(old_name) is not str:
            raise TypeError('old_name should be str, got %r' % (old_name,))
        if type(new_name) is not str:
            raise TypeError('new_name should be str, got %r' % (new_name,))
        # Five empty placeholders reserve the header slots: empty strings sort
        # before every item line, so they are still at the front after the
        # sort below and can be overwritten in place.
        lines = ['', '', '', '', '']
        to_line = self._delta_item_to_line
        for delta_item in delta_to_new:
            line = to_line(delta_item, new_name)
            if line.__class__ != str:
                # Report the offending line itself, not the previously
                # appended one.
                raise InventoryDeltaError(
                    'to_line generated non-str output %r' % (line,))
            lines.append(line)
        # NOTE(review): the append/sort statements are not visible in this
        # copy of the file; they are inferred from the placeholder list above
        # -- confirm against the canonical source.
        lines.sort()
        lines[0] = "format: %s\n" % FORMAT_1
        lines[1] = "parent: %s\n" % old_name
        lines[2] = "version: %s\n" % new_name
        lines[3] = "versioned_root: %s\n" % self._serialize_bool(
            self._versioned_root)
        lines[4] = "tree_references: %s\n" % self._serialize_bool(
            self._tree_references)
        return lines

    def _serialize_bool(self, value):
        """Return the header text for a flag: "true" or "false"."""
        if value:
            return "true"
        else:
            return "false"

    def _delta_item_to_line(self, delta_item, new_version):
        """Convert delta_item to a line.

        :param delta_item: An (oldpath, newpath, file_id, entry) tuple.
        :param new_version: The version being serialised; an unversioned root
            must have this as its revision.
        :return: One NUL-separated, newline-terminated line.
        :raises InventoryDeltaError: If the entry has no revision, or a root
            entry carries a version that this serializer does not allow.
        """
        oldpath, newpath, file_id, entry = delta_item
        if newpath is None:
            # A delete: no new path, no parent, no real content.
            oldpath_utf8 = '/' + oldpath.encode('utf8')
            newpath_utf8 = 'None'
            parent_id = ''
            last_modified = NULL_REVISION
            content = 'deleted\x00\x00'
        else:
            if oldpath is None:
                oldpath_utf8 = 'None'
            else:
                oldpath_utf8 = '/' + oldpath.encode('utf8')
            if newpath == '/':
                raise AssertionError(
                    "Bad inventory delta: '/' is not a valid newpath "
                    "(should be '') in delta item %r" % (delta_item,))
            # TODO: Test real-world utf8 cache hit rate. It may be a win.
            newpath_utf8 = '/' + newpath.encode('utf8')
            # Serialize None as ''
            parent_id = entry.parent_id or ''
            # A missing (None) revision is rejected below.
            last_modified = entry.revision
            # special cases for /
            if newpath_utf8 == '/' and not self._versioned_root:
                # This is an entry for the root, this inventory does not
                # support versioned roots. So this must be an unversioned
                # root, i.e. last_modified == new revision. Otherwise, this
                # delta is invalid.
                # Note: the non-rich-root repositories *can* have roots with
                # file-ids other than TREE_ROOT, so the file id itself is not
                # checked here.
                if last_modified != new_version:
                    raise InventoryDeltaError(
                        'Version present for / in %s (%s != %s)'
                        % (file_id, last_modified, new_version))
            if last_modified is None:
                raise InventoryDeltaError("no version for fileid %s" % file_id)
            content = self._entry_to_content[entry.kind](entry)
        return ("%s\x00%s\x00%s\x00%s\x00%s\x00%s\n" %
                (oldpath_utf8, newpath_utf8, file_id, parent_id, last_modified,
                 content))
249
class InventoryDeltaDeserializer(object):
    """Deserialize inventory deltas."""

    def __init__(self, allow_versioned_root=True, allow_tree_references=True):
        """Create an InventoryDeltaDeserializer.

        :param allow_versioned_root: If False, parse_text_bytes raises
            IncompatibleInventoryDelta for a delta whose header says
            versioned_root: true.
        :param allow_tree_references: If False, parse_text_bytes raises
            IncompatibleInventoryDelta when it meets a tree-reference entry.
        """
        self._allow_versioned_root = allow_versioned_root
        self._allow_tree_references = allow_tree_references

    def _deserialize_bool(self, value):
        """Convert header text "true" or "false" to a bool.

        :raises InventoryDeltaError: If value is neither.
        """
        if value == "true":
            return True
        elif value == "false":
            return False
        else:
            raise InventoryDeltaError("value %r is not a bool" % (value,))

    def parse_text_bytes(self, bytes):
        """Parse the text bytes of a serialized inventory delta.

        If allow_versioned_root and/or allow_tree_references were passed as
        False to the constructor, a delta that uses the corresponding feature
        is rejected with IncompatibleInventoryDelta.

        :param bytes: The bytes to parse. This can be obtained by calling
            delta_to_lines and then doing ''.join(delta_lines).
        :return: (parent_id, new_id, versioned_root, tree_references,
            inventory_delta)
        :raises InventoryDeltaError: If the text is malformed or inconsistent
            with its own header.
        :raises IncompatibleInventoryDelta: If the delta conflicts with the
            flags this deserializer was created with.
        """
        if bytes[-1:] != '\n':
            last_line = bytes.rsplit('\n', 1)[-1]
            raise InventoryDeltaError('last line not empty: %r' % (last_line,))
        lines = bytes.split('\n')[:-1]  # discard the last empty line
        if not lines or lines[0] != 'format: %s' % FORMAT_1:
            raise InventoryDeltaError('unknown format %r' % lines[0:1])
        # Each slice offset below is the length of the marker just checked.
        if len(lines) < 2 or not lines[1].startswith('parent: '):
            raise InventoryDeltaError('missing parent: marker')
        delta_parent_id = lines[1][8:]
        if len(lines) < 3 or not lines[2].startswith('version: '):
            raise InventoryDeltaError('missing version: marker')
        delta_version_id = lines[2][9:]
        if len(lines) < 4 or not lines[3].startswith('versioned_root: '):
            raise InventoryDeltaError('missing versioned_root: marker')
        delta_versioned_root = self._deserialize_bool(lines[3][16:])
        if len(lines) < 5 or not lines[4].startswith('tree_references: '):
            raise InventoryDeltaError('missing tree_references: marker')
        delta_tree_references = self._deserialize_bool(lines[4][17:])
        if (not self._allow_versioned_root and delta_versioned_root):
            raise IncompatibleInventoryDelta("versioned_root not allowed")
        result = []
        seen_ids = set()
        line_iter = iter(lines)
        # Step over the five header lines validated above.
        for _ in range(5):
            next(line_iter)
        for line in line_iter:
            (oldpath_utf8, newpath_utf8, file_id, parent_id, last_modified,
             content) = line.split('\x00', 5)
            # '' on the wire means "no parent".
            parent_id = parent_id or None
            if file_id in seen_ids:
                raise InventoryDeltaError(
                    "duplicate file id in inventory delta %r" % lines)
            seen_ids.add(file_id)
            if (newpath_utf8 == '/' and not delta_versioned_root and
                    last_modified != delta_version_id):
                # Delta claims to not have a versioned root, yet here's
                # a root entry with a non-default version.
                raise InventoryDeltaError("Versioned root found: %r" % line)
            elif newpath_utf8 != 'None' and last_modified[-1] == ':':
                # Deletes have a last_modified of null:, but otherwise special
                # revision ids should not occur.
                raise InventoryDeltaError('special revisionid found: %r' % line)
            if content.startswith('tree\x00'):
                if delta_tree_references is False:
                    raise InventoryDeltaError(
                        "Tree reference found (but header said "
                        "tree_references: false): %r" % line)
                elif not self._allow_tree_references:
                    raise IncompatibleInventoryDelta(
                        "Tree reference not allowed")
            if oldpath_utf8 == 'None':
                oldpath = None
            elif oldpath_utf8[:1] != '/':
                raise InventoryDeltaError(
                    "oldpath invalid (does not start with /): %r"
                    % (oldpath_utf8,))
            else:
                # Trim the leading slash.
                oldpath_utf8 = oldpath_utf8[1:]
                oldpath = oldpath_utf8.decode('utf8')
            if newpath_utf8 == 'None':
                newpath = None
            elif newpath_utf8[:1] != '/':
                raise InventoryDeltaError(
                    "newpath invalid (does not start with /): %r"
                    % (newpath_utf8,))
            else:
                # Trim the leading slash.
                newpath_utf8 = newpath_utf8[1:]
                newpath = newpath_utf8.decode('utf8')
            content_tuple = tuple(content.split('\x00'))
            if content_tuple[0] == 'deleted':
                entry = None
            else:
                entry = _parse_entry(
                    newpath, file_id, parent_id, last_modified, content_tuple)
            delta_item = (oldpath, newpath, file_id, entry)
            result.append(delta_item)
        return (delta_parent_id, delta_version_id, delta_versioned_root,
                delta_tree_references, result)
363
def _parse_entry(path, file_id, parent_id, last_modified, content):
    """Build the inventory entry described by one parsed delta line.

    :param path: The entry's new path, already stripped of its leading '/'.
    :param file_id: The entry's file id.
    :param parent_id: The file id of the parent, or None.
    :param last_modified: Stored as the entry's revision.
    :param content: The split content fields; content[0] selects the kind.
    :return: The new inventory entry.
    :raises KeyError: If content[0] is not a known kind marker.
    """
    entry_factory = {
        'dir': _dir_to_entry,
        'file': _file_to_entry,
        'link': _link_to_entry,
        'tree': _tree_to_entry,
    }
    if path.startswith('/'):
        # NOTE(review): the body of this guard is not visible in this copy of
        # the file.  parse_text_bytes strips the leading slash before calling
        # here, so a remaining one is treated as a caller bug -- confirm
        # against the canonical source.
        raise AssertionError(
            "path %r should not start with '/'" % (path,))
    name = basename(path)
    return entry_factory[content[0]](
        content, name, parent_id, file_id, last_modified)