3
Read in a bundle stream, and process it into a BundleReader object.
1
# Copyright (C) 2005-2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Read in a bundle stream, and process it into a BundleReader object."""
19
from __future__ import absolute_import
7
22
from cStringIO import StringIO
11
from bzrlib.errors import (TestamentMismatch, BzrError,
12
MalformedHeader, MalformedPatches, NotABundle)
13
from bzrlib.bundle.common import get_header, header_str
14
from bzrlib.inventory import (Inventory, InventoryEntry,
15
InventoryDirectory, InventoryFile,
17
from bzrlib.osutils import sha_file, sha_string
30
from bzrlib.bundle import apply_bundle
31
from bzrlib.errors import (
35
from bzrlib.inventory import (
41
from bzrlib.osutils import sha_string, pathjoin
18
42
from bzrlib.revision import Revision, NULL_REVISION
19
43
from bzrlib.testament import StrictTestament
20
44
from bzrlib.trace import mutter, warning
153
196
raise KeyError(revision_id)
156
class BundleReader(object):
157
"""This class reads in a bundle from a file, and returns
158
a Bundle object, which can then be applied against a tree.
160
def __init__(self, from_file):
161
"""Read in the bundle from the file.
163
:param from_file: A file-like object (must have iterator support).
165
object.__init__(self)
166
self.from_file = iter(from_file)
167
self._next_line = None
169
self.info = BundleInfo()
170
# We put the actual inventory ids in the footer, so that the patch
171
# is easier to read for humans.
172
# Unfortunately, that means we need to read everything before we
173
# can create a proper bundle.
179
while self._next_line is not None:
180
self._read_revision_header()
181
if self._next_line is None:
187
"""Make sure that the information read in makes sense
188
and passes appropriate checksums.
190
# Fill in all the missing blanks for the revisions
191
# and generate the real_revisions list.
192
self.info.complete_info()
194
def _validate_revision(self, inventory, revision_id):
195
"""Make sure all revision entries match their checksum."""
197
# This is a mapping from each revision id to it's sha hash
200
rev = self.info.get_revision(revision_id)
201
rev_info = self.info.get_revision_info(revision_id)
202
assert rev.revision_id == rev_info.revision_id
203
assert rev.revision_id == revision_id
204
sha1 = StrictTestament(rev, inventory).as_sha1()
205
if sha1 != rev_info.sha1:
206
raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
207
if rev_to_sha1.has_key(rev.revision_id):
208
raise BzrError('Revision {%s} given twice in the list'
210
rev_to_sha1[rev.revision_id] = sha1
198
def revision_tree(self, repository, revision_id, base=None):
199
revision = self.get_revision(revision_id)
200
base = self.get_base(revision)
201
if base == revision_id:
202
raise AssertionError()
203
if not self._validated_revisions_against_repo:
204
self._validate_references_from_repository(repository)
205
revision_info = self.get_revision_info(revision_id)
206
inventory_revision_id = revision_id
207
bundle_tree = BundleTree(repository.revision_tree(base),
208
inventory_revision_id)
209
self._update_tree(bundle_tree, revision_id)
211
inv = bundle_tree.inventory
212
self._validate_inventory(inv, revision_id)
213
self._validate_revision(bundle_tree, revision_id)
212
217
def _validate_references_from_repository(self, repository):
213
218
"""Now that we have a repository which should have some of the
235
240
# All of the contained revisions were checked
236
241
# in _validate_revisions
238
for rev_info in self.info.revisions:
243
for rev_info in self.revisions:
239
244
checked[rev_info.revision_id] = True
240
245
add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)
242
for (rev, rev_info) in zip(self.info.real_revisions, self.info.revisions):
247
for (rev, rev_info) in zip(self.real_revisions, self.revisions):
243
248
add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)
247
252
for revision_id, sha1 in rev_to_sha.iteritems():
248
253
if repository.has_revision(revision_id):
249
testament = StrictTestament.from_revision(repository,
254
testament = StrictTestament.from_revision(repository,
251
local_sha1 = testament.as_sha1()
256
local_sha1 = self._testament_sha1_from_revision(repository,
252
258
if sha1 != local_sha1:
253
raise BzrError('sha1 mismatch. For revision id {%s}'
259
raise BzrError('sha1 mismatch. For revision id {%s}'
254
260
'local: %s, bundle: %s' % (revision_id, local_sha1, sha1))
257
263
elif revision_id not in checked:
258
264
missing[revision_id] = sha1
260
for inv_id, sha1 in inv_to_sha.iteritems():
261
if repository.has_revision(inv_id):
262
# Note: branch.get_inventory_sha1() just returns the value that
263
# is stored in the revision text, and that value may be out
264
# of date. This is bogus, because that means we aren't
265
# validating the actual text, just that we wrote and read the
266
# string. But for now, what the hell.
267
local_sha1 = repository.get_inventory_sha1(inv_id)
268
if sha1 != local_sha1:
269
raise BzrError('sha1 mismatch. For inventory id {%s}'
270
'local: %s, bundle: %s' %
271
(inv_id, local_sha1, sha1))
275
266
if len(missing) > 0:
276
267
# I don't know if this is an error yet
277
268
warning('Not all revision hashes could be validated.'
278
269
' Unable validate %d hashes' % len(missing))
279
270
mutter('Verified %d sha hashes for the bundle.' % count)
271
self._validated_revisions_against_repo = True
281
273
def _validate_inventory(self, inv, revision_id):
282
274
"""At this point we should have generated the BundleTree,
283
275
so build up an inventory, and make sure the hashes match.
286
assert inv is not None
288
277
# Now we should have a complete inventory entry.
289
278
s = serializer_v5.write_inventory_to_string(inv)
290
279
sha1 = sha_string(s)
291
280
# Target revision is the last entry in the real_revisions list
292
rev = self.info.get_revision(revision_id)
293
assert rev.revision_id == revision_id
281
rev = self.get_revision(revision_id)
282
if rev.revision_id != revision_id:
283
raise AssertionError()
294
284
if sha1 != rev.inventory_sha1:
295
open(',,bogus-inv', 'wb').write(s)
285
f = open(',,bogus-inv', 'wb')
296
290
warning('Inventory sha hash mismatch for revision %s. %s'
297
291
' != %s' % (revision_id, sha1, rev.inventory_sha1))
299
def get_bundle(self, repository):
300
"""Return the meta information, and a Bundle tree which can
301
be used to populate the local stores and working tree, respectively.
303
return self.info, self.revision_tree(repository, self.info.target)
305
def revision_tree(self, repository, revision_id, base=None):
306
revision = self.info.get_revision(revision_id)
307
base = self.info.get_base(revision)
308
assert base != revision_id
309
self._validate_references_from_repository(repository)
310
revision_info = self.info.get_revision_info(revision_id)
311
inventory_revision_id = revision_id
312
bundle_tree = BundleTree(repository.revision_tree(base),
313
inventory_revision_id)
314
self._update_tree(bundle_tree, revision_id)
316
inv = bundle_tree.inventory
317
self._validate_inventory(inv, revision_id)
318
self._validate_revision(inv, revision_id)
323
"""yield the next line, but secretly
324
keep 1 extra line for peeking.
326
for line in self.from_file:
327
last = self._next_line
328
self._next_line = line
330
#mutter('yielding line: %r' % last)
332
last = self._next_line
333
self._next_line = None
334
#mutter('yielding line: %r' % last)
337
def _read_header(self):
338
"""Read the bzr header"""
339
header = get_header()
341
for line in self._next():
343
# not all mailers will keep trailing whitespace
346
if (not line.startswith('# ') or not line.endswith('\n')
347
or line[2:-1].decode('utf-8') != header[0]):
348
raise MalformedHeader('Found a header, but it'
349
' was improperly formatted')
350
header.pop(0) # We read this line.
352
break # We found everything.
353
elif (line.startswith('#') and line.endswith('\n')):
354
line = line[1:-1].strip().decode('utf-8')
355
if line[:len(header_str)] == header_str:
356
if line == header[0]:
359
raise MalformedHeader('Found what looks like'
360
' a header, but did not match')
363
raise NotABundle('Did not find an opening header')
365
def _read_revision_header(self):
366
self.info.revisions.append(RevisionInfo(None))
367
for line in self._next():
368
# The bzr header is terminated with a blank line
369
# which does not start with '#'
370
if line is None or line == '\n':
372
self._handle_next(line)
374
def _read_next_entry(self, line, indent=1):
375
"""Read in a key-value pair
377
if not line.startswith('#'):
378
raise MalformedHeader('Bzr header did not start with #')
379
line = line[1:-1].decode('utf-8') # Remove the '#' and '\n'
380
if line[:indent] == ' '*indent:
383
return None, None# Ignore blank lines
385
loc = line.find(': ')
390
value = self._read_many(indent=indent+2)
391
elif line[-1:] == ':':
393
value = self._read_many(indent=indent+2)
395
raise MalformedHeader('While looking for key: value pairs,'
396
' did not find the colon %r' % (line))
398
key = key.replace(' ', '_')
399
#mutter('found %s: %s' % (key, value))
402
def _handle_next(self, line):
405
key, value = self._read_next_entry(line, indent=1)
406
mutter('_handle_next %r => %r' % (key, value))
410
revision_info = self.info.revisions[-1]
411
if hasattr(revision_info, key):
412
if getattr(revision_info, key) is None:
413
setattr(revision_info, key, value)
415
raise MalformedHeader('Duplicated Key: %s' % key)
417
# What do we do with a key we don't recognize
418
raise MalformedHeader('Unknown Key: "%s"' % key)
420
def _read_many(self, indent):
421
"""If a line ends with no entry, that means that it should be
422
followed with multiple lines of values.
424
This detects the end of the list, because it will be a line that
425
does not start properly indented.
428
start = '#' + (' '*indent)
430
if self._next_line is None or self._next_line[:len(start)] != start:
433
for line in self._next():
434
values.append(line[len(start):-1].decode('utf-8'))
435
if self._next_line is None or self._next_line[:len(start)] != start:
439
def _read_one_patch(self):
440
"""Read in one patch, return the complete patch, along with
443
:return: action, lines, do_continue
445
#mutter('_read_one_patch: %r' % self._next_line)
446
# Peek and see if there are no patches
447
if self._next_line is None or self._next_line.startswith('#'):
448
return None, [], False
452
for line in self._next():
454
if not line.startswith('==='):
455
raise MalformedPatches('The first line of all patches'
456
' should be a bzr meta line "==="'
458
action = line[4:-1].decode('utf-8')
459
elif line.startswith('... '):
460
action += line[len('... '):-1].decode('utf-8')
462
if (self._next_line is not None and
463
self._next_line.startswith('===')):
464
return action, lines, True
465
elif self._next_line is None or self._next_line.startswith('#'):
466
return action, lines, False
470
elif not line.startswith('... '):
473
return action, lines, False
475
def _read_patches(self):
477
revision_actions = []
479
action, lines, do_continue = self._read_one_patch()
480
if action is not None:
481
revision_actions.append((action, lines))
482
assert self.info.revisions[-1].tree_actions is None
483
self.info.revisions[-1].tree_actions = revision_actions
485
def _read_footer(self):
486
"""Read the rest of the meta information.
488
:param first_line: The previous step iterates past what it
489
can handle. That extra line is given here.
491
for line in self._next():
492
self._handle_next(line)
493
if not self._next_line.startswith('#'):
496
if self._next_line is None:
293
def _validate_revision(self, tree, revision_id):
294
"""Make sure all revision entries match their checksum."""
296
# This is a mapping from each revision id to its sha hash
299
rev = self.get_revision(revision_id)
300
rev_info = self.get_revision_info(revision_id)
301
if not (rev.revision_id == rev_info.revision_id):
302
raise AssertionError()
303
if not (rev.revision_id == revision_id):
304
raise AssertionError()
305
sha1 = self._testament_sha1(rev, tree)
306
if sha1 != rev_info.sha1:
307
raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
308
if rev.revision_id in rev_to_sha1:
309
raise BzrError('Revision {%s} given twice in the list'
311
rev_to_sha1[rev.revision_id] = sha1
499
313
def _update_tree(self, bundle_tree, revision_id):
500
314
"""This fills out a BundleTree based on the information
786
626
base_id = self.old_contents_id(file_id)
787
if base_id is not None:
627
if (base_id is not None and
628
base_id != self.base_tree.get_root_id()):
788
629
patch_original = self.base_tree.get_file(base_id)
790
631
patch_original = None
791
632
file_patch = self.patches.get(self.id2path(file_id))
792
633
if file_patch is None:
793
if (patch_original is None and
794
self.get_kind(file_id) == 'directory'):
634
if (patch_original is None and
635
self.kind(file_id) == 'directory'):
795
636
return StringIO()
796
assert patch_original is not None, "None: %s" % file_id
637
if patch_original is None:
638
raise AssertionError("None: %s" % file_id)
797
639
return patch_original
799
assert not file_patch.startswith('\\'), \
800
'Malformed patch for %s, %r' % (file_id, file_patch)
641
if file_patch.startswith('\\'):
643
'Malformed patch for %s, %r' % (file_id, file_patch))
801
644
return patched_file(file_patch, patch_original)
803
def get_symlink_target(self, file_id):
804
new_path = self.id2path(file_id)
646
def get_symlink_target(self, file_id, path=None):
648
path = self.id2path(file_id)
806
return self._targets[new_path]
650
return self._targets[path]
808
652
return self.base_tree.get_symlink_target(file_id)
810
def get_kind(self, file_id):
654
def kind(self, file_id):
811
655
if file_id in self._kinds:
812
656
return self._kinds[file_id]
813
return self.base_tree.inventory[file_id].kind
657
return self.base_tree.kind(file_id)
659
def get_file_revision(self, file_id):
660
path = self.id2path(file_id)
661
if path in self._last_changed:
662
return self._last_changed[path]
664
return self.base_tree.get_file_revision(file_id)
815
666
def is_executable(self, file_id):
816
667
path = self.id2path(file_id)
817
668
if path in self._executable:
818
669
return self._executable[path]
820
return self.base_tree.inventory[file_id].executable
671
return self.base_tree.is_executable(file_id)
822
673
def get_last_changed(self, file_id):
823
674
path = self.id2path(file_id)
824
675
if path in self._last_changed:
825
676
return self._last_changed[path]
826
return self.base_tree.inventory[file_id].revision
677
return self.base_tree.get_file_revision(file_id)
828
679
def get_size_and_sha1(self, file_id):
829
680
"""Return the size and sha1 hash of the given file id.