1
# Copyright (C) 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Read in a bundle stream, and process it into a BundleReader object."""
20
from cStringIO import StringIO
28
from bzrlib.errors import (TestamentMismatch, BzrError,
29
MalformedHeader, MalformedPatches, NotABundle)
30
from bzrlib.inventory import (Inventory, InventoryEntry,
31
InventoryDirectory, InventoryFile,
33
from bzrlib.osutils import sha_file, sha_string, pathjoin
34
from bzrlib.revision import Revision, NULL_REVISION
35
from bzrlib.testament import StrictTestament
36
from bzrlib.trace import mutter, warning
37
import bzrlib.transport
38
from bzrlib.tree import Tree
39
import bzrlib.urlutils
40
from bzrlib.xml5 import serializer_v5
43
class RevisionInfo(object):
44
"""Gets filled out for each revision object that is read.
46
def __init__(self, revision_id):
47
self.revision_id = revision_id
53
self.inventory_sha1 = None
55
self.parent_ids = None
58
self.properties = None
59
self.tree_actions = None
62
return pprint.pformat(self.__dict__)
64
def as_revision(self):
    """Build a Revision object from the fields collected on this object.

    committer, timestamp, timezone, inventory_sha1 and message are passed
    to the Revision constructor; message is a sequence of lines that is
    joined with newlines.  Parent ids and properties are copied onto the
    new Revision afterwards.

    Each entry of self.properties is a 'key: value' string.  A bare
    'key:' entry is taken to mean an empty value.

    :return: The newly built Revision.
    :raises BzrError: if a property entry contains no key/value separator.
    """
    rev = Revision(revision_id=self.revision_id,
        committer=self.committer,
        timestamp=float(self.timestamp),
        timezone=int(self.timezone),
        inventory_sha1=self.inventory_sha1,
        message='\n'.join(self.message))

    # parent_ids and properties are initialised to None, so they may
    # still be unset when this is called.
    if self.parent_ids:
        rev.parent_ids.extend(self.parent_ids)

    if self.properties:
        for prop in self.properties:
            key_end = prop.find(': ')
            # str.find() signals "not found" with -1, never None, so the
            # miss has to be tested for explicitly; slicing with -1 would
            # silently produce a bogus key and value.
            if key_end == -1:
                if not prop.endswith(':'):
                    raise BzrError('Malformed revision property'
                                   ' (no ": " separator): %r' % (prop,))
                key = prop[:-1].encode('utf-8')
                value = ''.encode('utf-8')
            else:
                key = prop[:key_end].encode('utf-8')
                value = prop[key_end+2:].encode('utf-8')
            rev.properties[key] = value

    return rev
86
class BundleInfo(object):
87
"""This contains the meta information. Stuff that allows you to
88
recreate the revision or inventory XML.
95
# A list of RevisionInfo objects
98
# The next entries are created during complete_info() and
99
# other post-read functions.
101
# A list of real Revision objects
102
self.real_revisions = []
104
self.timestamp = None
108
return pprint.pformat(self.__dict__)
110
def complete_info(self):
    """Fill in the per-revision fields that were left unspecified.

    Bundle-level values (timestamp/timezone, message, committer) are used
    as defaults for any revision that does not carry its own.  Once every
    RevisionInfo is complete, self.real_revisions is rebuilt from
    RevisionInfo.as_revision().
    """
    from bzrlib.timestamp import unpack_highres_date
    # Put in all of the guessable information.
    if not self.timestamp and self.date:
        self.timestamp, self.timezone = unpack_highres_date(self.date)

    self.real_revisions = []
    for rev in self.revisions:
        if rev.timestamp is None:
            if rev.date is not None:
                # The revision's own date string wins over the
                # bundle-level timestamp.
                rev.timestamp, rev.timezone = \
                        unpack_highres_date(rev.date)
            else:
                rev.timestamp = self.timestamp
                rev.timezone = self.timezone
        if rev.message is None and self.message:
            rev.message = self.message
        if rev.committer is None and self.committer:
            rev.committer = self.committer
        self.real_revisions.append(rev.as_revision())
135
def get_base(self, revision):
136
revision_info = self.get_revision_info(revision.revision_id)
137
if revision_info.base_id is not None:
138
if revision_info.base_id == NULL_REVISION:
141
return revision_info.base_id
142
if len(revision.parent_ids) == 0:
143
# There is no base listed, and
144
# the lowest revision doesn't have a parent
145
# so this is probably against the empty tree
146
# and thus base truly is None
149
return revision.parent_ids[-1]
151
def _get_target(self):
152
"""Return the target revision."""
153
if len(self.real_revisions) > 0:
154
return self.real_revisions[0].revision_id
155
elif len(self.revisions) > 0:
156
return self.revisions[0].revision_id
159
target = property(_get_target, doc='The target revision id')
161
def get_revision(self, revision_id):
    """Return the entry of self.real_revisions with the given id.

    :param revision_id: The revision id to look for.
    :return: The first entry whose revision_id attribute equals
        revision_id.
    :raises KeyError: if no entry matches.
    """
    for r in self.real_revisions:
        if r.revision_id == revision_id:
            return r
    raise KeyError(revision_id)
167
def get_revision_info(self, revision_id):
    """Return the entry of self.revisions with the given id.

    :param revision_id: The revision id to look for.
    :return: The first entry whose revision_id attribute equals
        revision_id.
    :raises KeyError: if no entry matches.
    """
    for r in self.revisions:
        if r.revision_id == revision_id:
            return r
    raise KeyError(revision_id)
173
def revision_tree(self, repository, revision_id, base=None):
174
revision_id = osutils.safe_revision_id(revision_id)
175
revision = self.get_revision(revision_id)
176
base = self.get_base(revision)
177
assert base != revision_id
178
self._validate_references_from_repository(repository)
179
revision_info = self.get_revision_info(revision_id)
180
inventory_revision_id = revision_id
181
bundle_tree = BundleTree(repository.revision_tree(base),
182
inventory_revision_id)
183
self._update_tree(bundle_tree, revision_id)
185
inv = bundle_tree.inventory
186
self._validate_inventory(inv, revision_id)
187
self._validate_revision(inv, revision_id)
191
def _validate_references_from_repository(self, repository):
192
"""Now that we have a repository which should have some of the
193
revisions we care about, go through and validate all of them
198
def add_sha(d, revision_id, sha1):
199
if revision_id is None:
201
raise BzrError('A Null revision should always'
202
'have a null sha1 hash')
205
# This really should have been validated as part
206
# of _validate_revisions but lets do it again
207
if sha1 != d[revision_id]:
208
raise BzrError('** Revision %r referenced with 2 different'
209
' sha hashes %s != %s' % (revision_id,
210
sha1, d[revision_id]))
212
d[revision_id] = sha1
214
# All of the contained revisions were checked
215
# in _validate_revisions
217
for rev_info in self.revisions:
218
checked[rev_info.revision_id] = True
219
add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)
221
for (rev, rev_info) in zip(self.real_revisions, self.revisions):
222
add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)
226
for revision_id, sha1 in rev_to_sha.iteritems():
227
if repository.has_revision(revision_id):
228
testament = StrictTestament.from_revision(repository,
230
local_sha1 = self._testament_sha1_from_revision(repository,
232
if sha1 != local_sha1:
233
raise BzrError('sha1 mismatch. For revision id {%s}'
234
'local: %s, bundle: %s' % (revision_id, local_sha1, sha1))
237
elif revision_id not in checked:
238
missing[revision_id] = sha1
240
for inv_id, sha1 in inv_to_sha.iteritems():
241
if repository.has_revision(inv_id):
242
# Note: branch.get_inventory_sha1() just returns the value that
243
# is stored in the revision text, and that value may be out
244
# of date. This is bogus, because that means we aren't
245
# validating the actual text, just that we wrote and read the
246
# string. But for now, what the hell.
247
local_sha1 = repository.get_inventory_sha1(inv_id)
248
if sha1 != local_sha1:
249
raise BzrError('sha1 mismatch. For inventory id {%s}'
250
'local: %s, bundle: %s' %
251
(inv_id, local_sha1, sha1))
256
# I don't know if this is an error yet
257
warning('Not all revision hashes could be validated.'
258
' Unable validate %d hashes' % len(missing))
259
mutter('Verified %d sha hashes for the bundle.' % count)
261
def _validate_inventory(self, inv, revision_id):
262
"""At this point we should have generated the BundleTree,
263
so build up an inventory, and make sure the hashes match.
266
assert inv is not None
268
# Now we should have a complete inventory entry.
269
s = serializer_v5.write_inventory_to_string(inv)
271
# Target revision is the last entry in the real_revisions list
272
rev = self.get_revision(revision_id)
273
assert rev.revision_id == revision_id
274
if sha1 != rev.inventory_sha1:
275
open(',,bogus-inv', 'wb').write(s)
276
warning('Inventory sha hash mismatch for revision %s. %s'
277
' != %s' % (revision_id, sha1, rev.inventory_sha1))
279
def _validate_revision(self, inventory, revision_id):
280
"""Make sure all revision entries match their checksum."""
282
# This is a mapping from each revision id to its sha hash
285
rev = self.get_revision(revision_id)
286
rev_info = self.get_revision_info(revision_id)
287
assert rev.revision_id == rev_info.revision_id
288
assert rev.revision_id == revision_id
289
sha1 = self._testament_sha1(rev, inventory)
290
if sha1 != rev_info.sha1:
291
raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
292
if rev.revision_id in rev_to_sha1:
293
raise BzrError('Revision {%s} given twice in the list'
295
rev_to_sha1[rev.revision_id] = sha1
297
def _update_tree(self, bundle_tree, revision_id):
298
"""This fills out a BundleTree based on the information
301
:param bundle_tree: A BundleTree to update with the new information.
304
def get_rev_id(last_changed, path, kind):
305
if last_changed is not None:
306
# last_changed will be a Unicode string because of how it was
307
# read. Convert it back to utf8.
308
changed_revision_id = osutils.safe_revision_id(last_changed,
311
changed_revision_id = revision_id
312
bundle_tree.note_last_changed(path, changed_revision_id)
313
return changed_revision_id
315
def extra_info(info, new_path):
318
for info_item in info:
320
name, value = info_item.split(':', 1)
322
raise 'Value %r has no colon' % info_item
323
if name == 'last-changed':
325
elif name == 'executable':
326
assert value in ('yes', 'no'), value
327
val = (value == 'yes')
328
bundle_tree.note_executable(new_path, val)
329
elif name == 'target':
330
bundle_tree.note_target(new_path, value)
331
elif name == 'encoding':
333
return last_changed, encoding
335
def do_patch(path, lines, encoding):
336
if encoding is not None:
337
assert encoding == 'base64'
338
patch = base64.decodestring(''.join(lines))
340
patch = ''.join(lines)
341
bundle_tree.note_patch(path, patch)
343
def renamed(kind, extra, lines):
344
info = extra.split(' // ')
346
raise BzrError('renamed action lines need both a from and to'
349
if info[1].startswith('=> '):
350
new_path = info[1][3:]
354
bundle_tree.note_rename(old_path, new_path)
355
last_modified, encoding = extra_info(info[2:], new_path)
356
revision = get_rev_id(last_modified, new_path, kind)
358
do_patch(new_path, lines, encoding)
360
def removed(kind, extra, lines):
361
info = extra.split(' // ')
363
# TODO: in the future we might allow file ids to be
364
# given for removed entries
365
raise BzrError('removed action lines should only have the path'
368
bundle_tree.note_deletion(path)
370
def added(kind, extra, lines):
371
info = extra.split(' // ')
373
raise BzrError('add action lines require the path and file id'
376
raise BzrError('add action lines have fewer than 5 entries.'
379
if not info[1].startswith('file-id:'):
380
raise BzrError('The file-id should follow the path for an add'
382
# This will be Unicode because of how the stream is read. Turn it
383
# back into a utf8 file_id
384
file_id = osutils.safe_file_id(info[1][8:], warn=False)
386
bundle_tree.note_id(file_id, path, kind)
387
# this will be overridden in extra_info if executable is specified.
388
bundle_tree.note_executable(path, False)
389
last_changed, encoding = extra_info(info[2:], path)
390
revision = get_rev_id(last_changed, path, kind)
391
if kind == 'directory':
393
do_patch(path, lines, encoding)
395
def modified(kind, extra, lines):
396
info = extra.split(' // ')
398
raise BzrError('modified action lines have at least'
399
'the path in them: %r' % extra)
402
last_modified, encoding = extra_info(info[1:], path)
403
revision = get_rev_id(last_modified, path, kind)
405
do_patch(path, lines, encoding)
413
for action_line, lines in \
414
self.get_revision_info(revision_id).tree_actions:
415
first = action_line.find(' ')
417
raise BzrError('Bogus action line'
418
' (no opening space): %r' % action_line)
419
second = action_line.find(' ', first+1)
421
raise BzrError('Bogus action line'
422
' (missing second space): %r' % action_line)
423
action = action_line[:first]
424
kind = action_line[first+1:second]
425
if kind not in ('file', 'directory', 'symlink'):
426
raise BzrError('Bogus action line'
427
' (invalid object kind %r): %r' % (kind, action_line))
428
extra = action_line[second+1:]
430
if action not in valid_actions:
431
raise BzrError('Bogus action line'
432
' (unrecognized action): %r' % action_line)
433
valid_actions[action](kind, extra, lines)
436
class BundleTree(Tree):
437
def __init__(self, base_tree, revision_id):
    """Create an overlay with no recorded changes on top of base_tree.

    :param base_tree: The tree that the recorded renames, additions,
        deletions and patches are applied against.
    :param revision_id: Stored as self.revision_id; it is handed to
        Inventory() when the inventory is built.
    """
    self.base_tree = base_tree
    # The two rename maps are filled by note_rename().
    self._renamed = {} # new_path => old_path
    self._renamed_r = {} # old_path => new_path
    self._new_id = {} # new_path => new_id
    self._new_id_r = {} # new_id => new_path
    self._kinds = {} # new_id => kind
    # The next two are looked up by path, not by file id.
    self._last_changed = {} # new_path => revision_id
    self._executable = {} # new_path => executable value
    # Written by note_patch(), read by get_file().
    self.patches = {} # new_path => patch text
    self._targets = {} # new path => new symlink target
    # Appended to by note_deletion().
    self.deleted = [] # old paths that were removed
    self.contents_by_id = True
    self.revision_id = revision_id
    self._inventory = None
454
return pprint.pformat(self.__dict__)
456
def note_rename(self, old_path, new_path):
    """A file/directory has been renamed from old_path => new_path.

    The rename is stored in both directions: self._renamed is keyed by
    the new path and self._renamed_r by the old path.

    :param old_path: The path before the rename.
    :param new_path: The path after the rename.
    """
    # A path may take part in at most one recorded rename.
    assert new_path not in self._renamed
    assert old_path not in self._renamed_r
    self._renamed[new_path] = old_path
    self._renamed_r[old_path] = new_path
463
def note_id(self, new_id, new_path, kind='file'):
    """Files that don't exist in base need a new id.

    Records the id for the path, the path for the id, and the kind of
    the new entry.

    :param new_id: The file id of the new entry.
    :param new_path: The path of the new entry.
    :param kind: The kind of the new entry; defaults to 'file'.
    """
    self._new_id[new_path] = new_id
    self._new_id_r[new_id] = new_path
    self._kinds[new_id] = kind
469
def note_last_changed(self, file_id, revision_id):
    """Record the revision in which an entry was last changed.

    Noting the same revision again for the same key is allowed.

    :param file_id: The key to record under.  Despite the name, the
        callers in this file pass the entry's path, and
        get_last_changed() looks the value up by path.
    :param revision_id: The last-changed revision id.
    :raises BzrError: if a different revision was already recorded for
        the same key.
    """
    if (file_id in self._last_changed
            and self._last_changed[file_id] != revision_id):
        raise BzrError('Mismatched last-changed revision for file_id {%s}'
                ': %s != %s' % (file_id,
                                self._last_changed[file_id],
                                revision_id))
    self._last_changed[file_id] = revision_id
478
def note_patch(self, new_path, patch):
    """There is a patch for a given filename.

    :param new_path: The path the patch applies to; used as the key in
        self.patches.
    :param patch: The patch itself, stored as given.
    """
    self.patches[new_path] = patch
482
def note_target(self, new_path, target):
    """The symlink at the new path has the given target.

    :param new_path: The path of the symlink; used as the key in
        self._targets.
    :param target: The symlink target, stored as given.
    """
    self._targets[new_path] = target
486
def note_deletion(self, old_path):
    """The file at old_path has been deleted.

    :param old_path: The deleted path; appended to self.deleted.
    """
    self.deleted.append(old_path)
490
def note_executable(self, new_path, executable):
    """Record the executable flag for the entry at new_path.

    A later call for the same path replaces the earlier value.

    :param new_path: The path of the entry; used as the key in
        self._executable.
    :param executable: The executable value, stored as given.
    """
    self._executable[new_path] = executable
493
def old_path(self, new_path):
494
"""Get the old_path (path in the base_tree) for the file at new_path"""
495
assert new_path[:1] not in ('\\', '/')
496
old_path = self._renamed.get(new_path)
497
if old_path is not None:
499
dirname,basename = os.path.split(new_path)
500
# dirname is not '' doesn't work, because
501
# dirname may be a unicode entry, and is
502
# requires the objects to be identical
504
old_dir = self.old_path(dirname)
508
old_path = pathjoin(old_dir, basename)
511
#If the new path wasn't in renamed, the old one shouldn't be in
513
if old_path in self._renamed_r:
517
def new_path(self, old_path):
518
"""Get the new_path (path in the target_tree) for the file at old_path
521
assert old_path[:1] not in ('\\', '/')
522
new_path = self._renamed_r.get(old_path)
523
if new_path is not None:
525
if new_path in self._renamed:
527
dirname,basename = os.path.split(old_path)
529
new_dir = self.new_path(dirname)
533
new_path = pathjoin(new_dir, basename)
536
#If the old path wasn't in renamed, the new one shouldn't be in
538
if new_path in self._renamed:
542
def path2id(self, path):
543
"""Return the id of the file present at path in the target tree."""
544
file_id = self._new_id.get(path)
545
if file_id is not None:
547
old_path = self.old_path(path)
550
if old_path in self.deleted:
552
if getattr(self.base_tree, 'path2id', None) is not None:
553
return self.base_tree.path2id(old_path)
555
return self.base_tree.inventory.path2id(old_path)
557
def id2path(self, file_id):
558
"""Return the new path in the target tree of the file with id file_id"""
559
path = self._new_id_r.get(file_id)
562
old_path = self.base_tree.id2path(file_id)
565
if old_path in self.deleted:
567
return self.new_path(old_path)
569
def old_contents_id(self, file_id):
570
"""Return the id in the base_tree for the given file_id.
571
Return None if the file did not exist in base.
573
if self.contents_by_id:
574
if self.base_tree.has_id(file_id):
578
new_path = self.id2path(file_id)
579
return self.base_tree.path2id(new_path)
581
def get_file(self, file_id):
582
"""Return a file-like object containing the new contents of the
583
file given by file_id.
585
TODO: It might be nice if this actually generated an entry
586
in the text-store, so that the file contents would
589
base_id = self.old_contents_id(file_id)
590
if (base_id is not None and
591
base_id != self.base_tree.inventory.root.file_id):
592
patch_original = self.base_tree.get_file(base_id)
594
patch_original = None
595
file_patch = self.patches.get(self.id2path(file_id))
596
if file_patch is None:
597
if (patch_original is None and
598
self.get_kind(file_id) == 'directory'):
600
assert patch_original is not None, "None: %s" % file_id
601
return patch_original
603
assert not file_patch.startswith('\\'), \
604
'Malformed patch for %s, %r' % (file_id, file_patch)
605
return patched_file(file_patch, patch_original)
607
def get_symlink_target(self, file_id):
608
new_path = self.id2path(file_id)
610
return self._targets[new_path]
612
return self.base_tree.get_symlink_target(file_id)
614
def get_kind(self, file_id):
    """Return the kind of the entry with the given file id.

    A kind recorded in self._kinds takes precedence; otherwise the kind
    is read from the base tree's inventory entry.

    :param file_id: The file id to look up.
    """
    if file_id in self._kinds:
        return self._kinds[file_id]
    return self.base_tree.inventory[file_id].kind
619
def is_executable(self, file_id):
    """Return the executable value of the entry with the given file id.

    The id is first translated to a path with id2path().  A value
    recorded for that path in self._executable takes precedence;
    otherwise the value comes from the base tree's inventory entry.

    :param file_id: The file id to look up.
    """
    path = self.id2path(file_id)
    if path in self._executable:
        return self._executable[path]
    return self.base_tree.inventory[file_id].executable
626
def get_last_changed(self, file_id):
    """Return the last-changed revision of the entry with the given id.

    The id is first translated to a path with id2path().  A value
    recorded for that path in self._last_changed takes precedence;
    otherwise the revision of the base tree's inventory entry is
    returned.

    :param file_id: The file id to look up.
    """
    path = self.id2path(file_id)
    if path in self._last_changed:
        return self._last_changed[path]
    return self.base_tree.inventory[file_id].revision
632
def get_size_and_sha1(self, file_id):
633
"""Return the size and sha1 hash of the given file id.
634
If the file was not locally modified, this is extracted
635
from the base_tree. Rather than re-reading the file.
637
new_path = self.id2path(file_id)
640
if new_path not in self.patches:
641
# If the entry does not have a patch, then the
642
# contents must be the same as in the base_tree
643
ie = self.base_tree.inventory[file_id]
644
if ie.text_size is None:
645
return ie.text_size, ie.text_sha1
646
return int(ie.text_size), ie.text_sha1
647
fileobj = self.get_file(file_id)
648
content = fileobj.read()
649
return len(content), sha_string(content)
651
def _get_inventory(self):
652
"""Build up the inventory entry for the BundleTree.
654
This needs to be called before ever accessing self.inventory
656
from os.path import dirname, basename
658
assert self.base_tree is not None
659
base_inv = self.base_tree.inventory
660
inv = Inventory(None, self.revision_id)
662
def add_entry(file_id):
663
path = self.id2path(file_id)
669
parent_path = dirname(path)
670
parent_id = self.path2id(parent_path)
672
kind = self.get_kind(file_id)
673
revision_id = self.get_last_changed(file_id)
675
name = basename(path)
676
if kind == 'directory':
677
ie = InventoryDirectory(file_id, name, parent_id)
679
ie = InventoryFile(file_id, name, parent_id)
680
ie.executable = self.is_executable(file_id)
681
elif kind == 'symlink':
682
ie = InventoryLink(file_id, name, parent_id)
683
ie.symlink_target = self.get_symlink_target(file_id)
684
ie.revision = revision_id
686
if kind in ('directory', 'symlink'):
687
ie.text_size, ie.text_sha1 = None, None
689
ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
690
if (ie.text_size is None) and (kind == 'file'):
691
raise BzrError('Got a text_size of None for file_id %r' % file_id)
694
sorted_entries = self.sorted_path_id()
695
for path, file_id in sorted_entries:
700
# Have to overload the inherited inventory property
701
# because _get_inventory is only called in the parent.
702
# Reading the docs, property() objects do not use
703
# overloading, they use the function as it was defined
705
inventory = property(_get_inventory)
708
for path, entry in self.inventory.iter_entries():
711
def sorted_path_id(self):
713
for result in self._new_id.iteritems():
715
for id in self.base_tree:
716
path = self.id2path(id)
719
paths.append((path, id))
724
def patched_file(file_patch, original):
725
"""Produce a file-like object with the patched version of a text"""
726
from bzrlib.patches import iter_patched
727
from bzrlib.iterablefile import IterableFile
729
return IterableFile(())
730
# string.splitlines(True) also splits on '\r', but the iter_patched code
731
# only expects to iterate over '\n' style lines
732
return IterableFile(iter_patched(original,
733
StringIO(file_patch).readlines()))