1
# Copyright (C) 2005-2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Read in a bundle stream, and process it into a BundleReader object."""
19
from __future__ import absolute_import
22
from cStringIO import StringIO
30
from bzrlib.bundle import apply_bundle
31
from bzrlib.errors import (
35
from bzrlib.inventory import (
41
from bzrlib.osutils import sha_string, pathjoin
42
from bzrlib.revision import Revision, NULL_REVISION
43
from bzrlib.testament import StrictTestament
44
from bzrlib.trace import mutter, warning
45
from bzrlib.tree import Tree
46
from bzrlib.xml5 import serializer_v5
49
class RevisionInfo(object):
    """Gets filled out for each revision object that is read.

    Only ``revision_id`` is known at construction time.  Every other field
    starts out as None and is populated afterwards;
    BundleInfo.complete_info() fills in any of timestamp, timezone, message
    and committer that are still None before calling as_revision().
    """

    def __init__(self, revision_id):
        """Create an empty record for the revision called revision_id."""
        self.revision_id = revision_id
        self.sha1 = None
        self.committer = None
        self.date = None
        self.timestamp = None
        self.timezone = None
        self.inventory_sha1 = None

        self.parent_ids = None
        self.base_id = None
        self.message = None
        self.properties = None
        self.tree_actions = None

    def __str__(self):
        """Return a pretty-printed dump of every attribute."""
        return pprint.pformat(self.__dict__)

    def as_revision(self):
        """Build a real Revision object from the fields collected so far.

        :return: A Revision carrying this record's id, committer, timestamp,
            timezone, inventory sha1, message, parent ids and properties.
        :raises ValueError: If a property line has neither a ': ' separator
            nor a trailing ':'.
        """
        rev = Revision(revision_id=self.revision_id,
            committer=self.committer,
            timestamp=float(self.timestamp),
            timezone=int(self.timezone),
            inventory_sha1=self.inventory_sha1,
            message='\n'.join(self.message))

        # parent_ids and properties default to None in __init__, so they are
        # only copied across once something has filled them in.
        if self.parent_ids:
            rev.parent_ids.extend(self.parent_ids)

        if self.properties:
            for prop in self.properties:
                key, value = self._split_property(prop)
                rev.properties[key] = value

        return rev

    @staticmethod
    def _split_property(prop):
        """Split one 'key: value' property line into a (key, value) pair.

        A line of the form 'key:' (no ': ' separator) yields an empty value.

        :raises ValueError: If the line contains no ': ' and does not end
            with ':'.
        """
        key_end = prop.find(': ')
        if key_end == -1:
            if not prop.endswith(':'):
                raise ValueError(prop)
            return str(prop[:-1]), ''
        return str(prop[:key_end]), prop[key_end+2:]

    @staticmethod
    def from_revision(revision):
        """Create a RevisionInfo describing an existing Revision.

        The message is stored as a list of lines and the properties as a
        list of 'key: value' strings, i.e. the shapes as_revision() consumes.
        """
        revision_info = RevisionInfo(revision.revision_id)
        date = timestamp.format_highres_date(revision.timestamp,
                                             revision.timezone)
        revision_info.date = date
        revision_info.timezone = revision.timezone
        revision_info.timestamp = revision.timestamp
        revision_info.message = revision.message.split('\n')
        revision_info.properties = [': '.join(p) for p in
                                    revision.properties.iteritems()]
        return revision_info
110
class BundleInfo(object):
111
"""This contains the meta information. Stuff that allows you to
112
recreate the revision or inventory XML.
114
def __init__(self, bundle_format=None):
    """Create an empty bundle description.

    :param bundle_format: Format identifier of the bundle being read, or
        None when it is not known.
    """
    # Keep what the caller passed in; it used to be accepted and then
    # unconditionally replaced by None.
    self.bundle_format = bundle_format
    self.committer = None
    # Bundle-wide defaults that complete_info() copies onto revisions which
    # do not carry their own values.
    self.date = None
    self.message = None

    # A list of RevisionInfo objects
    self.revisions = []

    # The next entries are created during complete_info() and
    # other post-read functions.

    # A list of real Revision objects
    self.real_revisions = []

    self.timestamp = None
    self.timezone = None

    # Have we checked the repository yet?
    self._validated_revisions_against_repo = False
136
return pprint.pformat(self.__dict__)
138
def complete_info(self):
    """This makes sure that all information is properly
    split up, based on the assumptions that can be made
    when information is missing.

    The bundle-wide date, message and committer act as defaults for every
    entry of self.revisions that lacks its own, after which
    self.real_revisions is rebuilt from the completed entries.
    """
    from bzrlib.timestamp import unpack_highres_date
    # Put in all of the guessable information.
    if not self.timestamp and self.date:
        self.timestamp, self.timezone = unpack_highres_date(self.date)

    self.real_revisions = []
    for rev in self.revisions:
        if rev.timestamp is None:
            if rev.date is not None:
                rev.timestamp, rev.timezone = \
                        unpack_highres_date(rev.date)
            else:
                # No per-revision date: fall back to the bundle-wide values
                # instead of overwriting a freshly unpacked timestamp.
                rev.timestamp = self.timestamp
                rev.timezone = self.timezone
        if rev.message is None and self.message:
            rev.message = self.message
        if rev.committer is None and self.committer:
            rev.committer = self.committer
        self.real_revisions.append(rev.as_revision())
163
def get_base(self, revision):
    """Return the id of the revision that `revision` is described against.

    :param revision: A Revision object contained in this bundle.
    :return: The explicit base id recorded for the revision if there is
        one, NULL_REVISION when the revision has no parents, and otherwise
        the last entry of revision.parent_ids.
    :raises KeyError: If the bundle has no RevisionInfo for the revision.
    """
    revision_info = self.get_revision_info(revision.revision_id)
    if revision_info.base_id is not None:
        return revision_info.base_id
    if len(revision.parent_ids) == 0:
        # There is no base listed, and
        # the lowest revision doesn't have a parent
        # so this is probably against the empty tree
        # and thus base truly is NULL_REVISION
        return NULL_REVISION
    return revision.parent_ids[-1]
176
def _get_target(self):
    """Return the id of the first revision in the bundle, or None.

    The real Revision objects are consulted first; the RevisionInfo list is
    only used while real_revisions is still empty.
    """
    built = self.real_revisions
    if len(built) > 0:
        return built[0].revision_id
    parsed = self.revisions
    if len(parsed) > 0:
        return parsed[0].revision_id
    return None

target = property(_get_target, doc='The target revision id')
186
def get_revision(self, revision_id):
    """Return the real Revision object with the given id.

    :param revision_id: Id to look for in self.real_revisions.
    :raises KeyError: If no entry of self.real_revisions has that id.
    """
    for r in self.real_revisions:
        if r.revision_id == revision_id:
            # Hand the match back; without this the loop fell through to
            # the KeyError below even when the revision was present.
            return r
    raise KeyError(revision_id)
192
def get_revision_info(self, revision_id):
    """Return the RevisionInfo entry with the given id.

    :param revision_id: Id to look for in self.revisions.
    :raises KeyError: If no entry of self.revisions has that id.
    """
    for r in self.revisions:
        if r.revision_id == revision_id:
            # Hand the match back; without this the loop fell through to
            # the KeyError below even when the entry was present.
            return r
    raise KeyError(revision_id)
198
def revision_tree(self, repository, revision_id, base=None):
    """Build and validate the tree for one revision of the bundle.

    :param repository: Repository supplying the base tree; it is also used
        once to validate the revisions the bundle refers to.
    :param revision_id: Id of the bundle revision to reconstruct.
    :param base: Ignored; the base is always taken from get_base().
    :return: The BundleTree for revision_id, after its inventory and
        testament have been checked.
    :raises KeyError: If revision_id is not part of the bundle.
    :raises AssertionError: If the revision would be its own base.
    """
    revision = self.get_revision(revision_id)
    # NOTE(review): the caller-supplied ``base`` is overwritten here, as it
    # always has been; honouring it would change behaviour for callers.
    base = self.get_base(revision)
    if base == revision_id:
        raise AssertionError()
    if not self._validated_revisions_against_repo:
        self._validate_references_from_repository(repository)
    # Called only for its KeyError when the bundle has no info entry.
    self.get_revision_info(revision_id)
    bundle_tree = BundleTree(repository.revision_tree(base),
                              revision_id)
    self._update_tree(bundle_tree, revision_id)

    inv = bundle_tree.inventory
    self._validate_inventory(inv, revision_id)
    self._validate_revision(bundle_tree, revision_id)

    return bundle_tree
217
def _validate_references_from_repository(self, repository):
218
"""Now that we have a repository which should have some of the
219
revisions we care about, go through and validate all of them
224
def add_sha(d, revision_id, sha1):
225
if revision_id is None:
227
raise BzrError('A Null revision should always'
228
'have a null sha1 hash')
231
# This really should have been validated as part
232
# of _validate_revisions but lets do it again
233
if sha1 != d[revision_id]:
234
raise BzrError('** Revision %r referenced with 2 different'
235
' sha hashes %s != %s' % (revision_id,
236
sha1, d[revision_id]))
238
d[revision_id] = sha1
240
# All of the contained revisions were checked
241
# in _validate_revisions
243
for rev_info in self.revisions:
244
checked[rev_info.revision_id] = True
245
add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)
247
for (rev, rev_info) in zip(self.real_revisions, self.revisions):
248
add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)
252
for revision_id, sha1 in rev_to_sha.iteritems():
253
if repository.has_revision(revision_id):
254
testament = StrictTestament.from_revision(repository,
256
local_sha1 = self._testament_sha1_from_revision(repository,
258
if sha1 != local_sha1:
259
raise BzrError('sha1 mismatch. For revision id {%s}'
260
'local: %s, bundle: %s' % (revision_id, local_sha1, sha1))
263
elif revision_id not in checked:
264
missing[revision_id] = sha1
267
# I don't know if this is an error yet
268
warning('Not all revision hashes could be validated.'
269
' Unable validate %d hashes' % len(missing))
270
mutter('Verified %d sha hashes for the bundle.' % count)
271
self._validated_revisions_against_repo = True
273
def _validate_inventory(self, inv, revision_id):
274
"""At this point we should have generated the BundleTree,
275
so build up an inventory, and make sure the hashes match.
277
# Now we should have a complete inventory entry.
278
s = serializer_v5.write_inventory_to_string(inv)
280
# Target revision is the last entry in the real_revisions list
281
rev = self.get_revision(revision_id)
282
if rev.revision_id != revision_id:
283
raise AssertionError()
284
if sha1 != rev.inventory_sha1:
285
f = open(',,bogus-inv', 'wb')
290
warning('Inventory sha hash mismatch for revision %s. %s'
291
' != %s' % (revision_id, sha1, rev.inventory_sha1))
293
def _validate_revision(self, tree, revision_id):
294
"""Make sure all revision entries match their checksum."""
296
# This is a mapping from each revision id to its sha hash
299
rev = self.get_revision(revision_id)
300
rev_info = self.get_revision_info(revision_id)
301
if not (rev.revision_id == rev_info.revision_id):
302
raise AssertionError()
303
if not (rev.revision_id == revision_id):
304
raise AssertionError()
305
sha1 = self._testament_sha1(rev, tree)
306
if sha1 != rev_info.sha1:
307
raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
308
if rev.revision_id in rev_to_sha1:
309
raise BzrError('Revision {%s} given twice in the list'
311
rev_to_sha1[rev.revision_id] = sha1
313
def _update_tree(self, bundle_tree, revision_id):
314
"""This fills out a BundleTree based on the information
317
:param bundle_tree: A BundleTree to update with the new information.
320
def get_rev_id(last_changed, path, kind):
321
if last_changed is not None:
322
# last_changed will be a Unicode string because of how it was
323
# read. Convert it back to utf8.
324
changed_revision_id = osutils.safe_revision_id(last_changed,
327
changed_revision_id = revision_id
328
bundle_tree.note_last_changed(path, changed_revision_id)
329
return changed_revision_id
331
def extra_info(info, new_path):
334
for info_item in info:
336
name, value = info_item.split(':', 1)
338
raise ValueError('Value %r has no colon' % info_item)
339
if name == 'last-changed':
341
elif name == 'executable':
342
val = (value == 'yes')
343
bundle_tree.note_executable(new_path, val)
344
elif name == 'target':
345
bundle_tree.note_target(new_path, value)
346
elif name == 'encoding':
348
return last_changed, encoding
350
def do_patch(path, lines, encoding):
351
if encoding == 'base64':
352
patch = base64.decodestring(''.join(lines))
353
elif encoding is None:
354
patch = ''.join(lines)
356
raise ValueError(encoding)
357
bundle_tree.note_patch(path, patch)
359
def renamed(kind, extra, lines):
360
info = extra.split(' // ')
362
raise BzrError('renamed action lines need both a from and to'
365
if info[1].startswith('=> '):
366
new_path = info[1][3:]
370
bundle_tree.note_rename(old_path, new_path)
371
last_modified, encoding = extra_info(info[2:], new_path)
372
revision = get_rev_id(last_modified, new_path, kind)
374
do_patch(new_path, lines, encoding)
376
def removed(kind, extra, lines):
377
info = extra.split(' // ')
379
# TODO: in the future we might allow file ids to be
380
# given for removed entries
381
raise BzrError('removed action lines should only have the path'
384
bundle_tree.note_deletion(path)
386
def added(kind, extra, lines):
387
info = extra.split(' // ')
389
raise BzrError('add action lines require the path and file id'
392
raise BzrError('add action lines have fewer than 5 entries.'
395
if not info[1].startswith('file-id:'):
396
raise BzrError('The file-id should follow the path for an add'
398
# This will be Unicode because of how the stream is read. Turn it
399
# back into a utf8 file_id
400
file_id = osutils.safe_file_id(info[1][8:], warn=False)
402
bundle_tree.note_id(file_id, path, kind)
403
# this will be overridden in extra_info if executable is specified.
404
bundle_tree.note_executable(path, False)
405
last_changed, encoding = extra_info(info[2:], path)
406
revision = get_rev_id(last_changed, path, kind)
407
if kind == 'directory':
409
do_patch(path, lines, encoding)
411
def modified(kind, extra, lines):
412
info = extra.split(' // ')
414
raise BzrError('modified action lines have at least'
415
'the path in them: %r' % extra)
418
last_modified, encoding = extra_info(info[1:], path)
419
revision = get_rev_id(last_modified, path, kind)
421
do_patch(path, lines, encoding)
429
for action_line, lines in \
430
self.get_revision_info(revision_id).tree_actions:
431
first = action_line.find(' ')
433
raise BzrError('Bogus action line'
434
' (no opening space): %r' % action_line)
435
second = action_line.find(' ', first+1)
437
raise BzrError('Bogus action line'
438
' (missing second space): %r' % action_line)
439
action = action_line[:first]
440
kind = action_line[first+1:second]
441
if kind not in ('file', 'directory', 'symlink'):
442
raise BzrError('Bogus action line'
443
' (invalid object kind %r): %r' % (kind, action_line))
444
extra = action_line[second+1:]
446
if action not in valid_actions:
447
raise BzrError('Bogus action line'
448
' (unrecognized action): %r' % action_line)
449
valid_actions[action](kind, extra, lines)
451
def install_revisions(self, target_repo, stream_input=True):
    """Install revisions and return the target revision

    :param target_repo: The repository to install into
    :param stream_input: Ignored by this implementation.
    :return: The target revision id (self.target).
    """
    apply_bundle.install_bundle(target_repo, self)
    return self.target
460
def get_merge_request(self, target_repo):
    """Provide data for performing a merge

    :param target_repo: Not used by this implementation.
    :return: A (suggested base, suggested target, verification status)
        tuple.  No base is suggested, the target is self.target and the
        status is always 'inapplicable'.
    """
    suggested_base = None
    verification = 'inapplicable'
    return suggested_base, self.target, verification
468
class BundleTree(Tree):
470
def __init__(self, base_tree, revision_id):
    """Create a tree that layers bundle changes on top of base_tree.

    :param base_tree: The tree the recorded changes are relative to.
    :param revision_id: Revision id this tree represents.
    """
    self.base_tree = base_tree
    # The comments below describe what the note_*() methods actually store.
    self._renamed = {} # new_path => old_path
    self._renamed_r = {} # old_path => new_path
    self._new_id = {} # new_path => new_id
    self._new_id_r = {} # new_id => new_path
    self._kinds = {} # new_id => kind
    self._last_changed = {} # new_path => revision_id
    self._executable = {} # new_path => executable value
    self.patches = {} # new_path => patch text
    self._targets = {} # new path => new symlink target
    self.deleted = [] # old paths removed by this revision
    self.contents_by_id = True
    self.revision_id = revision_id
    self._inventory = None
487
return pprint.pformat(self.__dict__)
489
def note_rename(self, old_path, new_path):
    """Record that the entry at old_path now lives at new_path.

    Both directions of the mapping are stored.  A path may take part in
    only one rename: AssertionError is raised with new_path if it is
    already a rename target, or with old_path if it is already a source.
    """
    by_new_path = self._renamed
    if new_path in by_new_path:
        raise AssertionError(new_path)
    by_old_path = self._renamed_r
    if old_path in by_old_path:
        raise AssertionError(old_path)
    by_new_path[new_path] = old_path
    by_old_path[old_path] = new_path
498
def note_id(self, new_id, new_path, kind='file'):
    """Register the file id, path and kind of an entry absent from base.

    :param new_id: File id of the new entry.
    :param new_path: Path of the entry in the target tree.
    :param kind: Kind of the entry; 'file' unless stated otherwise.
    """
    # Path <-> id lookups are kept in both directions.
    self._new_id[new_path] = new_id
    self._new_id_r[new_id] = new_path

    self._kinds[new_id] = kind
504
def note_last_changed(self, file_id, revision_id):
    """Record the revision in which an entry was last changed.

    :param file_id: Key to record the revision under.  Despite the name,
        get_last_changed() looks entries up by path, so this is the key
        that must be passed here.
    :param revision_id: Last-changed revision id for that entry.
    :raises BzrError: If a different revision was already recorded for the
        same key.
    """
    if (file_id in self._last_changed
            and self._last_changed[file_id] != revision_id):
        raise BzrError('Mismatched last-changed revision for file_id {%s}'
                ': %s != %s' % (file_id,
                                self._last_changed[file_id],
                                revision_id))
    self._last_changed[file_id] = revision_id
513
def note_patch(self, new_path, patch):
    """Remember the patch text that produces the file at new_path.

    A later call for the same path replaces the earlier patch.
    """
    patches_by_path = self.patches
    patches_by_path[new_path] = patch
517
def note_target(self, new_path, target):
    """Remember the link target of the symlink found at new_path."""
    targets_by_path = self._targets
    targets_by_path[new_path] = target
521
def note_deletion(self, old_path):
    """Add old_path (a path in the base tree) to the list of deletions."""
    removed_paths = self.deleted
    removed_paths.append(old_path)
525
def note_executable(self, new_path, executable):
    """Remember the executable flag of the entry at new_path.

    A later call for the same path overrides the earlier value.
    """
    flags_by_path = self._executable
    flags_by_path[new_path] = executable
528
def old_path(self, new_path):
529
"""Get the old_path (path in the base_tree) for the file at new_path"""
530
if new_path[:1] in ('\\', '/'):
531
raise ValueError(new_path)
532
old_path = self._renamed.get(new_path)
533
if old_path is not None:
535
dirname,basename = os.path.split(new_path)
536
# dirname is not '' doesn't work, because
537
# dirname may be a unicode entry, and is
538
# requires the objects to be identical
540
old_dir = self.old_path(dirname)
544
old_path = pathjoin(old_dir, basename)
547
#If the new path wasn't in renamed, the old one shouldn't be in
549
if old_path in self._renamed_r:
553
def new_path(self, old_path):
554
"""Get the new_path (path in the target_tree) for the file at old_path
557
if old_path[:1] in ('\\', '/'):
558
raise ValueError(old_path)
559
new_path = self._renamed_r.get(old_path)
560
if new_path is not None:
562
if new_path in self._renamed:
564
dirname,basename = os.path.split(old_path)
566
new_dir = self.new_path(dirname)
570
new_path = pathjoin(new_dir, basename)
573
#If the old path wasn't in renamed, the new one shouldn't be in
575
if new_path in self._renamed:
579
def path2id(self, path):
580
"""Return the id of the file present at path in the target tree."""
581
file_id = self._new_id.get(path)
582
if file_id is not None:
584
old_path = self.old_path(path)
587
if old_path in self.deleted:
589
if getattr(self.base_tree, 'path2id', None) is not None:
590
return self.base_tree.path2id(old_path)
592
return self.base_tree.inventory.path2id(old_path)
594
def id2path(self, file_id):
595
"""Return the new path in the target tree of the file with id file_id"""
596
path = self._new_id_r.get(file_id)
599
old_path = self.base_tree.id2path(file_id)
602
if old_path in self.deleted:
604
return self.new_path(old_path)
606
def old_contents_id(self, file_id):
607
"""Return the id in the base_tree for the given file_id.
608
Return None if the file did not exist in base.
610
if self.contents_by_id:
611
if self.base_tree.has_id(file_id):
615
new_path = self.id2path(file_id)
616
return self.base_tree.path2id(new_path)
618
def get_file(self, file_id):
619
"""Return a file-like object containing the new contents of the
620
file given by file_id.
622
TODO: It might be nice if this actually generated an entry
623
in the text-store, so that the file contents would
626
base_id = self.old_contents_id(file_id)
627
if (base_id is not None and
628
base_id != self.base_tree.inventory.root.file_id):
629
patch_original = self.base_tree.get_file(base_id)
631
patch_original = None
632
file_patch = self.patches.get(self.id2path(file_id))
633
if file_patch is None:
634
if (patch_original is None and
635
self.get_kind(file_id) == 'directory'):
637
if patch_original is None:
638
raise AssertionError("None: %s" % file_id)
639
return patch_original
641
if file_patch.startswith('\\'):
643
'Malformed patch for %s, %r' % (file_id, file_patch))
644
return patched_file(file_patch, patch_original)
646
def get_symlink_target(self, file_id, path=None):
648
path = self.id2path(file_id)
650
return self._targets[path]
652
return self.base_tree.get_symlink_target(file_id)
654
def get_kind(self, file_id):
    """Return the kind of the entry with the given file id.

    A kind recorded through note_id() wins; any other id is looked up in
    the base tree's inventory.
    """
    if file_id not in self._kinds:
        return self.base_tree.inventory[file_id].kind
    return self._kinds[file_id]
659
def is_executable(self, file_id):
    """Return the executable flag of the entry with the given file id.

    A flag recorded for the entry's path through note_executable() wins;
    otherwise the flag comes from the base tree's inventory entry.
    """
    path = self.id2path(file_id)
    if path not in self._executable:
        return self.base_tree.inventory[file_id].executable
    return self._executable[path]
666
def get_last_changed(self, file_id):
    """Return the revision id in which the given entry last changed.

    A revision recorded for the entry's path through note_last_changed()
    wins; otherwise the base tree is asked for the file's revision.
    """
    path = self.id2path(file_id)
    if path not in self._last_changed:
        return self.base_tree.get_file_revision(file_id)
    return self._last_changed[path]
672
def get_size_and_sha1(self, file_id):
673
"""Return the size and sha1 hash of the given file id.
674
If the file was not locally modified, this is extracted
675
from the base_tree. Rather than re-reading the file.
677
new_path = self.id2path(file_id)
680
if new_path not in self.patches:
681
# If the entry does not have a patch, then the
682
# contents must be the same as in the base_tree
683
ie = self.base_tree.inventory[file_id]
684
if ie.text_size is None:
685
return ie.text_size, ie.text_sha1
686
return int(ie.text_size), ie.text_sha1
687
fileobj = self.get_file(file_id)
688
content = fileobj.read()
689
return len(content), sha_string(content)
691
def _get_inventory(self):
692
"""Build up the inventory entry for the BundleTree.
694
This need to be called before ever accessing self.inventory
696
from os.path import dirname, basename
697
base_inv = self.base_tree.inventory
698
inv = Inventory(None, self.revision_id)
700
def add_entry(file_id):
701
path = self.id2path(file_id)
707
parent_path = dirname(path)
708
parent_id = self.path2id(parent_path)
710
kind = self.get_kind(file_id)
711
revision_id = self.get_last_changed(file_id)
713
name = basename(path)
714
if kind == 'directory':
715
ie = InventoryDirectory(file_id, name, parent_id)
717
ie = InventoryFile(file_id, name, parent_id)
718
ie.executable = self.is_executable(file_id)
719
elif kind == 'symlink':
720
ie = InventoryLink(file_id, name, parent_id)
721
ie.symlink_target = self.get_symlink_target(file_id, path)
722
ie.revision = revision_id
725
ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
726
if ie.text_size is None:
728
'Got a text_size of None for file_id %r' % file_id)
731
sorted_entries = self.sorted_path_id()
732
for path, file_id in sorted_entries:
737
# Have to overload the inherited inventory property
738
# because _get_inventory is only called in the parent.
739
# Reading the docs, property() objects do not use
740
# overloading, they use the function as it was defined
742
inventory = property(_get_inventory)
745
for path, entry in self.inventory.iter_entries():
748
def list_files(self, include_root=False, from_dir=None, recursive=True):
749
# The only files returned by this are those from the version
754
from_dir_id = inv.path2id(from_dir)
755
if from_dir_id is None:
756
# Directory not versioned
758
entries = inv.iter_entries(from_dir=from_dir_id, recursive=recursive)
759
if inv.root is not None and not include_root and from_dir is None:
760
# skip the root for compatibility with the current APIs.
762
for path, entry in entries:
763
yield path, 'V', entry.kind, entry.file_id, entry
765
def sorted_path_id(self):
767
for result in self._new_id.iteritems():
769
for id in self.base_tree.all_file_ids():
770
path = self.id2path(id)
773
paths.append((path, id))
778
def patched_file(file_patch, original):
    """Produce a file-like object with the patched version of a text

    :param file_patch: The patch text to apply.
    :param original: The original text, passed to iter_patched as-is.
    :return: An IterableFile over the patched lines.
    """
    from bzrlib.patches import iter_patched
    from bzrlib.iterablefile import IterableFile
    # NOTE(review): the condition guarding this early return is missing from
    # the listing; without a guard the patching code below is unreachable.
    # An empty patch is the evident case -- confirm against upstream.
    if file_patch == "":
        return IterableFile(())
    # string.splitlines(True) also splits on '\r', but the iter_patched code
    # only expects to iterate over '\n' style lines
    return IterableFile(iter_patched(original,
                StringIO(file_patch).readlines()))