1
# Copyright (C) 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Read in a bundle stream, and process it into a BundleReader object."""
20
from cStringIO import StringIO
29
from bzrlib.bundle import apply_bundle
30
from bzrlib.errors import (TestamentMismatch, BzrError,
31
MalformedHeader, MalformedPatches, NotABundle)
32
from bzrlib.inventory import (Inventory, InventoryEntry,
33
InventoryDirectory, InventoryFile,
35
from bzrlib.osutils import sha_file, sha_string, pathjoin
36
from bzrlib.revision import Revision, NULL_REVISION
37
from bzrlib.testament import StrictTestament
38
from bzrlib.trace import mutter, warning
39
import bzrlib.transport
40
from bzrlib.tree import Tree
41
import bzrlib.urlutils
42
from bzrlib.xml5 import serializer_v5
45
class RevisionInfo(object):
46
"""Gets filled out for each revision object that is read.
48
def __init__(self, revision_id):
49
self.revision_id = revision_id
55
self.inventory_sha1 = None
57
self.parent_ids = None
60
self.properties = None
61
self.tree_actions = None
64
return pprint.pformat(self.__dict__)
66
def as_revision(self):
67
rev = Revision(revision_id=self.revision_id,
68
committer=self.committer,
69
timestamp=float(self.timestamp),
70
timezone=int(self.timezone),
71
inventory_sha1=self.inventory_sha1,
72
message='\n'.join(self.message))
75
rev.parent_ids.extend(self.parent_ids)
78
for property in self.properties:
79
key_end = property.find(': ')
81
assert property.endswith(':')
82
key = str(property[:-1])
85
key = str(property[:key_end])
86
value = property[key_end+2:]
87
rev.properties[key] = value
92
def from_revision(revision):
93
revision_info = RevisionInfo(revision.revision_id)
94
date = timestamp.format_highres_date(revision.timestamp,
96
revision_info.date = date
97
revision_info.timezone = revision.timezone
98
revision_info.timestamp = revision.timestamp
99
revision_info.message = revision.message.split('\n')
100
revision_info.properties = [': '.join(p) for p in
101
revision.properties.iteritems()]
105
class BundleInfo(object):
106
"""This contains the meta information. Stuff that allows you to
107
recreate the revision or inventory XML.
109
def __init__(self, bundle_format=None):
110
self.bundle_format = None
111
self.committer = None
115
# A list of RevisionInfo objects
118
# The next entries are created during complete_info() and
119
# other post-read functions.
121
# A list of real Revision objects
122
self.real_revisions = []
124
self.timestamp = None
127
# Have we checked the repository yet?
128
self._validated_revisions_against_repo = False
131
return pprint.pformat(self.__dict__)
133
def complete_info(self):
134
"""This makes sure that all information is properly
135
split up, based on the assumptions that can be made
136
when information is missing.
138
from bzrlib.timestamp import unpack_highres_date
139
# Put in all of the guessable information.
140
if not self.timestamp and self.date:
141
self.timestamp, self.timezone = unpack_highres_date(self.date)
143
self.real_revisions = []
144
for rev in self.revisions:
145
if rev.timestamp is None:
146
if rev.date is not None:
147
rev.timestamp, rev.timezone = \
148
unpack_highres_date(rev.date)
150
rev.timestamp = self.timestamp
151
rev.timezone = self.timezone
152
if rev.message is None and self.message:
153
rev.message = self.message
154
if rev.committer is None and self.committer:
155
rev.committer = self.committer
156
self.real_revisions.append(rev.as_revision())
158
def get_base(self, revision):
159
revision_info = self.get_revision_info(revision.revision_id)
160
if revision_info.base_id is not None:
161
if revision_info.base_id == NULL_REVISION:
164
return revision_info.base_id
165
if len(revision.parent_ids) == 0:
166
# There is no base listed, and
167
# the lowest revision doesn't have a parent
168
# so this is probably against the empty tree
169
# and thus base truly is None
172
return revision.parent_ids[-1]
174
def _get_target(self):
    """Return the target revision id.

    The first entry of ``real_revisions`` is preferred; when that list is
    empty the first entry of ``revisions`` is used instead.  With both
    lists empty the result is None.
    """
    real = self.real_revisions
    if len(real) > 0:
        return real[0].revision_id
    # No real Revision objects yet: fall back to the parsed
    # RevisionInfo records.
    parsed = self.revisions
    if len(parsed) > 0:
        return parsed[0].revision_id
    return None
182
target = property(_get_target, doc='The target revision id')
184
def get_revision(self, revision_id):
    """Return the entry of ``real_revisions`` with the given revision id.

    :param revision_id: The revision id to search for.
    :return: The first item whose ``revision_id`` attribute compares
        equal to ``revision_id``.
    :raises KeyError: If no entry matches.
    """
    matches = (rev for rev in self.real_revisions
               if rev.revision_id == revision_id)
    # Only the first match matters; the loop body runs at most once.
    for found in matches:
        return found
    raise KeyError(revision_id)
190
def get_revision_info(self, revision_id):
    """Return the entry of ``revisions`` with the given revision id.

    :param revision_id: The revision id to search for.
    :return: The first item in ``self.revisions`` whose ``revision_id``
        attribute compares equal to ``revision_id``.
    :raises KeyError: If no entry matches.
    """
    wanted = revision_id
    for info in self.revisions:
        if info.revision_id == wanted:
            return info
    # Walked the whole list without a hit.
    raise KeyError(wanted)
196
def revision_tree(self, repository, revision_id, base=None):
197
revision_id = osutils.safe_revision_id(revision_id)
198
revision = self.get_revision(revision_id)
199
base = self.get_base(revision)
200
assert base != revision_id
201
if not self._validated_revisions_against_repo:
202
self._validate_references_from_repository(repository)
203
revision_info = self.get_revision_info(revision_id)
204
inventory_revision_id = revision_id
205
bundle_tree = BundleTree(repository.revision_tree(base),
206
inventory_revision_id)
207
self._update_tree(bundle_tree, revision_id)
209
inv = bundle_tree.inventory
210
self._validate_inventory(inv, revision_id)
211
self._validate_revision(inv, revision_id)
215
def _validate_references_from_repository(self, repository):
216
"""Now that we have a repository which should have some of the
217
revisions we care about, go through and validate all of them
222
def add_sha(d, revision_id, sha1):
223
if revision_id is None:
225
raise BzrError('A Null revision should always'
226
'have a null sha1 hash')
229
# This really should have been validated as part
230
# of _validate_revisions but lets do it again
231
if sha1 != d[revision_id]:
232
raise BzrError('** Revision %r referenced with 2 different'
233
' sha hashes %s != %s' % (revision_id,
234
sha1, d[revision_id]))
236
d[revision_id] = sha1
238
# All of the contained revisions were checked
239
# in _validate_revisions
241
for rev_info in self.revisions:
242
checked[rev_info.revision_id] = True
243
add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)
245
for (rev, rev_info) in zip(self.real_revisions, self.revisions):
246
add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)
250
for revision_id, sha1 in rev_to_sha.iteritems():
251
if repository.has_revision(revision_id):
252
testament = StrictTestament.from_revision(repository,
254
local_sha1 = self._testament_sha1_from_revision(repository,
256
if sha1 != local_sha1:
257
raise BzrError('sha1 mismatch. For revision id {%s}'
258
'local: %s, bundle: %s' % (revision_id, local_sha1, sha1))
261
elif revision_id not in checked:
262
missing[revision_id] = sha1
264
for inv_id, sha1 in inv_to_sha.iteritems():
265
if repository.has_revision(inv_id):
266
# Note: branch.get_inventory_sha1() just returns the value that
267
# is stored in the revision text, and that value may be out
268
# of date. This is bogus, because that means we aren't
269
# validating the actual text, just that we wrote and read the
270
# string. But for now, what the hell.
271
local_sha1 = repository.get_inventory_sha1(inv_id)
272
if sha1 != local_sha1:
273
raise BzrError('sha1 mismatch. For inventory id {%s}'
274
'local: %s, bundle: %s' %
275
(inv_id, local_sha1, sha1))
280
# I don't know if this is an error yet
281
warning('Not all revision hashes could be validated.'
282
' Unable validate %d hashes' % len(missing))
283
mutter('Verified %d sha hashes for the bundle.' % count)
284
self._validated_revisions_against_repo = True
286
def _validate_inventory(self, inv, revision_id):
287
"""At this point we should have generated the BundleTree,
288
so build up an inventory, and make sure the hashes match.
291
assert inv is not None
293
# Now we should have a complete inventory entry.
294
s = serializer_v5.write_inventory_to_string(inv)
296
# Target revision is the last entry in the real_revisions list
297
rev = self.get_revision(revision_id)
298
assert rev.revision_id == revision_id
299
if sha1 != rev.inventory_sha1:
300
open(',,bogus-inv', 'wb').write(s)
301
warning('Inventory sha hash mismatch for revision %s. %s'
302
' != %s' % (revision_id, sha1, rev.inventory_sha1))
304
def _validate_revision(self, inventory, revision_id):
305
"""Make sure all revision entries match their checksum."""
307
# This is a mapping from each revision id to its sha hash
310
rev = self.get_revision(revision_id)
311
rev_info = self.get_revision_info(revision_id)
312
assert rev.revision_id == rev_info.revision_id
313
assert rev.revision_id == revision_id
314
sha1 = self._testament_sha1(rev, inventory)
315
if sha1 != rev_info.sha1:
316
raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
317
if rev.revision_id in rev_to_sha1:
318
raise BzrError('Revision {%s} given twice in the list'
320
rev_to_sha1[rev.revision_id] = sha1
322
def _update_tree(self, bundle_tree, revision_id):
323
"""This fills out a BundleTree based on the information
326
:param bundle_tree: A BundleTree to update with the new information.
329
def get_rev_id(last_changed, path, kind):
330
if last_changed is not None:
331
# last_changed will be a Unicode string because of how it was
332
# read. Convert it back to utf8.
333
changed_revision_id = osutils.safe_revision_id(last_changed,
336
changed_revision_id = revision_id
337
bundle_tree.note_last_changed(path, changed_revision_id)
338
return changed_revision_id
340
def extra_info(info, new_path):
343
for info_item in info:
345
name, value = info_item.split(':', 1)
347
raise 'Value %r has no colon' % info_item
348
if name == 'last-changed':
350
elif name == 'executable':
351
assert value in ('yes', 'no'), value
352
val = (value == 'yes')
353
bundle_tree.note_executable(new_path, val)
354
elif name == 'target':
355
bundle_tree.note_target(new_path, value)
356
elif name == 'encoding':
358
return last_changed, encoding
360
def do_patch(path, lines, encoding):
361
if encoding is not None:
362
assert encoding == 'base64'
363
patch = base64.decodestring(''.join(lines))
365
patch = ''.join(lines)
366
bundle_tree.note_patch(path, patch)
368
def renamed(kind, extra, lines):
369
info = extra.split(' // ')
371
raise BzrError('renamed action lines need both a from and to'
374
if info[1].startswith('=> '):
375
new_path = info[1][3:]
379
bundle_tree.note_rename(old_path, new_path)
380
last_modified, encoding = extra_info(info[2:], new_path)
381
revision = get_rev_id(last_modified, new_path, kind)
383
do_patch(new_path, lines, encoding)
385
def removed(kind, extra, lines):
386
info = extra.split(' // ')
388
# TODO: in the future we might allow file ids to be
389
# given for removed entries
390
raise BzrError('removed action lines should only have the path'
393
bundle_tree.note_deletion(path)
395
def added(kind, extra, lines):
396
info = extra.split(' // ')
398
raise BzrError('add action lines require the path and file id'
401
raise BzrError('add action lines have fewer than 5 entries.'
404
if not info[1].startswith('file-id:'):
405
raise BzrError('The file-id should follow the path for an add'
407
# This will be Unicode because of how the stream is read. Turn it
408
# back into a utf8 file_id
409
file_id = osutils.safe_file_id(info[1][8:], warn=False)
411
bundle_tree.note_id(file_id, path, kind)
412
# this will be overridden in extra_info if executable is specified.
413
bundle_tree.note_executable(path, False)
414
last_changed, encoding = extra_info(info[2:], path)
415
revision = get_rev_id(last_changed, path, kind)
416
if kind == 'directory':
418
do_patch(path, lines, encoding)
420
def modified(kind, extra, lines):
421
info = extra.split(' // ')
423
raise BzrError('modified action lines have at least'
424
'the path in them: %r' % extra)
427
last_modified, encoding = extra_info(info[1:], path)
428
revision = get_rev_id(last_modified, path, kind)
430
do_patch(path, lines, encoding)
438
for action_line, lines in \
439
self.get_revision_info(revision_id).tree_actions:
440
first = action_line.find(' ')
442
raise BzrError('Bogus action line'
443
' (no opening space): %r' % action_line)
444
second = action_line.find(' ', first+1)
446
raise BzrError('Bogus action line'
447
' (missing second space): %r' % action_line)
448
action = action_line[:first]
449
kind = action_line[first+1:second]
450
if kind not in ('file', 'directory', 'symlink'):
451
raise BzrError('Bogus action line'
452
' (invalid object kind %r): %r' % (kind, action_line))
453
extra = action_line[second+1:]
455
if action not in valid_actions:
456
raise BzrError('Bogus action line'
457
' (unrecognized action): %r' % action_line)
458
valid_actions[action](kind, extra, lines)
460
def install_revisions(self, target_repo, stream_input=True):
461
"""Install revisions and return the target revision
463
:param target_repo: The repository to install into
464
:param stream_input: Ignored by this implementation.
466
apply_bundle.install_bundle(target_repo, self)
469
def get_merge_request(self, target_repo):
    """Provide data for performing a merge

    :param target_repo: Not consulted by this implementation.
    :return: A 3-tuple of suggested base, suggested target, and patch
        verification status.  The base is always None, the target is
        ``self.target`` and the status is the string 'inapplicable'.
    """
    suggested_base = None
    verification = 'inapplicable'
    return (suggested_base, self.target, verification)
477
class BundleTree(Tree):
478
def __init__(self, base_tree, revision_id):
479
self.base_tree = base_tree
480
self._renamed = {} # Mapping from old_path => new_path
481
self._renamed_r = {} # new_path => old_path
482
self._new_id = {} # new_path => new_id
483
self._new_id_r = {} # new_id => new_path
484
self._kinds = {} # new_id => kind
485
self._last_changed = {} # new_id => revision_id
486
self._executable = {} # new_id => executable value
488
self._targets = {} # new path => new symlink target
490
self.contents_by_id = True
491
self.revision_id = revision_id
492
self._inventory = None
495
return pprint.pformat(self.__dict__)
497
def note_rename(self, old_path, new_path):
    """Record that a file/directory moved from old_path to new_path.

    The rename is stored in both directions: ``_renamed`` is keyed by the
    new path and ``_renamed_r`` by the old path.  A path may only take
    part in one rename; a repeat trips an assertion.
    """
    by_new, by_old = self._renamed, self._renamed_r
    assert new_path not in by_new
    assert old_path not in by_old
    by_new[new_path] = old_path
    by_old[old_path] = new_path
504
def note_id(self, new_id, new_path, kind='file'):
    """Register the id of an entry that does not exist in the base tree.

    :param new_id: The file id to assign.
    :param new_path: The path at which the entry appears.
    :param kind: The entry kind recorded for ``new_id``; defaults to
        'file'.
    """
    path_to_id = self._new_id
    id_to_path = self._new_id_r
    # Keep the path->id and id->path tables in step with each other.
    path_to_id[new_path] = new_id
    id_to_path[new_id] = new_path
    self._kinds[new_id] = kind
510
def note_last_changed(self, file_id, revision_id):
    """Record the revision in which an entry last changed.

    Recording the same revision twice for one key is harmless.

    :param file_id: Key under which the revision is stored in
        ``_last_changed``.  It is used as given.
    :param revision_id: The last-changed revision to remember.
    :raises BzrError: If a different revision was already recorded for
        ``file_id``.
    """
    recorded = self._last_changed
    if file_id in recorded:
        previous = recorded[file_id]
        if previous != revision_id:
            raise BzrError('Mismatched last-changed revision for file_id {%s}'
                           ': %s != %s' % (file_id,
                                           previous,
                                           revision_id))
    recorded[file_id] = revision_id
519
def note_patch(self, new_path, patch):
    """Remember the patch that applies to the file at new_path.

    A later call for the same path replaces the stored patch.
    """
    patch_table = self.patches
    patch_table[new_path] = patch
523
def note_target(self, new_path, target):
    """Remember the target of the symlink found at new_path.

    The value is stored in ``_targets`` keyed by ``new_path``.
    """
    symlink_targets = self._targets
    symlink_targets[new_path] = target
527
def note_deletion(self, old_path):
    """Mark the file at old_path as deleted.

    The path is appended to ``self.deleted``; no de-duplication is done.
    """
    removed = self.deleted
    removed.append(old_path)
531
def note_executable(self, new_path, executable):
    """Remember the executable flag for the entry at new_path.

    The flag is stored in ``_executable`` keyed by the path, so a later
    call for the same path overrides an earlier one.
    """
    flags = self._executable
    flags[new_path] = executable
534
def old_path(self, new_path):
535
"""Get the old_path (path in the base_tree) for the file at new_path"""
536
assert new_path[:1] not in ('\\', '/')
537
old_path = self._renamed.get(new_path)
538
if old_path is not None:
540
dirname,basename = os.path.split(new_path)
541
# dirname is not '' doesn't work, because
542
# dirname may be a unicode entry, and is
543
# requires the objects to be identical
545
old_dir = self.old_path(dirname)
549
old_path = pathjoin(old_dir, basename)
552
#If the new path wasn't in renamed, the old one shouldn't be in
554
if old_path in self._renamed_r:
558
def new_path(self, old_path):
559
"""Get the new_path (path in the target_tree) for the file at old_path
562
assert old_path[:1] not in ('\\', '/')
563
new_path = self._renamed_r.get(old_path)
564
if new_path is not None:
566
if new_path in self._renamed:
568
dirname,basename = os.path.split(old_path)
570
new_dir = self.new_path(dirname)
574
new_path = pathjoin(new_dir, basename)
577
#If the old path wasn't in renamed, the new one shouldn't be in
579
if new_path in self._renamed:
583
def path2id(self, path):
584
"""Return the id of the file present at path in the target tree."""
585
file_id = self._new_id.get(path)
586
if file_id is not None:
588
old_path = self.old_path(path)
591
if old_path in self.deleted:
593
if getattr(self.base_tree, 'path2id', None) is not None:
594
return self.base_tree.path2id(old_path)
596
return self.base_tree.inventory.path2id(old_path)
598
def id2path(self, file_id):
599
"""Return the new path in the target tree of the file with id file_id"""
600
path = self._new_id_r.get(file_id)
603
old_path = self.base_tree.id2path(file_id)
606
if old_path in self.deleted:
608
return self.new_path(old_path)
610
def old_contents_id(self, file_id):
611
"""Return the id in the base_tree for the given file_id.
612
Return None if the file did not exist in base.
614
if self.contents_by_id:
615
if self.base_tree.has_id(file_id):
619
new_path = self.id2path(file_id)
620
return self.base_tree.path2id(new_path)
622
def get_file(self, file_id):
623
"""Return a file-like object containing the new contents of the
624
file given by file_id.
626
TODO: It might be nice if this actually generated an entry
627
in the text-store, so that the file contents would
630
base_id = self.old_contents_id(file_id)
631
if (base_id is not None and
632
base_id != self.base_tree.inventory.root.file_id):
633
patch_original = self.base_tree.get_file(base_id)
635
patch_original = None
636
file_patch = self.patches.get(self.id2path(file_id))
637
if file_patch is None:
638
if (patch_original is None and
639
self.get_kind(file_id) == 'directory'):
641
assert patch_original is not None, "None: %s" % file_id
642
return patch_original
644
assert not file_patch.startswith('\\'), \
645
'Malformed patch for %s, %r' % (file_id, file_patch)
646
return patched_file(file_patch, patch_original)
648
def get_symlink_target(self, file_id):
649
new_path = self.id2path(file_id)
651
return self._targets[new_path]
653
return self.base_tree.get_symlink_target(file_id)
655
def get_kind(self, file_id):
    """Return the kind recorded for file_id.

    A kind stored in ``_kinds`` takes precedence; otherwise the kind
    comes from the base tree's inventory entry for ``file_id``.
    """
    known_kinds = self._kinds
    if file_id not in known_kinds:
        # Nothing recorded locally, so defer to the base tree.
        return self.base_tree.inventory[file_id].kind
    return known_kinds[file_id]
660
def is_executable(self, file_id):
    """Return the executable flag for file_id.

    The id is first translated to a path with ``id2path``.  A flag stored
    for that path in ``_executable`` wins; otherwise the flag comes from
    the base tree's inventory entry for ``file_id``.
    """
    path = self.id2path(file_id)
    overrides = self._executable
    if path not in overrides:
        # Nothing recorded for this path, so defer to the base tree.
        return self.base_tree.inventory[file_id].executable
    return overrides[path]
667
def get_last_changed(self, file_id):
    """Return the last-changed revision for file_id.

    The id is translated to a path with ``id2path`` and that path is
    looked up in ``_last_changed``.  When nothing is stored for the path,
    the ``revision`` of the base tree's inventory entry is returned.
    """
    path = self.id2path(file_id)
    recorded = self._last_changed
    if path not in recorded:
        return self.base_tree.inventory[file_id].revision
    return recorded[path]
673
def get_size_and_sha1(self, file_id):
674
"""Return the size and sha1 hash of the given file id.
675
If the file was not locally modified, this is extracted
676
from the base_tree. Rather than re-reading the file.
678
new_path = self.id2path(file_id)
681
if new_path not in self.patches:
682
# If the entry does not have a patch, then the
683
# contents must be the same as in the base_tree
684
ie = self.base_tree.inventory[file_id]
685
if ie.text_size is None:
686
return ie.text_size, ie.text_sha1
687
return int(ie.text_size), ie.text_sha1
688
fileobj = self.get_file(file_id)
689
content = fileobj.read()
690
return len(content), sha_string(content)
692
def _get_inventory(self):
693
"""Build up the inventory entry for the BundleTree.
695
This need to be called before ever accessing self.inventory
697
from os.path import dirname, basename
699
assert self.base_tree is not None
700
base_inv = self.base_tree.inventory
701
inv = Inventory(None, self.revision_id)
703
def add_entry(file_id):
704
path = self.id2path(file_id)
710
parent_path = dirname(path)
711
parent_id = self.path2id(parent_path)
713
kind = self.get_kind(file_id)
714
revision_id = self.get_last_changed(file_id)
716
name = basename(path)
717
if kind == 'directory':
718
ie = InventoryDirectory(file_id, name, parent_id)
720
ie = InventoryFile(file_id, name, parent_id)
721
ie.executable = self.is_executable(file_id)
722
elif kind == 'symlink':
723
ie = InventoryLink(file_id, name, parent_id)
724
ie.symlink_target = self.get_symlink_target(file_id)
725
ie.revision = revision_id
727
if kind in ('directory', 'symlink'):
728
ie.text_size, ie.text_sha1 = None, None
730
ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
731
if (ie.text_size is None) and (kind == 'file'):
732
raise BzrError('Got a text_size of None for file_id %r' % file_id)
735
sorted_entries = self.sorted_path_id()
736
for path, file_id in sorted_entries:
741
# Have to overload the inherited inventory property
742
# because _get_inventory is only called in the parent.
743
# Reading the docs, property() objects do not use
744
# overloading, they use the function as it was defined
746
inventory = property(_get_inventory)
749
for path, entry in self.inventory.iter_entries():
752
def sorted_path_id(self):
754
for result in self._new_id.iteritems():
756
for id in self.base_tree:
757
path = self.id2path(id)
760
paths.append((path, id))
765
def patched_file(file_patch, original):
766
"""Produce a file-like object with the patched version of a text"""
767
from bzrlib.patches import iter_patched
768
from bzrlib.iterablefile import IterableFile
770
return IterableFile(())
771
# string.splitlines(True) also splits on '\r', but the iter_patched code
772
# only expects to iterate over '\n' style lines
773
return IterableFile(iter_patched(original,
774
StringIO(file_patch).readlines()))