# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
"""Read in a bundle stream, and process it into a BundleReader object."""
import base64
from cStringIO import StringIO
import os
import pprint

from bzrlib import osutils
from bzrlib.errors import (TestamentMismatch, BzrError,
                           MalformedHeader, MalformedPatches, NotABundle)
from bzrlib.inventory import (Inventory, InventoryEntry,
                              InventoryDirectory, InventoryFile,
                              InventoryLink)
from bzrlib.osutils import sha_file, sha_string, pathjoin
from bzrlib.revision import Revision, NULL_REVISION
from bzrlib.testament import StrictTestament
from bzrlib.trace import mutter, warning
import bzrlib.transport
from bzrlib.tree import Tree
import bzrlib.urlutils
from bzrlib.xml5 import serializer_v5
43
class RevisionInfo(object):
    """Gets filled out for each revision object that is read.

    Only ``revision_id`` is known at construction time; every other field
    starts as None and is assigned later by whatever code parses the bundle
    (``BundleInfo.complete_info`` fills in timestamp, timezone, message and
    committer when they are still None).
    """

    def __init__(self, revision_id):
        self.revision_id = revision_id
        self.sha1 = None
        self.committer = None
        self.date = None
        self.timestamp = None
        self.timezone = None
        self.inventory_sha1 = None

        self.parent_ids = None
        self.base_id = None
        self.message = None
        self.properties = None
        # Iterated by BundleInfo._update_tree as (action_line, lines) pairs.
        self.tree_actions = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    @staticmethod
    def _split_property(prop):
        """Split one ``'key: value'`` property line into utf-8 (key, value).

        A line that is just ``'key:'`` is accepted as a property with an
        empty value.

        :raises BzrError: if the line contains no key/value separator.
        """
        key_end = prop.find(': ')
        if key_end == -1:
            # str.find() reports "not found" as -1 (never None), so this is
            # the case the original assertion was meant to catch.
            if not prop.endswith(':'):
                raise BzrError('Malformed revision property line'
                               ' (no ": " separator): %r' % (prop,))
            return prop[:-1].encode('utf-8'), ''.encode('utf-8')
        key = prop[:key_end].encode('utf-8')
        value = prop[key_end+2:].encode('utf-8')
        return key, value

    def as_revision(self):
        """Build a Revision object from the fields collected so far.

        ``timestamp``, ``timezone`` and ``message`` must already be set;
        ``message`` is joined with newlines, so it is expected to be a
        sequence of lines.

        :return: the new Revision, with parent ids and properties copied in.
        :raises BzrError: if a property line cannot be split into key/value.
        """
        rev = Revision(revision_id=self.revision_id,
            committer=self.committer,
            timestamp=float(self.timestamp),
            timezone=int(self.timezone),
            inventory_sha1=self.inventory_sha1,
            message='\n'.join(self.message))

        # parent_ids / properties stay None when the bundle listed none.
        if self.parent_ids:
            rev.parent_ids.extend(self.parent_ids)

        if self.properties:
            for prop in self.properties:
                key, value = self._split_property(prop)
                rev.properties[key] = value

        return rev
86
class BundleInfo(object):
    """This contains the meta information. Stuff that allows you to
    recreate the revision or inventory XML.

    ``revisions`` holds the RevisionInfo objects read from the bundle;
    ``real_revisions`` holds the Revision objects built from them by
    complete_info().
    """

    def __init__(self):
        # Bundle-wide defaults, used by complete_info() for revisions that
        # do not carry their own value.
        self.committer = None
        self.date = None
        self.message = None

        # A list of RevisionInfo objects
        self.revisions = []

        # The next entries are created during complete_info() and
        # other post-read functions.

        # A list of real Revision objects
        self.real_revisions = []

        self.timestamp = None
        self.timezone = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def complete_info(self):
        """This makes sure that all information is properly
        split up, based on the assumptions that can be made
        when information is missing.

        Rebuilds ``real_revisions`` from ``revisions``.
        """
        from bzrlib.bundle.serializer import unpack_highres_date
        # Put in all of the guessable information.
        if not self.timestamp and self.date:
            self.timestamp, self.timezone = unpack_highres_date(self.date)

        self.real_revisions = []
        for rev in self.revisions:
            if rev.timestamp is None:
                if rev.date is not None:
                    rev.timestamp, rev.timezone = \
                            unpack_highres_date(rev.date)
                else:
                    # Fall back to the bundle-wide timestamp.
                    rev.timestamp = self.timestamp
                    rev.timezone = self.timezone
            if rev.message is None and self.message:
                rev.message = self.message
            if rev.committer is None and self.committer:
                rev.committer = self.committer
            self.real_revisions.append(rev.as_revision())

    def get_base(self, revision):
        """Return the revision id that ``revision`` is a delta against.

        An explicitly recorded base wins; otherwise the last parent is
        used.  None means the delta is against the empty tree.

        :raises KeyError: if the bundle has no info for ``revision``.
        """
        revision_info = self.get_revision_info(revision.revision_id)
        if revision_info.base_id is not None:
            if revision_info.base_id == NULL_REVISION:
                return None
            return revision_info.base_id
        if len(revision.parent_ids) == 0:
            # There is no base listed, and
            # the lowest revision doesn't have a parent
            # so this is probably against the empty tree
            # and thus base truly is None
            return None
        return revision.parent_ids[-1]

    def _get_target(self):
        """Return the target revision."""
        if len(self.real_revisions) > 0:
            return self.real_revisions[0].revision_id
        elif len(self.revisions) > 0:
            return self.revisions[0].revision_id
        return None

    target = property(_get_target, doc='The target revision id')

    def get_revision(self, revision_id):
        """Return the real Revision with this id, or raise KeyError."""
        for r in self.real_revisions:
            if r.revision_id == revision_id:
                return r
        raise KeyError(revision_id)

    def get_revision_info(self, revision_id):
        """Return the RevisionInfo with this id, or raise KeyError."""
        for r in self.revisions:
            if r.revision_id == revision_id:
                return r
        raise KeyError(revision_id)

    def revision_tree(self, repository, revision_id, base=None):
        """Build and validate the BundleTree for ``revision_id``.

        NOTE(review): the ``base`` argument is accepted but ignored; the
        base is always recomputed with get_base().  Kept as-is so existing
        callers see no change.

        :raises KeyError: if ``revision_id`` is not in the bundle.
        """
        revision = self.get_revision(revision_id)
        base = self.get_base(revision)
        assert base != revision_id
        self._validate_references_from_repository(repository)
        bundle_tree = BundleTree(repository.revision_tree(base),
                                  revision_id)
        self._update_tree(bundle_tree, revision_id)

        inv = bundle_tree.inventory
        self._validate_inventory(inv, revision_id)
        self._validate_revision(inv, revision_id)

        return bundle_tree

    def _validate_references_from_repository(self, repository):
        """Now that we have a repository which should have some of the
        revisions we care about, go through and validate all of them
        that we can.

        :raises BzrError: on any sha1 disagreement.
        """
        rev_to_sha = {}
        inv_to_sha = {}
        def add_sha(d, revision_id, sha1):
            # Record revision_id -> sha1 in d, refusing conflicting values.
            if revision_id is None:
                if sha1 is not None:
                    raise BzrError('A Null revision should always'
                        'have a null sha1 hash')
                return
            if revision_id in d:
                # This really should have been validated as part
                # of _validate_revisions but lets do it again
                if sha1 != d[revision_id]:
                    raise BzrError('** Revision %r referenced with 2 different'
                            ' sha hashes %s != %s' % (revision_id,
                                sha1, d[revision_id]))
            else:
                d[revision_id] = sha1

        # All of the contained revisions were checked
        # in _validate_revisions
        checked = {}
        for rev_info in self.revisions:
            checked[rev_info.revision_id] = True
            add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)

        for (rev, rev_info) in zip(self.real_revisions, self.revisions):
            add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)

        count = 0
        missing = {}
        for revision_id, sha1 in rev_to_sha.iteritems():
            if repository.has_revision(revision_id):
                # NOTE(review): this result is never used; the call is kept
                # only so behaviour is unchanged.
                testament = StrictTestament.from_revision(repository,
                                                          revision_id)
                # _testament_sha1_from_revision is not defined in this
                # class; presumably supplied elsewhere -- TODO confirm.
                local_sha1 = self._testament_sha1_from_revision(repository,
                                                                revision_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For revision id {%s}'
                            'local: %s, bundle: %s' % (revision_id, local_sha1, sha1))
                else:
                    count += 1
            elif revision_id not in checked:
                missing[revision_id] = sha1

        for inv_id, sha1 in inv_to_sha.iteritems():
            if repository.has_revision(inv_id):
                # Note: branch.get_inventory_sha1() just returns the value that
                # is stored in the revision text, and that value may be out
                # of date. This is bogus, because that means we aren't
                # validating the actual text, just that we wrote and read the
                # string. But for now, what the hell.
                local_sha1 = repository.get_inventory_sha1(inv_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For inventory id {%s}'
                                   'local: %s, bundle: %s' %
                                   (inv_id, local_sha1, sha1))
                else:
                    count += 1

        if len(missing) > 0:
            # I don't know if this is an error yet
            warning('Not all revision hashes could be validated.'
                    ' Unable validate %d hashes' % len(missing))
        mutter('Verified %d sha hashes for the bundle.' % count)

    def _validate_inventory(self, inv, revision_id):
        """At this point we should have generated the BundleTree,
        so build up an inventory, and make sure the hashes match.

        A mismatch is only warned about; the serialized inventory is also
        dumped to ``,,bogus-inv`` in the current directory for inspection.
        """
        assert inv is not None

        # Now we should have a complete inventory entry.
        s = serializer_v5.write_inventory_to_string(inv)
        sha1 = sha_string(s)
        # Target revision is the last entry in the real_revisions list
        rev = self.get_revision(revision_id)
        assert rev.revision_id == revision_id
        if sha1 != rev.inventory_sha1:
            # Close the dump file deterministically instead of relying on
            # garbage collection of an anonymous file object.
            dump = open(',,bogus-inv', 'wb')
            try:
                dump.write(s)
            finally:
                dump.close()
            warning('Inventory sha hash mismatch for revision %s. %s'
                    ' != %s' % (revision_id, sha1, rev.inventory_sha1))

    def _validate_revision(self, inventory, revision_id):
        """Make sure all revision entries match their checksum.

        :raises TestamentMismatch: if the testament sha1 computed from the
            revision and ``inventory`` differs from the bundle's value.
        """
        rev = self.get_revision(revision_id)
        rev_info = self.get_revision_info(revision_id)
        assert rev.revision_id == rev_info.revision_id
        assert rev.revision_id == revision_id
        # _testament_sha1 is not defined in this class; presumably supplied
        # elsewhere -- TODO confirm.
        sha1 = self._testament_sha1(rev, inventory)
        if sha1 != rev_info.sha1:
            raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
        # The former "given twice" check consulted a dict created empty in
        # this very call, so it could never fire; it has been dropped.

    def _update_tree(self, bundle_tree, revision_id):
        """This fills out a BundleTree based on the information
        that was read in.

        :param bundle_tree: A BundleTree to update with the new information.
        :param revision_id: The revision whose tree actions are applied; it
            is also the last-changed revision for entries without one.
        :raises BzrError: for a malformed action line or info item.
        """

        def get_rev_id(last_changed, path, kind):
            # Record and return the last-changed revision for path.
            if last_changed is not None:
                changed_revision_id = osutils.safe_revision_id(last_changed)
            else:
                changed_revision_id = revision_id
            bundle_tree.note_last_changed(path, changed_revision_id)
            return changed_revision_id

        def extra_info(info, new_path):
            # Apply the trailing 'name:value' items of an action line and
            # return the (last_changed, encoding) values found, if any.
            last_changed = None
            encoding = None
            for info_item in info:
                try:
                    name, value = info_item.split(':', 1)
                except ValueError:
                    # Was a string exception, which is not raisable.
                    raise BzrError('Value %r has no colon' % info_item)
                if name == 'last-changed':
                    last_changed = value
                elif name == 'executable':
                    assert value in ('yes', 'no'), value
                    val = (value == 'yes')
                    bundle_tree.note_executable(new_path, val)
                elif name == 'target':
                    bundle_tree.note_target(new_path, value)
                elif name == 'encoding':
                    encoding = value
            return last_changed, encoding

        def do_patch(path, lines, encoding):
            # Hand the (optionally base64-encoded) patch text to the tree.
            if encoding is not None:
                assert encoding == 'base64'
                patch = base64.decodestring(''.join(lines))
            else:
                patch = ''.join(lines)
            bundle_tree.note_patch(path, patch)

        def renamed(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) < 2:
                raise BzrError('renamed action lines need both a from and to'
                        ': %r' % extra)
            old_path = info[0]
            if info[1].startswith('=> '):
                new_path = info[1][3:]
            else:
                new_path = info[1]

            bundle_tree.note_rename(old_path, new_path)
            last_modified, encoding = extra_info(info[2:], new_path)
            get_rev_id(last_modified, new_path, kind)
            if lines:
                do_patch(new_path, lines, encoding)

        def removed(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) > 1:
                # TODO: in the future we might allow file ids to be
                # given for removed entries
                raise BzrError('removed action lines should only have the path'
                        ': %r' % extra)
            path = info[0]
            bundle_tree.note_deletion(path)

        def added(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) <= 1:
                raise BzrError('add action lines require the path and file id'
                        ': %r' % extra)
            elif len(info) > 5:
                raise BzrError('add action lines have fewer than 5 entries.'
                        ': %r' % extra)
            path = info[0]
            if not info[1].startswith('file-id:'):
                raise BzrError('The file-id should follow the path for an add'
                        ': %r' % extra)
            # This will be Unicode because of how the stream is read. Turn it
            # back into a utf8 file_id
            file_id = osutils.safe_file_id(info[1][8:])

            bundle_tree.note_id(file_id, path, kind)
            # this will be overridden in extra_info if executable is specified.
            bundle_tree.note_executable(path, False)
            last_changed, encoding = extra_info(info[2:], path)
            get_rev_id(last_changed, path, kind)
            if kind == 'directory':
                # Directories carry no content patch.
                return
            do_patch(path, lines, encoding)

        def modified(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) < 1:
                raise BzrError('modified action lines have at least'
                        'the path in them: %r' % extra)
            path = info[0]

            last_modified, encoding = extra_info(info[1:], path)
            get_rev_id(last_modified, path, kind)
            if lines:
                do_patch(path, lines, encoding)

        valid_actions = {
            'renamed': renamed,
            'removed': removed,
            'added': added,
            'modified': modified,
        }
        # Each action line has the form '<action> <kind> <extra...>'.
        for action_line, lines in \
                self.get_revision_info(revision_id).tree_actions:
            first = action_line.find(' ')
            if first == -1:
                raise BzrError('Bogus action line'
                        ' (no opening space): %r' % action_line)
            second = action_line.find(' ', first+1)
            if second == -1:
                raise BzrError('Bogus action line'
                        ' (missing second space): %r' % action_line)
            action = action_line[:first]
            kind = action_line[first+1:second]
            if kind not in ('file', 'directory', 'symlink'):
                raise BzrError('Bogus action line'
                        ' (invalid object kind %r): %r' % (kind, action_line))
            extra = action_line[second+1:]

            if action not in valid_actions:
                raise BzrError('Bogus action line'
                        ' (unrecognized action): %r' % action_line)
            valid_actions[action](kind, extra, lines)
432
class BundleTree(Tree):
    """A tree described as a base tree plus the changes noted from a bundle.

    The note_*() methods record renames, additions, deletions, patches and
    per-path metadata; the query methods answer for the resulting target
    tree by consulting those records first and the base tree otherwise.
    """

    def __init__(self, base_tree, revision_id):
        self.base_tree = base_tree
        self._renamed = {} # new_path => old_path
        self._renamed_r = {} # old_path => new_path
        self._new_id = {} # new_path => new_id
        self._new_id_r = {} # new_id => new_path
        self._kinds = {} # new_id => kind
        self._last_changed = {} # new_path => revision_id
        self._executable = {} # new_path => executable value
        self.patches = {} # new_path => patch text
        self._targets = {} # new path => new symlink target
        self.deleted = [] # old paths removed from the base tree
        self.contents_by_id = True
        self.revision_id = revision_id
        self._inventory = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def note_rename(self, old_path, new_path):
        """A file/directory has been renamed from old_path => new_path"""
        assert new_path not in self._renamed
        assert old_path not in self._renamed_r
        self._renamed[new_path] = old_path
        self._renamed_r[old_path] = new_path

    def note_id(self, new_id, new_path, kind='file'):
        """Files that don't exist in base need a new id."""
        self._new_id[new_path] = new_id
        self._new_id_r[new_id] = new_path
        self._kinds[new_id] = kind

    def note_last_changed(self, file_id, revision_id):
        """Record the last-changed revision for an entry.

        NOTE(review): despite the parameter name, get_last_changed() looks
        this mapping up by path, so ``file_id`` is used as a new path here.

        :raises BzrError: if a different revision was already recorded.
        """
        if (file_id in self._last_changed
                and self._last_changed[file_id] != revision_id):
            raise BzrError('Mismatched last-changed revision for file_id {%s}'
                    ': %s != %s' % (file_id,
                                    self._last_changed[file_id],
                                    revision_id))
        self._last_changed[file_id] = revision_id

    def note_patch(self, new_path, patch):
        """There is a patch for a given filename."""
        self.patches[new_path] = patch

    def note_target(self, new_path, target):
        """The symlink at the new path has the given target"""
        self._targets[new_path] = target

    def note_deletion(self, old_path):
        """The file at old_path has been deleted."""
        self.deleted.append(old_path)

    def note_executable(self, new_path, executable):
        """Record the executable bit for the file at new_path."""
        self._executable[new_path] = executable

    def old_path(self, new_path):
        """Get the old_path (path in the base_tree) for the file at new_path

        Returns None when the path has no counterpart in the base tree.
        """
        assert new_path[:1] not in ('\\', '/')
        old_path = self._renamed.get(new_path)
        if old_path is not None:
            return old_path
        dirname, basename = os.path.split(new_path)
        # Compare with != rather than "is not": dirname may be a unicode
        # object, and identity would require the very same object.
        if dirname != '':
            # A renamed parent directory moves its children with it.
            old_dir = self.old_path(dirname)
            if old_dir is None:
                old_path = None
            else:
                old_path = pathjoin(old_dir, basename)
        else:
            old_path = new_path
        #If the new path wasn't in renamed, the old one shouldn't be in
        #renamed_r
        if old_path in self._renamed_r:
            return None
        return old_path

    def new_path(self, old_path):
        """Get the new_path (path in the target_tree) for the file at old_path
        in the base tree.

        Returns None when the path has no counterpart in the target tree.
        """
        assert old_path[:1] not in ('\\', '/')
        new_path = self._renamed_r.get(old_path)
        if new_path is not None:
            return new_path
        # NOTE(review): new_path is always None at this point, so this test
        # cannot succeed as written; left untouched pending confirmation of
        # the intended check.
        if new_path in self._renamed:
            return None
        dirname, basename = os.path.split(old_path)
        if dirname != '':
            new_dir = self.new_path(dirname)
            if new_dir is None:
                new_path = None
            else:
                new_path = pathjoin(new_dir, basename)
        else:
            new_path = old_path
        #If the old path wasn't in renamed, the new one shouldn't be in
        #renamed_r
        if new_path in self._renamed:
            return None
        return new_path

    def path2id(self, path):
        """Return the id of the file present at path in the target tree."""
        file_id = self._new_id.get(path)
        if file_id is not None:
            return file_id
        old_path = self.old_path(path)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        # Some base trees expose path2id directly, others only through
        # their inventory.
        if getattr(self.base_tree, 'path2id', None) is not None:
            return self.base_tree.path2id(old_path)
        else:
            return self.base_tree.inventory.path2id(old_path)

    def id2path(self, file_id):
        """Return the new path in the target tree of the file with id file_id"""
        path = self._new_id_r.get(file_id)
        if path is not None:
            return path
        old_path = self.base_tree.id2path(file_id)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        return self.new_path(old_path)

    def old_contents_id(self, file_id):
        """Return the id in the base_tree for the given file_id.
        Return None if the file did not exist in base.
        """
        if self.contents_by_id:
            if self.base_tree.has_id(file_id):
                return file_id
            else:
                return None
        new_path = self.id2path(file_id)
        return self.base_tree.path2id(new_path)

    def get_file(self, file_id):
        """Return a file-like object containing the new contents of the
        file given by file_id.

        TODO:   It might be nice if this actually generated an entry
                in the text-store, so that the file contents would
                then be cached.
        """
        base_id = self.old_contents_id(file_id)
        if (base_id is not None and
            base_id != self.base_tree.inventory.root.file_id):
            patch_original = self.base_tree.get_file(base_id)
        else:
            patch_original = None
        file_patch = self.patches.get(self.id2path(file_id))
        if file_patch is None:
            # No patch: the content is unchanged from the base tree.
            if (patch_original is None and
                self.get_kind(file_id) == 'directory'):
                return StringIO()
            assert patch_original is not None, "None: %s" % file_id
            return patch_original

        assert not file_patch.startswith('\\'), \
            'Malformed patch for %s, %r' % (file_id, file_patch)
        return patched_file(file_patch, patch_original)

    def get_symlink_target(self, file_id):
        """Return the symlink target, preferring one noted by the bundle."""
        new_path = self.id2path(file_id)
        try:
            return self._targets[new_path]
        except KeyError:
            return self.base_tree.get_symlink_target(file_id)

    def get_kind(self, file_id):
        """Return the kind noted for a new id, else the base tree's kind."""
        if file_id in self._kinds:
            return self._kinds[file_id]
        return self.base_tree.inventory[file_id].kind

    def is_executable(self, file_id):
        """Return the noted executable bit, else the base tree's value."""
        path = self.id2path(file_id)
        if path in self._executable:
            return self._executable[path]
        else:
            return self.base_tree.inventory[file_id].executable

    def get_last_changed(self, file_id):
        """Return the noted last-changed revision, else the base tree's."""
        path = self.id2path(file_id)
        if path in self._last_changed:
            return self._last_changed[path]
        return self.base_tree.inventory[file_id].revision

    def get_size_and_sha1(self, file_id):
        """Return the size and sha1 hash of the given file id.
        If the file was not locally modified, this is extracted
        from the base_tree. Rather than re-reading the file.

        Returns (None, None) when the id has no path in the target tree.
        """
        new_path = self.id2path(file_id)
        if new_path is None:
            return None, None
        if new_path not in self.patches:
            # If the entry does not have a patch, then the
            # contents must be the same as in the base_tree
            ie = self.base_tree.inventory[file_id]
            if ie.text_size is None:
                return ie.text_size, ie.text_sha1
            return int(ie.text_size), ie.text_sha1
        fileobj = self.get_file(file_id)
        content = fileobj.read()
        return len(content), sha_string(content)

    def _get_inventory(self):
        """Build up the inventory entry for the BundleTree.

        This need to be called before ever accessing self.inventory

        The inventory is rebuilt on every call.

        :raises BzrError: if an entry has an unsupported kind, or a file
            entry ends up without a text size.
        """
        from os.path import dirname, basename

        assert self.base_tree is not None
        inv = Inventory(None, self.revision_id)

        def add_entry(file_id):
            path = self.id2path(file_id)
            if path is None:
                # Not present in the target tree.
                return
            if path == '':
                parent_id = None
            else:
                parent_path = dirname(path)
                parent_id = self.path2id(parent_path)

            kind = self.get_kind(file_id)
            revision_id = self.get_last_changed(file_id)

            name = basename(path)
            if kind == 'directory':
                ie = InventoryDirectory(file_id, name, parent_id)
            elif kind == 'file':
                ie = InventoryFile(file_id, name, parent_id)
                ie.executable = self.is_executable(file_id)
            elif kind == 'symlink':
                ie = InventoryLink(file_id, name, parent_id)
                ie.symlink_target = self.get_symlink_target(file_id)
            else:
                # Previously fell through with ``ie`` unbound.
                raise BzrError('Unsupported kind %r for file_id %r'
                               % (kind, file_id))
            ie.revision = revision_id

            if kind in ('directory', 'symlink'):
                ie.text_size, ie.text_sha1 = None, None
            else:
                ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
            if (ie.text_size is None) and (kind == 'file'):
                raise BzrError('Got a text_size of None for file_id %r' % file_id)
            inv.add(ie)

        # Sorted by path, so a parent directory precedes its children.
        sorted_entries = self.sorted_path_id()
        for path, file_id in sorted_entries:
            add_entry(file_id)

        return inv

    # Have to overload the inherited inventory property
    # because _get_inventory is only called in the parent.
    # Reading the docs, property() objects do not use
    # overloading, they use the function as it was defined
    # at that instant
    inventory = property(_get_inventory)

    def __iter__(self):
        for path, entry in self.inventory.iter_entries():
            yield entry.file_id

    def sorted_path_id(self):
        """Return a sorted list of (path, file_id) for the target tree."""
        paths = []
        for result in self._new_id.iteritems():
            paths.append(result)
        for base_file_id in self.base_tree:
            path = self.id2path(base_file_id)
            if path is None:
                # Deleted, or otherwise absent from the target tree.
                continue
            paths.append((path, base_file_id))
        paths.sort()
        return paths
720
def patched_file(file_patch, original):
    """Produce a file-like object with the patched version of a text

    :param file_patch: the patch text, as a single string.
    :param original: the original content, handed to iter_patched as-is.
    :return: an IterableFile; it is empty when ``file_patch`` is the empty
        string.
    """
    from bzrlib.patches import iter_patched
    from bzrlib.iterablefile import IterableFile
    if file_patch == "":
        return IterableFile(())
    # string.splitlines(True) also splits on '\r', but the iter_patched code
    # only expects to iterate over '\n' style lines
    patch_lines = StringIO(file_patch).readlines()
    patched = iter_patched(original, patch_lines)
    return IterableFile(patched)