1
# Copyright (C) 2006 by Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Read in a bundle stream, and process it into a BundleReader object."""
20
from cStringIO import StringIO
25
from bzrlib.errors import (TestamentMismatch, BzrError,
26
MalformedHeader, MalformedPatches, NotABundle)
27
from bzrlib.inventory import (Inventory, InventoryEntry,
28
InventoryDirectory, InventoryFile,
30
from bzrlib.osutils import sha_file, sha_string, pathjoin
31
from bzrlib.revision import Revision, NULL_REVISION
32
from bzrlib.testament import StrictTestament
33
from bzrlib.trace import mutter, warning
34
import bzrlib.transport
35
from bzrlib.tree import Tree
36
import bzrlib.urlutils
37
from bzrlib.xml5 import serializer_v5
40
class RevisionInfo(object):
41
"""Gets filled out for each revision object that is read.
43
def __init__(self, revision_id):
44
self.revision_id = revision_id
50
self.inventory_sha1 = None
52
self.parent_ids = None
55
self.properties = None
56
self.tree_actions = None
59
return pprint.pformat(self.__dict__)
61
def as_revision(self):
62
rev = Revision(revision_id=self.revision_id,
63
committer=self.committer,
64
timestamp=float(self.timestamp),
65
timezone=int(self.timezone),
66
inventory_sha1=self.inventory_sha1,
67
message='\n'.join(self.message))
70
rev.parent_ids.extend(self.parent_ids)
73
for property in self.properties:
74
key_end = property.find(': ')
75
assert key_end is not None
76
key = property[:key_end].encode('utf-8')
77
value = property[key_end+2:].encode('utf-8')
78
rev.properties[key] = value
83
class BundleInfo(object):
84
"""This contains the meta information. Stuff that allows you to
85
recreate the revision or inventory XML.
92
# A list of RevisionInfo objects
95
# The next entries are created during complete_info() and
96
# other post-read functions.
98
# A list of real Revision objects
99
self.real_revisions = []
101
self.timestamp = None
105
return pprint.pformat(self.__dict__)
107
def complete_info(self):
108
"""This makes sure that all information is properly
109
split up, based on the assumptions that can be made
110
when information is missing.
112
from bzrlib.bundle.serializer import unpack_highres_date
113
# Put in all of the guessable information.
114
if not self.timestamp and self.date:
115
self.timestamp, self.timezone = unpack_highres_date(self.date)
117
self.real_revisions = []
118
for rev in self.revisions:
119
if rev.timestamp is None:
120
if rev.date is not None:
121
rev.timestamp, rev.timezone = \
122
unpack_highres_date(rev.date)
124
rev.timestamp = self.timestamp
125
rev.timezone = self.timezone
126
if rev.message is None and self.message:
127
rev.message = self.message
128
if rev.committer is None and self.committer:
129
rev.committer = self.committer
130
self.real_revisions.append(rev.as_revision())
132
def get_base(self, revision):
133
revision_info = self.get_revision_info(revision.revision_id)
134
if revision_info.base_id is not None:
135
if revision_info.base_id == NULL_REVISION:
138
return revision_info.base_id
139
if len(revision.parent_ids) == 0:
140
# There is no base listed, and
141
# the lowest revision doesn't have a parent
142
# so this is probably against the empty tree
143
# and thus base truly is None
146
return revision.parent_ids[-1]
148
def _get_target(self):
149
"""Return the target revision."""
150
if len(self.real_revisions) > 0:
151
return self.real_revisions[0].revision_id
152
elif len(self.revisions) > 0:
153
return self.revisions[0].revision_id
156
target = property(_get_target, doc='The target revision id')
158
def get_revision(self, revision_id):
159
for r in self.real_revisions:
160
if r.revision_id == revision_id:
162
raise KeyError(revision_id)
164
def get_revision_info(self, revision_id):
165
for r in self.revisions:
166
if r.revision_id == revision_id:
168
raise KeyError(revision_id)
170
def revision_tree(self, repository, revision_id, base=None):
171
revision = self.get_revision(revision_id)
172
base = self.get_base(revision)
173
assert base != revision_id
174
self._validate_references_from_repository(repository)
175
revision_info = self.get_revision_info(revision_id)
176
inventory_revision_id = revision_id
177
bundle_tree = BundleTree(repository.revision_tree(base),
178
inventory_revision_id)
179
self._update_tree(bundle_tree, revision_id)
181
inv = bundle_tree.inventory
182
self._validate_inventory(inv, revision_id)
183
self._validate_revision(inv, revision_id)
187
def _validate_references_from_repository(self, repository):
188
"""Now that we have a repository which should have some of the
189
revisions we care about, go through and validate all of them
194
def add_sha(d, revision_id, sha1):
195
if revision_id is None:
197
raise BzrError('A Null revision should always'
198
'have a null sha1 hash')
201
# This really should have been validated as part
202
# of _validate_revisions, but let's do it again
203
if sha1 != d[revision_id]:
204
raise BzrError('** Revision %r referenced with 2 different'
205
' sha hashes %s != %s' % (revision_id,
206
sha1, d[revision_id]))
208
d[revision_id] = sha1
210
# All of the contained revisions were checked
211
# in _validate_revisions
213
for rev_info in self.revisions:
214
checked[rev_info.revision_id] = True
215
add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)
217
for (rev, rev_info) in zip(self.real_revisions, self.revisions):
218
add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)
222
for revision_id, sha1 in rev_to_sha.iteritems():
223
if repository.has_revision(revision_id):
224
testament = StrictTestament.from_revision(repository,
226
local_sha1 = self._testament_sha1_from_revision(repository,
228
if sha1 != local_sha1:
229
raise BzrError('sha1 mismatch. For revision id {%s}'
230
'local: %s, bundle: %s' % (revision_id, local_sha1, sha1))
233
elif revision_id not in checked:
234
missing[revision_id] = sha1
236
for inv_id, sha1 in inv_to_sha.iteritems():
237
if repository.has_revision(inv_id):
238
# Note: branch.get_inventory_sha1() just returns the value that
239
# is stored in the revision text, and that value may be out
240
# of date. This is bogus, because that means we aren't
241
# validating the actual text, just that we wrote and read the
242
# string. But for now, what the hell.
243
local_sha1 = repository.get_inventory_sha1(inv_id)
244
if sha1 != local_sha1:
245
raise BzrError('sha1 mismatch. For inventory id {%s}'
246
'local: %s, bundle: %s' %
247
(inv_id, local_sha1, sha1))
252
# I don't know if this is an error yet
253
warning('Not all revision hashes could be validated.'
254
' Unable validate %d hashes' % len(missing))
255
mutter('Verified %d sha hashes for the bundle.' % count)
257
def _validate_inventory(self, inv, revision_id):
258
"""At this point we should have generated the BundleTree,
259
so build up an inventory, and make sure the hashes match.
262
assert inv is not None
264
# Now we should have a complete inventory entry.
265
s = serializer_v5.write_inventory_to_string(inv)
267
# Target revision is the last entry in the real_revisions list
268
rev = self.get_revision(revision_id)
269
assert rev.revision_id == revision_id
270
if sha1 != rev.inventory_sha1:
271
open(',,bogus-inv', 'wb').write(s)
272
warning('Inventory sha hash mismatch for revision %s. %s'
273
' != %s' % (revision_id, sha1, rev.inventory_sha1))
275
def _validate_revision(self, inventory, revision_id):
276
"""Make sure all revision entries match their checksum."""
278
# This is a mapping from each revision id to its sha hash
281
rev = self.get_revision(revision_id)
282
rev_info = self.get_revision_info(revision_id)
283
assert rev.revision_id == rev_info.revision_id
284
assert rev.revision_id == revision_id
285
sha1 = self._testament_sha1(rev, inventory)
286
if sha1 != rev_info.sha1:
287
raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
288
if rev.revision_id in rev_to_sha1:
289
raise BzrError('Revision {%s} given twice in the list'
291
rev_to_sha1[rev.revision_id] = sha1
293
def _update_tree(self, bundle_tree, revision_id):
294
"""This fills out a BundleTree based on the information
297
:param bundle_tree: A BundleTree to update with the new information.
300
def get_rev_id(last_changed, path, kind):
301
if last_changed is not None:
302
changed_revision_id = last_changed.decode('utf-8')
304
changed_revision_id = revision_id
305
bundle_tree.note_last_changed(path, changed_revision_id)
306
return changed_revision_id
308
def extra_info(info, new_path):
311
for info_item in info:
313
name, value = info_item.split(':', 1)
315
raise 'Value %r has no colon' % info_item
316
if name == 'last-changed':
318
elif name == 'executable':
319
assert value in ('yes', 'no'), value
320
val = (value == 'yes')
321
bundle_tree.note_executable(new_path, val)
322
elif name == 'target':
323
bundle_tree.note_target(new_path, value)
324
elif name == 'encoding':
326
return last_changed, encoding
328
def do_patch(path, lines, encoding):
329
if encoding is not None:
330
assert encoding == 'base64'
331
patch = base64.decodestring(''.join(lines))
333
patch = ''.join(lines)
334
bundle_tree.note_patch(path, patch)
336
def renamed(kind, extra, lines):
337
info = extra.split(' // ')
339
raise BzrError('renamed action lines need both a from and to'
342
if info[1].startswith('=> '):
343
new_path = info[1][3:]
347
bundle_tree.note_rename(old_path, new_path)
348
last_modified, encoding = extra_info(info[2:], new_path)
349
revision = get_rev_id(last_modified, new_path, kind)
351
do_patch(new_path, lines, encoding)
353
def removed(kind, extra, lines):
354
info = extra.split(' // ')
356
# TODO: in the future we might allow file ids to be
357
# given for removed entries
358
raise BzrError('removed action lines should only have the path'
361
bundle_tree.note_deletion(path)
363
def added(kind, extra, lines):
364
info = extra.split(' // ')
366
raise BzrError('add action lines require the path and file id'
369
raise BzrError('add action lines have fewer than 5 entries.'
372
if not info[1].startswith('file-id:'):
373
raise BzrError('The file-id should follow the path for an add'
375
file_id = info[1][8:]
377
bundle_tree.note_id(file_id, path, kind)
378
# this will be overridden in extra_info if executable is specified.
379
bundle_tree.note_executable(path, False)
380
last_changed, encoding = extra_info(info[2:], path)
381
revision = get_rev_id(last_changed, path, kind)
382
if kind == 'directory':
384
do_patch(path, lines, encoding)
386
def modified(kind, extra, lines):
387
info = extra.split(' // ')
389
raise BzrError('modified action lines have at least'
390
'the path in them: %r' % extra)
393
last_modified, encoding = extra_info(info[1:], path)
394
revision = get_rev_id(last_modified, path, kind)
396
do_patch(path, lines, encoding)
404
for action_line, lines in \
405
self.get_revision_info(revision_id).tree_actions:
406
first = action_line.find(' ')
408
raise BzrError('Bogus action line'
409
' (no opening space): %r' % action_line)
410
second = action_line.find(' ', first+1)
412
raise BzrError('Bogus action line'
413
' (missing second space): %r' % action_line)
414
action = action_line[:first]
415
kind = action_line[first+1:second]
416
if kind not in ('file', 'directory', 'symlink'):
417
raise BzrError('Bogus action line'
418
' (invalid object kind %r): %r' % (kind, action_line))
419
extra = action_line[second+1:]
421
if action not in valid_actions:
422
raise BzrError('Bogus action line'
423
' (unrecognized action): %r' % action_line)
424
valid_actions[action](kind, extra, lines)
427
class BundleTree(Tree):
428
def __init__(self, base_tree, revision_id):
429
self.base_tree = base_tree
430
self._renamed = {} # Mapping from old_path => new_path
431
self._renamed_r = {} # new_path => old_path
432
self._new_id = {} # new_path => new_id
433
self._new_id_r = {} # new_id => new_path
434
self._kinds = {} # new_id => kind
435
self._last_changed = {} # new_id => revision_id
436
self._executable = {} # new_id => executable value
438
self._targets = {} # new path => new symlink target
440
self.contents_by_id = True
441
self.revision_id = revision_id
442
self._inventory = None
445
return pprint.pformat(self.__dict__)
447
def note_rename(self, old_path, new_path):
    """Record that a file/directory has been renamed from old_path => new_path.

    The rename is stored in both directions: ``self._renamed`` is keyed
    by the new path and ``self._renamed_r`` is keyed by the old path.

    :param old_path: the path before the rename.
    :param new_path: the path after the rename.
    :raises AssertionError: if either path already takes part in a
        recorded rename.
    """
    assert new_path not in self._renamed, \
        'new path %r was already recorded as a rename target' % (new_path,)
    assert old_path not in self._renamed_r, \
        'old path %r was already recorded as a rename source' % (old_path,)
    self._renamed[new_path] = old_path
    self._renamed_r[old_path] = new_path
454
def note_id(self, new_id, new_path, kind='file'):
    """Register the id of a file that does not exist in base.

    Three lookups are updated: ``self._new_id`` (path => id),
    ``self._new_id_r`` (id => path) and ``self._kinds`` (id => kind).

    :param new_id: the file id to record.
    :param new_path: the path the id belongs to.
    :param kind: the kind stored for the id; defaults to 'file'.
    """
    self._new_id[new_path] = new_id
    self._new_id_r[new_id] = new_path
    self._kinds[new_id] = kind
460
def note_last_changed(self, file_id, revision_id):
461
if (file_id in self._last_changed
462
and self._last_changed[file_id] != revision_id):
463
raise BzrError('Mismatched last-changed revision for file_id {%s}'
464
': %s != %s' % (file_id,
465
self._last_changed[file_id],
467
self._last_changed[file_id] = revision_id
469
def note_patch(self, new_path, patch):
    """Record that there is a patch for a given filename.

    :param new_path: the path the patch applies to; used as the key in
        ``self.patches``.
    :param patch: the patch to store.  An earlier patch recorded for the
        same path is replaced.
    """
    self.patches[new_path] = patch
473
def note_target(self, new_path, target):
    """Record that the symlink at the new path has the given target.

    :param new_path: the path of the symlink; used as the key in
        ``self._targets``.
    :param target: the symlink target to store for that path.
    """
    self._targets[new_path] = target
477
def note_deletion(self, old_path):
    """Record that the file at old_path has been deleted.

    :param old_path: the path to append to ``self.deleted``.
    """
    self.deleted.append(old_path)
481
def note_executable(self, new_path, executable):
    """Record the executable flag for the entry at new_path.

    :param new_path: the path used as the key in ``self._executable``.
    :param executable: the value to store for that path.  A value
        recorded earlier for the same path is overwritten.
    """
    self._executable[new_path] = executable
484
def old_path(self, new_path):
485
"""Get the old_path (path in the base_tree) for the file at new_path"""
486
assert new_path[:1] not in ('\\', '/')
487
old_path = self._renamed.get(new_path)
488
if old_path is not None:
490
dirname,basename = os.path.split(new_path)
491
# dirname is not '' doesn't work, because
492
# dirname may be a unicode entry, and `is`
493
# requires the objects to be identical
495
old_dir = self.old_path(dirname)
499
old_path = pathjoin(old_dir, basename)
502
#If the new path wasn't in renamed, the old one shouldn't be in
504
if old_path in self._renamed_r:
508
def new_path(self, old_path):
509
"""Get the new_path (path in the target_tree) for the file at old_path
512
assert old_path[:1] not in ('\\', '/')
513
new_path = self._renamed_r.get(old_path)
514
if new_path is not None:
516
if new_path in self._renamed:
518
dirname,basename = os.path.split(old_path)
520
new_dir = self.new_path(dirname)
524
new_path = pathjoin(new_dir, basename)
527
#If the old path wasn't in renamed, the new one shouldn't be in
529
if new_path in self._renamed:
533
def path2id(self, path):
534
"""Return the id of the file present at path in the target tree."""
535
file_id = self._new_id.get(path)
536
if file_id is not None:
538
old_path = self.old_path(path)
541
if old_path in self.deleted:
543
if getattr(self.base_tree, 'path2id', None) is not None:
544
return self.base_tree.path2id(old_path)
546
return self.base_tree.inventory.path2id(old_path)
548
def id2path(self, file_id):
549
"""Return the new path in the target tree of the file with id file_id"""
550
path = self._new_id_r.get(file_id)
553
old_path = self.base_tree.id2path(file_id)
556
if old_path in self.deleted:
558
return self.new_path(old_path)
560
def old_contents_id(self, file_id):
561
"""Return the id in the base_tree for the given file_id.
562
Return None if the file did not exist in base.
564
if self.contents_by_id:
565
if self.base_tree.has_id(file_id):
569
new_path = self.id2path(file_id)
570
return self.base_tree.path2id(new_path)
572
def get_file(self, file_id):
573
"""Return a file-like object containing the new contents of the
574
file given by file_id.
576
TODO: It might be nice if this actually generated an entry
577
in the text-store, so that the file contents would
580
base_id = self.old_contents_id(file_id)
581
if (base_id is not None and
582
base_id != self.base_tree.inventory.root.file_id):
583
patch_original = self.base_tree.get_file(base_id)
585
patch_original = None
586
file_patch = self.patches.get(self.id2path(file_id))
587
if file_patch is None:
588
if (patch_original is None and
589
self.get_kind(file_id) == 'directory'):
591
assert patch_original is not None, "None: %s" % file_id
592
return patch_original
594
assert not file_patch.startswith('\\'), \
595
'Malformed patch for %s, %r' % (file_id, file_patch)
596
return patched_file(file_patch, patch_original)
598
def get_symlink_target(self, file_id):
599
new_path = self.id2path(file_id)
601
return self._targets[new_path]
603
return self.base_tree.get_symlink_target(file_id)
605
def get_kind(self, file_id):
    """Return the kind recorded for file_id.

    A kind stored in ``self._kinds`` takes precedence.  Otherwise the
    kind is read from the matching entry of the base tree's inventory.

    :param file_id: the file id to look up.
    :return: the kind of the entry.
    """
    if file_id in self._kinds:
        return self._kinds[file_id]
    # Not recorded on this tree, so defer to the base tree's inventory.
    return self.base_tree.inventory[file_id].kind
610
def is_executable(self, file_id):
    """Return the executable flag for file_id.

    The id is first translated to a path with ``self.id2path``.  A flag
    stored for that path in ``self._executable`` takes precedence.
    Otherwise the flag is read from the base tree's inventory entry for
    file_id.

    :param file_id: the file id to look up.
    :return: the executable value for the entry.
    """
    path = self.id2path(file_id)
    if path in self._executable:
        return self._executable[path]
    # No flag recorded on this tree, so defer to the base tree's inventory.
    return self.base_tree.inventory[file_id].executable
617
def get_last_changed(self, file_id):
    """Return the last-changed revision for file_id.

    The id is first translated to a path with ``self.id2path``.  A value
    stored for that path in ``self._last_changed`` takes precedence.
    Otherwise the ``revision`` of the base tree's inventory entry for
    file_id is returned.

    :param file_id: the file id to look up.
    :return: the revision recorded for the entry.
    """
    path = self.id2path(file_id)
    if path in self._last_changed:
        return self._last_changed[path]
    # Nothing recorded on this tree, so defer to the base tree's inventory.
    return self.base_tree.inventory[file_id].revision
623
def get_size_and_sha1(self, file_id):
624
"""Return the size and sha1 hash of the given file id.
625
If the file was not locally modified, this is extracted
626
from the base_tree. Rather than re-reading the file.
628
new_path = self.id2path(file_id)
631
if new_path not in self.patches:
632
# If the entry does not have a patch, then the
633
# contents must be the same as in the base_tree
634
ie = self.base_tree.inventory[file_id]
635
if ie.text_size is None:
636
return ie.text_size, ie.text_sha1
637
return int(ie.text_size), ie.text_sha1
638
fileobj = self.get_file(file_id)
639
content = fileobj.read()
640
return len(content), sha_string(content)
642
def _get_inventory(self):
643
"""Build up the inventory entry for the BundleTree.
645
This need to be called before ever accessing self.inventory
647
from os.path import dirname, basename
649
assert self.base_tree is not None
650
base_inv = self.base_tree.inventory
651
root_id = base_inv.root.file_id
653
# New inventories have a unique root_id
654
inv = Inventory(root_id, self.revision_id)
656
inv = Inventory(revision_id=self.revision_id)
657
inv.root.revision = self.get_last_changed(root_id)
659
def add_entry(file_id):
660
path = self.id2path(file_id)
663
parent_path = dirname(path)
664
if parent_path == u'':
667
parent_id = self.path2id(parent_path)
669
kind = self.get_kind(file_id)
670
revision_id = self.get_last_changed(file_id)
672
name = basename(path)
673
if kind == 'directory':
674
ie = InventoryDirectory(file_id, name, parent_id)
676
ie = InventoryFile(file_id, name, parent_id)
677
ie.executable = self.is_executable(file_id)
678
elif kind == 'symlink':
679
ie = InventoryLink(file_id, name, parent_id)
680
ie.symlink_target = self.get_symlink_target(file_id)
681
ie.revision = revision_id
683
if kind in ('directory', 'symlink'):
684
ie.text_size, ie.text_sha1 = None, None
686
ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
687
if (ie.text_size is None) and (kind == 'file'):
688
raise BzrError('Got a text_size of None for file_id %r' % file_id)
691
sorted_entries = self.sorted_path_id()
692
for path, file_id in sorted_entries:
693
if file_id == inv.root.file_id:
699
# Have to overload the inherited inventory property
700
# because _get_inventory is only called in the parent.
701
# Reading the docs, property() objects do not use
702
# overloading, they use the function as it was defined
704
inventory = property(_get_inventory)
707
for path, entry in self.inventory.iter_entries():
710
def sorted_path_id(self):
712
for result in self._new_id.iteritems():
714
for id in self.base_tree:
715
path = self.id2path(id)
718
paths.append((path, id))
723
def patched_file(file_patch, original):
724
"""Produce a file-like object with the patched version of a text"""
725
from bzrlib.patches import iter_patched
726
from bzrlib.iterablefile import IterableFile
728
return IterableFile(())
729
# string.splitlines(True) also splits on '\r', but the iter_patched code
730
# only expects to iterate over '\n' style lines
731
return IterableFile(iter_patched(original,
732
StringIO(file_patch).readlines()))