1
# Copyright (C) 2006 by Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Read in a bundle stream, and process it into a BundleReader object."""
20
import base64
from cStringIO import StringIO
import os
import pprint

from bzrlib.errors import (TestamentMismatch, BzrError,
                           MalformedHeader, MalformedPatches, NotABundle)
from bzrlib.inventory import (Inventory, InventoryEntry,
                              InventoryDirectory, InventoryFile,
                              InventoryLink)
from bzrlib.osutils import sha_file, sha_string, pathjoin
from bzrlib.revision import Revision, NULL_REVISION
from bzrlib.testament import StrictTestament
from bzrlib.trace import mutter, warning
import bzrlib.transport
from bzrlib.tree import Tree
import bzrlib.urlutils
from bzrlib.xml5 import serializer_v5
40
class RevisionInfo(object):
    """Gets filled out for each revision object that is read.

    Every field except ``revision_id`` starts out as None and is expected to
    be filled in by whoever parses the bundle.  ``as_revision()`` converts
    the collected fields into a ``Revision`` object.
    """

    # NOTE(review): this class was restored from a damaged listing (bare
    # line-number lines mixed into the code, indentation lost, some lines
    # absent).  The following were not visible and are inferred from how the
    # attributes are used elsewhere in this file -- please confirm:
    #   * the initialisers for sha1, committer, date, timestamp, timezone,
    #     base_id and message;
    #   * that the pprint-based method is __str__ (rather than __repr__);
    #   * the "if self.parent_ids" / "if self.properties" guards and the
    #     final "return rev" in as_revision().

    def __init__(self, revision_id):
        """Create an empty record for ``revision_id``.

        :param revision_id: The id of the revision being described.
        """
        self.revision_id = revision_id
        # Testament sha1 of the revision; compared against a locally
        # computed StrictTestament during validation.
        self.sha1 = None
        self.committer = None
        # Date string; parsed with unpack_highres_date() when no explicit
        # timestamp is available.
        self.date = None
        self.timestamp = None
        self.timezone = None
        self.inventory_sha1 = None

        self.parent_ids = None
        # Explicit base revision id, if the bundle records one.
        self.base_id = None
        # Sequence of message lines; joined with '\n' in as_revision().
        self.message = None
        # Sequence of 'key: value' strings.
        self.properties = None
        # Sequence of (action_line, lines) pairs describing tree changes.
        self.tree_actions = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def as_revision(self):
        """Build a ``Revision`` from the fields collected so far.

        ``timestamp``, ``timezone`` and ``message`` must already be set: they
        are passed through ``float()``, ``int()`` and ``'\\n'.join()``
        respectively.

        :return: A new Revision object.
        :raises BzrError: If a property string has no ``': '`` separator.
        """
        rev = Revision(revision_id=self.revision_id,
                       committer=self.committer,
                       timestamp=float(self.timestamp),
                       timezone=int(self.timezone),
                       inventory_sha1=self.inventory_sha1,
                       message='\n'.join(self.message))

        if self.parent_ids:
            rev.parent_ids.extend(self.parent_ids)

        if self.properties:
            for prop in self.properties:
                key_end = prop.find(': ')
                # str.find() signals "not found" with -1, never None, so the
                # old "assert key_end is not None" could not fail and a
                # malformed property was silently sliced into a garbage
                # key/value pair.
                if key_end == -1:
                    raise BzrError('Malformed revision property'
                                   ' (no ": " separator): %r' % (prop,))
                key = prop[:key_end].encode('utf-8')
                value = prop[key_end+2:].encode('utf-8')
                rev.properties[key] = value

        return rev
83
class BundleInfo(object):
    """This contains the meta information. Stuff that allows you to
    recreate the revision or inventory XML.

    ``revisions`` holds the RevisionInfo records as read;
    ``real_revisions`` holds the Revision objects that complete_info()
    builds from them.
    """

    # NOTE(review): this class was restored from a damaged listing (bare
    # line-number lines mixed into the code, indentation lost, some lines
    # absent).  The following were not visible and are inferred from the
    # surrounding code -- please confirm against version control:
    #   * __init__'s header and the initialisers for committer, date,
    #     message, revisions and timezone; the __str__ header;
    #   * "else:" / "return None" / "return r" / "return bundle_tree" /
    #     "count += 1" lines where a branch visibly had no body;
    #   * in _validate_references_from_repository: the rev_to_sha,
    #     inv_to_sha, checked, count and missing initialisers and the
    #     "if sha1 is not None" / "if revision_id in d" tests;
    #   * "sha1 = sha_string(s)" in _validate_inventory and
    #     "rev_to_sha1 = {}" in _validate_revision;
    #   * in _update_tree: the len(info) tests guarding each raise, the
    #     "': %r' % extra" tails of the split error messages, the
    #     "if lines:" guards before do_patch() in renamed()/modified(), the
    #     "== -1" tests on str.find(), and the keys of the valid_actions
    #     dispatch table.

    def __init__(self):
        self.committer = None
        self.date = None
        self.message = None

        # A list of RevisionInfo objects
        self.revisions = []

        # The next entries are created during complete_info() and
        # other post-read functions.

        # A list of real Revision objects
        self.real_revisions = []

        self.timestamp = None
        self.timezone = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def complete_info(self):
        """This makes sure that all information is properly
        split up, based on the assumptions that can be made
        when information is missing.

        Bundle-level date/message/committer are used as defaults for any
        revision that lacks its own, then ``real_revisions`` is rebuilt from
        ``revisions``.
        """
        from bzrlib.bundle.serializer import unpack_highres_date
        # Put in all of the guessable information.
        if not self.timestamp and self.date:
            self.timestamp, self.timezone = unpack_highres_date(self.date)

        self.real_revisions = []
        for rev in self.revisions:
            if rev.timestamp is None:
                if rev.date is not None:
                    rev.timestamp, rev.timezone = \
                            unpack_highres_date(rev.date)
                else:
                    rev.timestamp = self.timestamp
                    rev.timezone = self.timezone
            if rev.message is None and self.message:
                rev.message = self.message
            if rev.committer is None and self.committer:
                rev.committer = self.committer
            self.real_revisions.append(rev.as_revision())

    def get_base(self, revision):
        """Return the id of the revision that ``revision`` is a delta against.

        :param revision: An object with ``revision_id`` and ``parent_ids``.
        :return: The recorded base id (None when it is NULL_REVISION);
            otherwise the last parent id; otherwise None.
        :raises KeyError: If no RevisionInfo matches ``revision``.
        """
        revision_info = self.get_revision_info(revision.revision_id)
        if revision_info.base_id is not None:
            if revision_info.base_id == NULL_REVISION:
                return None
            else:
                return revision_info.base_id
        if len(revision.parent_ids) == 0:
            # There is no base listed, and
            # the lowest revision doesn't have a parent
            # so this is probably against the empty tree
            # and thus base truly is None
            return None
        else:
            return revision.parent_ids[-1]

    def _get_target(self):
        """Return the target revision."""
        if len(self.real_revisions) > 0:
            return self.real_revisions[0].revision_id
        elif len(self.revisions) > 0:
            return self.revisions[0].revision_id
        return None

    target = property(_get_target, doc='The target revision id')

    def get_revision(self, revision_id):
        """Return the entry of ``real_revisions`` with the given id.

        :raises KeyError: If there is no such revision.
        """
        for r in self.real_revisions:
            if r.revision_id == revision_id:
                return r
        raise KeyError(revision_id)

    def get_revision_info(self, revision_id):
        """Return the entry of ``revisions`` with the given id.

        :raises KeyError: If there is no such revision.
        """
        for r in self.revisions:
            if r.revision_id == revision_id:
                return r
        raise KeyError(revision_id)

    def revision_tree(self, repository, revision_id, base=None):
        """Build and validate the BundleTree for ``revision_id``.

        :param repository: Repository supplying the base tree and used to
            cross-check hashes.
        :param revision_id: Id of a revision contained in this bundle.
        :param base: Ignored; the base is always recomputed with get_base().
        :return: The populated BundleTree.
        """
        revision = self.get_revision(revision_id)
        base = self.get_base(revision)
        assert base != revision_id
        self._validate_references_from_repository(repository)
        # Looked up only so that an unknown id fails here with KeyError.
        self.get_revision_info(revision_id)
        bundle_tree = BundleTree(repository.revision_tree(base),
                                 revision_id)
        self._update_tree(bundle_tree, revision_id)

        inv = bundle_tree.inventory
        self._validate_inventory(inv, revision_id)
        self._validate_revision(inv, revision_id)

        return bundle_tree

    def _validate_references_from_repository(self, repository):
        """Now that we have a repository which should have some of the
        revisions we care about, go through and validate all of them
        that we can.

        :raises BzrError: If a hash recorded in the bundle disagrees with
            the repository, or one id is recorded with two different hashes.
        """
        rev_to_sha = {}
        inv_to_sha = {}

        def add_sha(d, revision_id, sha1):
            if revision_id is None:
                if sha1 is not None:
                    raise BzrError('A Null revision should always'
                        'have a null sha1 hash')
                return
            if revision_id in d:
                # This really should have been validated as part
                # of _validate_revisions but lets do it again
                if sha1 != d[revision_id]:
                    raise BzrError('** Revision %r referenced with 2 different'
                            ' sha hashes %s != %s' % (revision_id,
                                sha1, d[revision_id]))
            else:
                d[revision_id] = sha1

        # All of the contained revisions were checked
        # in _validate_revisions
        checked = {}
        for rev_info in self.revisions:
            checked[rev_info.revision_id] = True
            add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)

        for (rev, rev_info) in zip(self.real_revisions, self.revisions):
            add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)

        count = 0
        missing = {}
        for revision_id, sha1 in rev_to_sha.iteritems():
            if repository.has_revision(revision_id):
                testament = StrictTestament.from_revision(repository,
                                                          revision_id)
                local_sha1 = testament.as_sha1()
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For revision id {%s}'
                            'local: %s, bundle: %s' % (revision_id, local_sha1, sha1))
                else:
                    count += 1
            elif revision_id not in checked:
                missing[revision_id] = sha1

        for inv_id, sha1 in inv_to_sha.iteritems():
            if repository.has_revision(inv_id):
                # Note: branch.get_inventory_sha1() just returns the value that
                # is stored in the revision text, and that value may be out
                # of date. This is bogus, because that means we aren't
                # validating the actual text, just that we wrote and read the
                # string. But for now, what the hell.
                local_sha1 = repository.get_inventory_sha1(inv_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For inventory id {%s}'
                                   'local: %s, bundle: %s' %
                                   (inv_id, local_sha1, sha1))
                else:
                    count += 1

        if len(missing) > 0:
            # I don't know if this is an error yet
            warning('Not all revision hashes could be validated.'
                    ' Unable validate %d hashes' % len(missing))
        mutter('Verified %d sha hashes for the bundle.' % count)

    def _validate_inventory(self, inv, revision_id):
        """At this point we should have generated the BundleTree,
        so build up an inventory, and make sure the hashes match.

        A mismatch is only reported with warning(); the serialized inventory
        is also dumped to ',,bogus-inv' in the current directory.
        """
        assert inv is not None

        # Now we should have a complete inventory entry.
        s = serializer_v5.write_inventory_to_string(inv)
        sha1 = sha_string(s)
        # Target revision is the last entry in the real_revisions list
        rev = self.get_revision(revision_id)
        assert rev.revision_id == revision_id
        if sha1 != rev.inventory_sha1:
            # Close the dump file explicitly instead of leaving that to the
            # garbage collector.
            f = open(',,bogus-inv', 'wb')
            try:
                f.write(s)
            finally:
                f.close()
            warning('Inventory sha hash mismatch for revision %s. %s'
                    ' != %s' % (revision_id, sha1, rev.inventory_sha1))

    def _validate_revision(self, inventory, revision_id):
        """Make sure all revision entries match their checksum.

        :raises TestamentMismatch: If the testament sha1 computed from the
            revision and ``inventory`` differs from the one in the bundle.
        """
        # This is a mapping from each revision id to it's sha hash
        rev_to_sha1 = {}

        rev = self.get_revision(revision_id)
        rev_info = self.get_revision_info(revision_id)
        assert rev.revision_id == rev_info.revision_id
        assert rev.revision_id == revision_id
        sha1 = StrictTestament(rev, inventory).as_sha1()
        if sha1 != rev_info.sha1:
            raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
        # NOTE(review): rev_to_sha1 is created empty on every call, so this
        # duplicate check cannot fire as written.
        if rev.revision_id in rev_to_sha1:
            raise BzrError('Revision {%s} given twice in the list'
                    % (rev.revision_id))
        rev_to_sha1[rev.revision_id] = sha1

    def _update_tree(self, bundle_tree, revision_id):
        """This fills out a BundleTree based on the information
        that was read in.

        Each entry of the revision's ``tree_actions`` is a pair
        ``(action_line, lines)`` where ``action_line`` has the form
        ``"<action> <kind> <extra>"`` and ``<extra>`` is a ' // '-separated
        list whose first item is a path.

        :param bundle_tree: A BundleTree to update with the new information.
        :param revision_id: The revision whose tree actions are applied.
        :raises BzrError: If an action line is malformed.
        """

        def get_rev_id(last_changed, path, kind):
            # Record which revision last changed `path`; defaults to the
            # revision being applied.
            if last_changed is not None:
                changed_revision_id = last_changed.decode('utf-8')
            else:
                changed_revision_id = revision_id
            bundle_tree.note_last_changed(path, changed_revision_id)
            return changed_revision_id

        def extra_info(info, new_path):
            # Apply the optional 'name:value' items; returns the
            # (last-changed, encoding) values, each None when absent.
            last_changed = None
            encoding = None
            for info_item in info:
                try:
                    name, value = info_item.split(':', 1)
                except ValueError:
                    # This used to raise a bare string, which is not a valid
                    # exception object.
                    raise BzrError('Value %r has no colon' % info_item)
                if name == 'last-changed':
                    last_changed = value
                elif name == 'executable':
                    assert value in ('yes', 'no'), value
                    val = (value == 'yes')
                    bundle_tree.note_executable(new_path, val)
                elif name == 'target':
                    bundle_tree.note_target(new_path, value)
                elif name == 'encoding':
                    encoding = value
            return last_changed, encoding

        def do_patch(path, lines, encoding):
            if encoding is not None:
                assert encoding == 'base64'
                patch = base64.decodestring(''.join(lines))
            else:
                patch = ''.join(lines)
            bundle_tree.note_patch(path, patch)

        def renamed(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) < 2:
                raise BzrError('renamed action lines need both a from and to'
                        ': %r' % extra)
            old_path = info[0]
            if info[1].startswith('=> '):
                new_path = info[1][3:]
            else:
                new_path = info[1]

            bundle_tree.note_rename(old_path, new_path)
            last_modified, encoding = extra_info(info[2:], new_path)
            get_rev_id(last_modified, new_path, kind)
            if lines:
                do_patch(new_path, lines, encoding)

        def removed(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) > 1:
                # TODO: in the future we might allow file ids to be
                # given for removed entries
                raise BzrError('removed action lines should only have the path'
                        ': %r' % extra)
            path = info[0]
            bundle_tree.note_deletion(path)

        def added(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) <= 1:
                raise BzrError('add action lines require the path and file id'
                        ': %r' % extra)
            elif len(info) > 5:
                raise BzrError('add action lines have fewer than 5 entries.'
                        ': %r' % extra)
            path = info[0]
            if not info[1].startswith('file-id:'):
                raise BzrError('The file-id should follow the path for an add'
                        ': %r' % extra)
            file_id = info[1][8:]

            bundle_tree.note_id(file_id, path, kind)
            # this will be overridden in extra_info if executable is specified.
            bundle_tree.note_executable(path, False)
            last_changed, encoding = extra_info(info[2:], path)
            get_rev_id(last_changed, path, kind)
            if kind == 'directory':
                return
            do_patch(path, lines, encoding)

        def modified(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) < 1:
                raise BzrError('modified action lines have at least'
                        'the path in them: %r' % extra)
            path = info[0]

            last_modified, encoding = extra_info(info[1:], path)
            get_rev_id(last_modified, path, kind)
            if lines:
                do_patch(path, lines, encoding)

        valid_actions = {
            'renamed': renamed,
            'removed': removed,
            'added': added,
            'modified': modified,
        }
        tree_actions = self.get_revision_info(revision_id).tree_actions
        for action_line, lines in tree_actions:
            first = action_line.find(' ')
            if first == -1:
                raise BzrError('Bogus action line'
                        ' (no opening space): %r' % action_line)
            second = action_line.find(' ', first+1)
            if second == -1:
                raise BzrError('Bogus action line'
                        ' (missing second space): %r' % action_line)
            action = action_line[:first]
            kind = action_line[first+1:second]
            if kind not in ('file', 'directory', 'symlink'):
                raise BzrError('Bogus action line'
                        ' (invalid object kind %r): %r' % (kind, action_line))
            extra = action_line[second+1:]

            if action not in valid_actions:
                raise BzrError('Bogus action line'
                        ' (unrecognized action): %r' % action_line)
            valid_actions[action](kind, extra, lines)
426
class BundleTree(Tree):
    """A tree described as a base tree plus the changes noted on top of it.

    The ``note_*`` methods record renames, additions, deletions, patches and
    per-path metadata; the query methods combine those records with
    ``base_tree`` to answer questions about the resulting tree.
    """

    # NOTE(review): this class was restored from a damaged listing (bare
    # line-number lines mixed into the code, indentation lost, some lines
    # absent).  The following were not visible and are inferred from the
    # surrounding code -- please confirm against version control:
    #   * the "self.patches = {}" and "self.deleted = []" initialisers and
    #     the __str__ header;
    #   * the "return ..." / "if ... is None" / "else:" lines in old_path,
    #     new_path, path2id, id2path, old_contents_id and
    #     get_size_and_sha1, and the "if dirname != ''" tests;
    #   * "return StringIO()" for a directory without base contents in
    #     get_file, and the try/except KeyError in get_symlink_target;
    #   * in _get_inventory: the try/except TypeError around the Inventory
    #     constructor, the early return for a None path,
    #     "parent_id = root_id", "elif kind == 'file'", "inv.add(ie)", the
    #     "continue"/"add_entry(file_id)" loop body and "return inv";
    #   * the __iter__ header and its "yield entry.file_id";
    #   * "paths = []", the skip of None paths, "paths.sort()" and
    #     "return paths" in sorted_path_id.

    def __init__(self, base_tree, revision_id):
        """
        :param base_tree: The tree the recorded changes apply to.
        :param revision_id: Revision id given to the inventory built by
            _get_inventory().
        """
        self.base_tree = base_tree
        self._renamed = {} # new_path => old_path
        self._renamed_r = {} # old_path => new_path
        self._new_id = {} # new_path => new_id
        self._new_id_r = {} # new_id => new_path
        self._kinds = {} # new_id => kind
        self._last_changed = {} # new_path => revision_id
        self._executable = {} # new_path => executable value
        self.patches = {} # new_path => patch text
        self._targets = {} # new path => new symlink target
        self.deleted = [] # old paths that were removed
        self.contents_by_id = True
        self.revision_id = revision_id
        self._inventory = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def note_rename(self, old_path, new_path):
        """A file/directory has been renamed from old_path => new_path"""
        assert new_path not in self._renamed
        assert old_path not in self._renamed_r
        self._renamed[new_path] = old_path
        self._renamed_r[old_path] = new_path

    def note_id(self, new_id, new_path, kind='file'):
        """Files that don't exist in base need a new id."""
        self._new_id[new_path] = new_id
        self._new_id_r[new_id] = new_path
        self._kinds[new_id] = kind

    def note_last_changed(self, file_id, revision_id):
        """Record the revision that last changed an entry.

        Despite the parameter name, the value is used purely as a dictionary
        key, and get_last_changed() looks entries up by path.

        :raises BzrError: If a different revision was already recorded for
            the same key.
        """
        if (file_id in self._last_changed
                and self._last_changed[file_id] != revision_id):
            raise BzrError('Mismatched last-changed revision for file_id {%s}'
                    ': %s != %s' % (file_id,
                                    self._last_changed[file_id],
                                    revision_id))
        self._last_changed[file_id] = revision_id

    def note_patch(self, new_path, patch):
        """There is a patch for a given filename."""
        self.patches[new_path] = patch

    def note_target(self, new_path, target):
        """The symlink at the new path has the given target"""
        self._targets[new_path] = target

    def note_deletion(self, old_path):
        """The file at old_path has been deleted."""
        self.deleted.append(old_path)

    def note_executable(self, new_path, executable):
        """Record the executable flag for the file at new_path."""
        self._executable[new_path] = executable

    def old_path(self, new_path):
        """Get the old_path (path in the base_tree) for the file at new_path

        :return: The path in the base tree, or None when the computed path
            belongs to something that was renamed away.
        """
        assert new_path[:1] not in ('\\', '/')
        old_path = self._renamed.get(new_path)
        if old_path is not None:
            return old_path
        dirname, basename = os.path.split(new_path)
        # dirname is not '' doesn't work, because
        # dirname may be a unicode entry, and is
        # requires the objects to be identical
        if dirname != '':
            # The entry itself was not renamed, but a parent directory
            # may have been.
            old_dir = self.old_path(dirname)
            if old_dir is None:
                old_path = None
            else:
                old_path = pathjoin(old_dir, basename)
        else:
            old_path = new_path
        # If the new path wasn't in renamed, the old one shouldn't be in
        # renamed_r
        if old_path in self._renamed_r:
            return None
        return old_path

    def new_path(self, old_path):
        """Get the new_path (path in the target_tree) for the file at old_path
        in the base tree.

        :return: The path in the target tree, or None when the computed path
            is the destination of some other rename.
        """
        assert old_path[:1] not in ('\\', '/')
        new_path = self._renamed_r.get(old_path)
        if new_path is not None:
            return new_path
        # NOTE(review): new_path is always None at this point, so this test
        # can only match a None key; kept to preserve behaviour.
        if new_path in self._renamed:
            return None
        dirname, basename = os.path.split(old_path)
        if dirname != '':
            new_dir = self.new_path(dirname)
            if new_dir is None:
                new_path = None
            else:
                new_path = pathjoin(new_dir, basename)
        else:
            new_path = old_path
        # If the old path wasn't in renamed, the new one shouldn't be in
        # renamed
        if new_path in self._renamed:
            return None
        return new_path

    def path2id(self, path):
        """Return the id of the file present at path in the target tree."""
        file_id = self._new_id.get(path)
        if file_id is not None:
            return file_id
        old_path = self.old_path(path)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        if hasattr(self.base_tree, 'path2id'):
            return self.base_tree.path2id(old_path)
        else:
            return self.base_tree.inventory.path2id(old_path)

    def id2path(self, file_id):
        """Return the new path in the target tree of the file with id file_id"""
        path = self._new_id_r.get(file_id)
        if path is not None:
            return path
        old_path = self.base_tree.id2path(file_id)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        return self.new_path(old_path)

    def old_contents_id(self, file_id):
        """Return the id in the base_tree for the given file_id.
        Return None if the file did not exist in base.
        """
        if self.contents_by_id:
            if self.base_tree.has_id(file_id):
                return file_id
            else:
                return None
        new_path = self.id2path(file_id)
        return self.base_tree.path2id(new_path)

    def get_file(self, file_id):
        """Return a file-like object containing the new contents of the
        file given by file_id.

        Without a recorded patch the base tree's file is returned unchanged;
        otherwise the patch is applied to the base contents.

        TODO:   It might be nice if this actually generated an entry
                in the text-store, so that the file contents would
                be cached.
        """
        base_id = self.old_contents_id(file_id)
        if base_id is not None:
            patch_original = self.base_tree.get_file(base_id)
        else:
            patch_original = None
        file_patch = self.patches.get(self.id2path(file_id))
        if file_patch is None:
            if (patch_original is None and
                self.get_kind(file_id) == 'directory'):
                return StringIO()
            assert patch_original is not None, "None: %s" % file_id
            return patch_original

        assert not file_patch.startswith('\\'), \
            'Malformed patch for %s, %r' % (file_id, file_patch)
        return patched_file(file_patch, patch_original)

    def get_symlink_target(self, file_id):
        """Return the symlink target noted for file_id's path, falling back
        to the base tree when none was noted.
        """
        new_path = self.id2path(file_id)
        try:
            return self._targets[new_path]
        except KeyError:
            return self.base_tree.get_symlink_target(file_id)

    def get_kind(self, file_id):
        """Return the kind noted for file_id, else the base tree's kind."""
        if file_id in self._kinds:
            return self._kinds[file_id]
        return self.base_tree.inventory[file_id].kind

    def is_executable(self, file_id):
        """Return the executable flag noted for file_id's path, else the
        base tree's flag.
        """
        path = self.id2path(file_id)
        if path in self._executable:
            return self._executable[path]
        else:
            return self.base_tree.inventory[file_id].executable

    def get_last_changed(self, file_id):
        """Return the last-changed revision noted for file_id's path, else
        the base tree's revision for that entry.
        """
        path = self.id2path(file_id)
        if path in self._last_changed:
            return self._last_changed[path]
        return self.base_tree.inventory[file_id].revision

    def get_size_and_sha1(self, file_id):
        """Return the size and sha1 hash of the given file id.
        If the file was not locally modified, this is extracted
        from the base_tree. Rather than re-reading the file.

        :return: A ``(size, sha1)`` tuple; ``(None, None)`` when the file
            has no path in this tree.
        """
        new_path = self.id2path(file_id)
        if new_path is None:
            return None, None
        if new_path not in self.patches:
            # If the entry does not have a patch, then the
            # contents must be the same as in the base_tree
            ie = self.base_tree.inventory[file_id]
            if ie.text_size is None:
                return ie.text_size, ie.text_sha1
            return int(ie.text_size), ie.text_sha1
        fileobj = self.get_file(file_id)
        content = fileobj.read()
        return len(content), sha_string(content)

    def _get_inventory(self):
        """Build up the inventory entry for the BundleTree.

        This need to be called before ever accessing self.inventory

        :raises BzrError: If a file entry ends up without a text size, or an
            entry has an unrecognised kind.
        """
        from os.path import dirname, basename

        assert self.base_tree is not None
        base_inv = self.base_tree.inventory
        root_id = base_inv.root.file_id
        try:
            # New inventories have a unique root_id
            inv = Inventory(root_id, self.revision_id)
        except TypeError:
            inv = Inventory(revision_id=self.revision_id)
        inv.root.revision = self.get_last_changed(root_id)

        def add_entry(file_id):
            path = self.id2path(file_id)
            if path is None:
                return
            parent_path = dirname(path)
            if parent_path == u'':
                parent_id = root_id
            else:
                parent_id = self.path2id(parent_path)

            kind = self.get_kind(file_id)
            revision_id = self.get_last_changed(file_id)

            name = basename(path)
            if kind == 'directory':
                ie = InventoryDirectory(file_id, name, parent_id)
            elif kind == 'file':
                ie = InventoryFile(file_id, name, parent_id)
                ie.executable = self.is_executable(file_id)
            elif kind == 'symlink':
                ie = InventoryLink(file_id, name, parent_id)
                ie.symlink_target = self.get_symlink_target(file_id)
            else:
                # Fail with a clear error instead of leaving `ie` unbound.
                raise BzrError('Unknown kind %r for file_id %r'
                               % (kind, file_id))
            ie.revision = revision_id

            if kind in ('directory', 'symlink'):
                ie.text_size, ie.text_sha1 = None, None
            else:
                ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
            if (ie.text_size is None) and (kind == 'file'):
                raise BzrError('Got a text_size of None for file_id %r' % file_id)
            inv.add(ie)

        sorted_entries = self.sorted_path_id()
        for path, file_id in sorted_entries:
            if file_id == inv.root.file_id:
                continue
            add_entry(file_id)

        return inv

    # Have to overload the inherited inventory property
    # because _get_inventory is only called in the parent.
    # Reading the docs, property() objects do not use
    # overloading, they use the function as it was defined
    # when the property was created.
    inventory = property(_get_inventory)

    def __iter__(self):
        for path, entry in self.inventory.iter_entries():
            yield entry.file_id

    def sorted_path_id(self):
        """Return a sorted list of (path, file_id) pairs covering the noted
        new ids and every base-tree id that still has a path.
        """
        paths = []
        for result in self._new_id.iteritems():
            paths.append(result)
        for file_id in self.base_tree:
            path = self.id2path(file_id)
            if path is None:
                continue
            paths.append((path, file_id))
        paths.sort()
        return paths
721
def patched_file(file_patch, original):
    """Produce a file-like object with the patched version of a text

    :param file_patch: The patch text to apply, as a single string.
    :param original: The original contents, passed to iter_patched().
    :return: An IterableFile over the patched lines.
    """
    from bzrlib.patches import iter_patched
    from bzrlib.iterablefile import IterableFile
    # NOTE(review): the condition guarding this early return was missing
    # from the listing this was restored from; an empty patch is the
    # inferred trigger -- please confirm.
    if file_patch == "":
        return IterableFile(())
    # string.splitlines(True) also splits on '\r', but the iter_patched code
    # only expects to iterate over '\n' style lines
    return IterableFile(iter_patched(original,
                StringIO(file_patch).readlines()))