67
67
# the possible relationships.
72
71
from difflib import SequenceMatcher
74
73
from bzrlib.trace import mutter
75
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
76
WeaveRevisionNotPresent, WeaveRevisionAlreadyPresent)
77
import bzrlib.errors as errors
74
from bzrlib.errors import WeaveError, WeaveFormatError, WeaveParentMismatch, \
75
WeaveRevisionNotPresent, WeaveRevisionAlreadyPresent
78
76
from bzrlib.tsort import topo_sort
485
483
yield lineno, istack[-1], dset, l
489
raise WeaveFormatError("unclosed insertion blocks "
490
"at end of weave: %s" % istack)
492
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
495
488
def _extract(self, versions):
496
489
"""Yield annotation of lines in included set.
555
548
def get_iter(self, name_or_index):
556
549
"""Yield lines for the specified version."""
557
550
incls = [self.maybe_lookup(name_or_index)]
562
# We don't have sha1 sums for multiple entries
564
551
for origin, lineno, line in self._extract(incls):
569
expected_sha1 = self._sha1s[index]
570
measured_sha1 = cur_sha.hexdigest()
571
if measured_sha1 != expected_sha1:
572
raise errors.WeaveInvalidChecksum(
573
'file %s, revision %s, expected: %s, measured %s'
574
% (self._weave_name, self._names[index],
575
expected_sha1, measured_sha1))
578
555
def get_text(self, name_or_index):
590
def get_sha1(self, name):
591
"""Get the stored sha1 sum for the given revision.
593
:param name: The name of the version to lookup
595
return self._sha1s[self.lookup(name)]
597
567
def mash_iter(self, included):
598
568
"""Return composed version of multiple included versions."""
599
569
included = map(self.maybe_lookup, included)
629
600
raise WeaveFormatError("invalid included version %d for index %d"
630
601
% (inclusions[-1], version))
632
# try extracting all versions; parallel extraction is used
603
# try extracting all versions; this is a bit slow and parallel
604
# extraction could be used
633
605
nv = self.numversions()
634
sha1s = [sha.new() for i in range(nv)]
635
texts = [[] for i in range(nv)]
638
# For creating the ancestry, IntSet is much faster (3.7s vs 0.17s)
639
# The problem is that set membership is much more expensive
641
for p in self._parents[i]:
642
new_inc.update(inclusions[p])
644
#assert set(new_inc) == self.inclusions([i]), 'failed %s != %s' % (new_inc, self.inclusions([i]))
645
inclusions.append(new_inc)
647
nlines = len(self._weave)
649
update_text = 'checking weave'
651
short_name = os.path.basename(self._weave_name)
652
update_text = 'checking %s' % (short_name,)
653
update_text = update_text[:25]
655
for lineno, insert, deleteset, line in self._walk():
606
for version in range(nv):
657
progress_bar.update(update_text, lineno, nlines)
659
for j, j_inc in enumerate(inclusions):
660
# The active inclusion must be an ancestor,
661
# and no ancestors must have deleted this line,
662
# because we don't support resurrection.
663
if (insert in j_inc) and not (deleteset & j_inc):
664
sha1s[j].update(line)
666
for version in range(nv):
667
hd = sha1s[version].hexdigest()
608
progress_bar.update('checking text', version, nv)
610
for l in self.get_iter(version):
668
613
expected = self._sha1s[version]
669
614
if hd != expected:
670
raise errors.WeaveInvalidChecksum(
671
"mismatched sha1 for version %s: "
672
"got %s, expected %s"
673
% (self._names[version], hd, expected))
615
raise WeaveError("mismatched sha1 for version %d; "
616
"got %s, expected %s"
617
% (version, hd, expected))
675
619
# TODO: check insertions are properly nested, that there are
676
620
# no lines outside of insertion blocks, that deletions are
677
621
# properly paired, etc.
679
624
def _delta(self, included, lines):
680
625
"""Return changes from basis to new revision.
810
755
It is illegal for the two weaves to contain different values
811
756
or different parents for any version. See also reweave().
813
:param other: The other weave to pull into this one
814
:param pb: An optional progress bar
815
:param msg: An optional message to display for progress
817
758
if other.numversions() == 0:
818
759
return # nothing to update, easy
819
760
# two loops so that we do not change ourselves before verifying it
821
762
# work through in index order to make sure we get all dependencies
824
763
for other_idx, name in enumerate(other._names):
825
764
self._check_version_consistent(other, other_idx, name)
826
sha1 = other._sha1s[other_idx]
830
if name in self._name_map:
831
idx = self.lookup(name)
832
n1 = set(map(other.idx_to_name, other._parents[other_idx]))
833
n2 = set(map(self.idx_to_name, self._parents[idx]))
834
if sha1 == self._sha1s[idx] and n1 == n2:
837
names_to_join.append((other_idx, name))
843
768
time0 = time.time( )
844
for other_idx, name in names_to_join:
769
for other_idx, name in enumerate(other._names):
845
770
# TODO: If all the parents of the other version are already
846
771
# present then we can avoid some work by just taking the delta
847
772
# and adjusting the offsets.
848
773
new_parents = self._imported_parents(other, other_idx)
849
774
sha1 = other._sha1s[other_idx]
778
if name in self._names:
779
idx = self.lookup(name)
780
n1 = map(other.idx_to_name, other._parents[other_idx] )
781
n2 = map(self.idx_to_name, self._parents[other_idx] )
782
if sha1 == self._sha1s[idx] and n1 == n2:
854
pb.update(msg, merged, len(names_to_join))
856
786
lines = other.get_lines(other_idx)
857
787
self.add(name, new_parents, lines, sha1)
859
789
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
860
merged, processed, self._weave_name, time.time( )-time0))
790
merged,processed,self._weave_name, time.time( )-time0))
862
795
def _imported_parents(self, other, other_idx):
863
796
"""Return list of parents in self corresponding to indexes in other."""
892
825
self_parents = self._parents[this_idx]
893
826
other_parents = other._parents[other_idx]
894
n1 = set([self._names[i] for i in self_parents])
895
n2 = set([other._names[i] for i in other_parents])
827
n1 = [self._names[i] for i in self_parents]
828
n2 = [other._names[i] for i in other_parents]
897
832
raise WeaveParentMismatch("inconsistent parents "
898
833
"for version {%s}: %s vs %s" % (name, n1, n2))
904
def reweave(self, other, pb=None, msg=None):
905
"""Reweave self with other.
907
:param other: The other weave to merge
908
:param pb: An optional progress bar, indicating how far done we are
909
:param msg: An optional message for the progress
911
new_weave = reweave(self, other, pb=pb, msg=msg)
839
def reweave(self, other):
840
"""Reweave self with other."""
841
new_weave = reweave(self, other)
912
842
for attr in self.__slots__:
913
843
setattr(self, attr, getattr(new_weave, attr))
916
def reweave(wa, wb, pb=None, msg=None):
917
847
"""Combine two weaves and return the result.
919
849
This works even if a revision R has different parents in
938
865
mutter("combined parents: %r", combined_parents)
939
866
order = topo_sort(combined_parents.iteritems())
940
867
mutter("order to reweave: %r", order)
945
for idx, name in enumerate(order):
947
pb.update(msg, idx, len(order))
948
869
if name in wa._name_map:
949
870
lines = wa.get_lines(name)
950
871
if name in wb._name_map:
951
lines_b = wb.get_lines(name)
953
mutter('Weaves differ on content. rev_id {%s}', name)
954
mutter('weaves: %s, %s', wa._weave_name, wb._weave_name)
956
lines = list(difflib.unified_diff(lines, lines_b,
957
wa._weave_name, wb._weave_name))
958
mutter('lines:\n%s', ''.join(lines))
959
raise errors.WeaveTextDiffers(name, wa, wb)
872
assert lines == wb.get_lines(name)
961
874
lines = wb.get_lines(name)
962
875
wr.add(name, combined_parents[name], lines)