50
50
# have slight specializations for different ways its used: annotate,
51
51
# basis for add, get, etc.
53
# TODO: Probably the API should work only in names to hide the integer
54
# indexes from the user.
53
# TODO: Perhaps the API should work only in names to hide the integer
54
# indexes from the user?
56
56
# TODO: Is there any potential performance win by having an add()
57
57
# variant that is passed a pre-cooked version of the single basis
60
# TODO: Reweave can possibly be made faster by remembering diffs
61
# where the basis and destination are unchanged.
63
# FIXME: Sometimes we will be given a parents list for a revision
64
# that includes some redundant parents (i.e. already a parent of
65
# something in the list.) We should eliminate them. This can
66
# be done fairly efficiently because the sequence numbers constrain
67
# the possible relationships.
60
# TODO: Have a way to go back and insert a revision V1 that is a parent
61
# of an already-stored revision V2. This means some lines previously
62
# counted as new in V2 will be discovered to have actually come from V1.
63
# It is probably necessary to insert V1, then compute a whole new diff
64
# from the mashed ancestors to V2. This must be repeated for every
65
# direct child of V1. The deltas from V2 to its descendents won't change,
66
# although their location within the weave may change. It may be possible
67
# to just adjust the location of those instructions rather than
68
# re-weaving the whole thing. This is expected to be a fairly rare
69
# operation, only used when inserting data that was previously a ghost.
72
75
from difflib import SequenceMatcher
74
77
from bzrlib.trace import mutter
75
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
76
WeaveRevisionNotPresent, WeaveRevisionAlreadyPresent)
77
import bzrlib.errors as errors
78
from bzrlib.tsort import topo_sort
80
class WeaveError(Exception):
81
"""Exception in processing weave"""
84
class WeaveFormatError(WeaveError):
85
"""Weave invariant violated"""
88
class WeaveParentMismatch(WeaveError):
89
"""Parents are mismatched between two revisions."""
81
92
class Weave(object):
82
93
"""weave - versioned text file storage.
234
239
def idx_to_name(self, version):
235
240
return self._names[version]
237
243
def _check_repeated_add(self, name, parents, text, sha1):
238
244
"""Check that a duplicated add is OK.
240
246
If it is, return the (old) index; otherwise raise an exception.
242
248
idx = self.lookup(name)
243
if sorted(self._parents[idx]) != sorted(parents) \
244
or sha1 != self._sha1s[idx]:
245
raise WeaveRevisionAlreadyPresent(name, self)
249
if sorted(self._parents[idx]) != sorted(parents):
250
raise WeaveError("name \"%s\" already present in weave "
251
"with different parents" % name)
252
if sha1 != self._sha1s[idx]:
253
raise WeaveError("name \"%s\" already present in weave "
254
"with different text" % name)
248
259
def add(self, name, parents, text, sha1=None):
249
260
"""Add a single text on top of the weave.
481
raise WeaveFormatError('unexpected instruction %r' % v)
493
raise WeaveFormatError('unexpected instruction %r'
483
496
assert isinstance(l, basestring)
485
498
yield lineno, istack[-1], dset, l
489
raise WeaveFormatError("unclosed insertion blocks "
490
"at end of weave: %s" % istack)
492
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
495
503
def _extract(self, versions):
496
504
"""Yield annotation of lines in included set.
544
552
result.append((istack[-1], lineno, l))
547
raise WeaveFormatError("unclosed insertion blocks "
548
"at end of weave: %s" % istack)
556
raise WFE("unclosed insertion blocks at end of weave",
550
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
559
raise WFE("unclosed deletion blocks at end of weave",
555
566
def get_iter(self, name_or_index):
556
567
"""Yield lines for the specified version."""
557
568
incls = [self.maybe_lookup(name_or_index)]
562
# We don't have sha1 sums for multiple entries
564
569
for origin, lineno, line in self._extract(incls):
569
expected_sha1 = self._sha1s[index]
570
measured_sha1 = cur_sha.hexdigest()
571
if measured_sha1 != expected_sha1:
572
raise errors.WeaveInvalidChecksum(
573
'file %s, revision %s, expected: %s, measured %s'
574
% (self._weave_name, self._names[index],
575
expected_sha1, measured_sha1))
578
573
def get_text(self, name_or_index):
629
618
raise WeaveFormatError("invalid included version %d for index %d"
630
619
% (inclusions[-1], version))
632
# try extracting all versions; parallel extraction is used
621
# try extracting all versions; this is a bit slow and parallel
622
# extraction could be used
633
623
nv = self.numversions()
634
sha1s = [sha.new() for i in range(nv)]
635
texts = [[] for i in range(nv)]
638
# For creating the ancestry, IntSet is much faster (3.7s vs 0.17s)
639
# The problem is that set membership is much more expensive
641
for p in self._parents[i]:
642
new_inc.update(inclusions[p])
644
#assert set(new_inc) == self.inclusions([i]), 'failed %s != %s' % (new_inc, self.inclusions([i]))
645
inclusions.append(new_inc)
647
nlines = len(self._weave)
649
update_text = 'checking weave'
651
short_name = os.path.basename(self._weave_name)
652
update_text = 'checking %s' % (short_name,)
653
update_text = update_text[:25]
655
for lineno, insert, deleteset, line in self._walk():
624
for version in range(nv):
657
progress_bar.update(update_text, lineno, nlines)
659
for j, j_inc in enumerate(inclusions):
660
# The active inclusion must be an ancestor,
661
# and no ancestors must have deleted this line,
662
# because we don't support resurrection.
663
if (insert in j_inc) and not (deleteset & j_inc):
664
sha1s[j].update(line)
666
for version in range(nv):
667
hd = sha1s[version].hexdigest()
626
progress_bar.update('checking text', version, nv)
628
for l in self.get_iter(version):
668
631
expected = self._sha1s[version]
669
632
if hd != expected:
670
raise errors.WeaveInvalidChecksum(
671
"mismatched sha1 for version %s: "
672
"got %s, expected %s"
673
% (self._names[version], hd, expected))
633
raise WeaveError("mismatched sha1 for version %d; "
634
"got %s, expected %s"
635
% (version, hd, expected))
675
637
# TODO: check insertions are properly nested, that there are
676
638
# no lines outside of insertion blocks, that deletions are
677
639
# properly paired, etc.
643
def merge(self, merge_versions):
644
"""Automerge and mark conflicts between versions.
646
This returns a sequence, each entry describing alternatives
647
for a chunk of the file. Each of the alternatives is given as
650
If there is a chunk of the file where there's no diagreement,
651
only one alternative is given.
653
# approach: find the included versions common to all the
655
raise NotImplementedError()
679
659
def _delta(self, included, lines):
680
660
"""Return changes from basis to new revision.
817
792
# work through in index order to make sure we get all dependencies
818
793
for other_idx, name in enumerate(other._names):
819
self._check_version_consistent(other, other_idx, name)
794
if self._check_version_consistent(other, other_idx, name):
824
796
for other_idx, name in enumerate(other._names):
825
# TODO: If all the parents of the other version are already
797
# TODO: If all the parents of the other version are already
826
798
# present then we can avoid some work by just taking the delta
827
799
# and adjusting the offsets.
828
800
new_parents = self._imported_parents(other, other_idx)
801
lines = other.get_lines(other_idx)
829
802
sha1 = other._sha1s[other_idx]
833
if name in self._names:
834
idx = self.lookup(name)
835
n1 = map(other.idx_to_name, other._parents[other_idx] )
836
n2 = map(self.idx_to_name, self._parents[other_idx] )
837
if sha1 == self._sha1s[idx] and n1 == n2:
841
lines = other.get_lines(other_idx)
842
803
self.add(name, new_parents, lines, sha1)
844
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
845
merged,processed,self._weave_name, time.time( )-time0))
850
806
def _imported_parents(self, other, other_idx):
851
807
"""Return list of parents in self corresponding to indexes in other."""
902
def _make_reweave_order(wa_order, wb_order, combined_parents):
903
"""Return an order to reweave versions respecting parents."""
907
next_a = next_b = None
908
len_a = len(wa_order)
909
len_b = len(wb_order)
910
while ia < len(wa_order) or ib < len(wb_order):
912
next_a = wa_order[ia]
916
if combined_parents[next_a].issubset(done):
918
result.append(next_a)
922
next_b = wb_order[ib]
926
elif combined_parents[next_b].issubset(done):
928
result.append(next_b)
931
raise WeaveError("don't know how to reweave at {%s} and {%s}"
946
936
def weave_toc(w):
947
937
"""Show the weave's table-of-contents"""
948
938
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')