50
50
# have slight specializations for different ways its used: annotate,
51
51
# basis for add, get, etc.
53
# TODO: Perhaps the API should work only in names to hide the integer
54
# indexes from the user?
53
# TODO: Probably the API should work only in names to hide the integer
54
# indexes from the user.
56
56
# TODO: Is there any potential performance win by having an add()
57
57
# variant that is passed a pre-cooked version of the single basis
60
# TODO: Have a way to go back and insert a revision V1 that is a parent
61
# of an already-stored revision V2. This means some lines previously
62
# counted as new in V2 will be discovered to have actually come from V1.
63
# It is probably necessary to insert V1, then compute a whole new diff
64
# from the mashed ancestors to V2. This must be repeated for every
65
# direct child of V1. The deltas from V2 to its descendents won't change,
66
# although their location within the weave may change. It may be possible
67
# to just adjust the location of those instructions rather than
68
# re-weaving the whole thing. This is expected to be a fairly rare
69
# operation, only used when inserting data that was previously a ghost.
60
# TODO: Reweave can possibly be made faster by remembering diffs
61
# where the basis and destination are unchanged.
63
# FIXME: Sometimes we will be given a parents list for a revision
64
# that includes some redundant parents (i.e. already a parent of
65
# something in the list.) We should eliminate them. This can
66
# be done fairly efficiently because the sequence numbers constrain
67
# the possible relationships.
75
72
from difflib import SequenceMatcher
80
class WeaveError(Exception):
81
"""Exception in processing weave"""
84
class WeaveFormatError(WeaveError):
85
"""Weave invariant violated"""
74
from bzrlib.trace import mutter
75
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
76
WeaveRevisionNotPresent, WeaveRevisionAlreadyPresent)
77
import bzrlib.errors as errors
78
from bzrlib.tsort import topo_sort
88
81
class Weave(object):
89
82
"""weave - versioned text file storage.
235
234
def idx_to_name(self, version):
236
235
return self._names[version]
239
237
def _check_repeated_add(self, name, parents, text, sha1):
240
238
"""Check that a duplicated add is OK.
242
240
If it is, return the (old) index; otherwise raise an exception.
244
242
idx = self.lookup(name)
245
if sorted(self._parents[idx]) != sorted(parents):
246
raise WeaveError("name \"%s\" already present in weave "
247
"with different parents" % name)
248
if sha1 != self._sha1s[idx]:
249
raise WeaveError("name \"%s\" already present in weave "
250
"with different text" % name)
243
if sorted(self._parents[idx]) != sorted(parents) \
244
or sha1 != self._sha1s[idx]:
245
raise WeaveRevisionAlreadyPresent(name, self)
255
248
def add(self, name, parents, text, sha1=None):
256
249
"""Add a single text on top of the weave.
484
raise WeaveFormatError('unexpected instruction %r'
481
raise WeaveFormatError('unexpected instruction %r' % v)
487
483
assert isinstance(l, basestring)
489
485
yield lineno, istack[-1], dset, l
489
raise WeaveFormatError("unclosed insertion blocks "
490
"at end of weave: %s" % istack)
492
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
494
495
def _extract(self, versions):
495
496
"""Yield annotation of lines in included set.
543
544
result.append((istack[-1], lineno, l))
547
raise WFE("unclosed insertion blocks at end of weave",
547
raise WeaveFormatError("unclosed insertion blocks "
548
"at end of weave: %s" % istack)
550
raise WFE("unclosed deletion blocks at end of weave",
550
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
557
555
def get_iter(self, name_or_index):
558
556
"""Yield lines for the specified version."""
559
557
incls = [self.maybe_lookup(name_or_index)]
562
# We don't have sha1 sums for multiple entries
560
564
for origin, lineno, line in self._extract(incls):
569
expected_sha1 = self._sha1s[index]
570
measured_sha1 = cur_sha.hexdigest()
571
if measured_sha1 != expected_sha1:
572
raise errors.WeaveInvalidChecksum(
573
'file %s, revision %s, expected: %s, measured %s'
574
% (self._weave_name, self._names[index],
575
expected_sha1, measured_sha1))
564
578
def get_text(self, name_or_index):
609
629
raise WeaveFormatError("invalid included version %d for index %d"
610
630
% (inclusions[-1], version))
612
# try extracting all versions; this is a bit slow and parallel
613
# extraction could be used
632
# try extracting all versions; parallel extraction is used
614
633
nv = self.numversions()
634
sha1s = [sha.new() for i in range(nv)]
635
texts = [[] for i in range(nv)]
638
# For creating the ancestry, IntSet is much faster (3.7s vs 0.17s)
639
# The problem is that set membership is much more expensive
641
for p in self._parents[i]:
642
new_inc.update(inclusions[p])
644
#assert set(new_inc) == self.inclusions([i]), 'failed %s != %s' % (new_inc, self.inclusions([i]))
645
inclusions.append(new_inc)
647
nlines = len(self._weave)
649
update_text = 'checking weave'
651
short_name = os.path.basename(self._weave_name)
652
update_text = 'checking %s' % (short_name,)
653
update_text = update_text[:25]
655
for lineno, insert, deleteset, line in self._walk():
657
progress_bar.update(update_text, lineno, nlines)
659
for j, j_inc in enumerate(inclusions):
660
# The active inclusion must be an ancestor,
661
# and no ancestors must have deleted this line,
662
# because we don't support resurrection.
663
if (insert in j_inc) and not (deleteset & j_inc):
664
sha1s[j].update(line)
615
666
for version in range(nv):
617
progress_bar.update('checking text', version, nv)
619
for l in self.get_iter(version):
667
hd = sha1s[version].hexdigest()
622
668
expected = self._sha1s[version]
623
669
if hd != expected:
624
raise WeaveError("mismatched sha1 for version %d; "
625
"got %s, expected %s"
626
% (version, hd, expected))
670
raise errors.WeaveInvalidChecksum(
671
"mismatched sha1 for version %s: "
672
"got %s, expected %s"
673
% (self._names[version], hd, expected))
628
675
# TODO: check insertions are properly nested, that there are
629
676
# no lines outside of insertion blocks, that deletions are
630
677
# properly paired, etc.
634
def merge(self, merge_versions):
635
"""Automerge and mark conflicts between versions.
637
This returns a sequence, each entry describing alternatives
638
for a chunk of the file. Each of the alternatives is given as
641
If there is a chunk of the file where there's no diagreement,
642
only one alternative is given.
644
# approach: find the included versions common to all the
646
raise NotImplementedError()
650
679
def _delta(self, included, lines):
651
680
"""Return changes from basis to new revision.
774
808
retrieved from self after this call.
776
810
It is illegal for the two weaves to contain different values
811
or different parents for any version. See also reweave().
779
813
if other.numversions() == 0:
780
814
return # nothing to update, easy
815
# two loops so that we do not change ourselves before verifying it
781
817
# work through in index order to make sure we get all dependencies
782
818
for other_idx, name in enumerate(other._names):
783
# TODO: If all the parents of the other version are already
819
self._check_version_consistent(other, other_idx, name)
824
for other_idx, name in enumerate(other._names):
825
# TODO: If all the parents of the other version are already
784
826
# present then we can avoid some work by just taking the delta
785
827
# and adjusting the offsets.
786
if self._check_version_consistent(other, other_idx, name):
788
828
new_parents = self._imported_parents(other, other_idx)
829
sha1 = other._sha1s[other_idx]
833
if name in self._names:
834
idx = self.lookup(name)
835
n1 = map(other.idx_to_name, other._parents[other_idx] )
836
n2 = map(self.idx_to_name, self._parents[other_idx] )
837
if sha1 == self._sha1s[idx] and n1 == n2:
789
841
lines = other.get_lines(other_idx)
790
sha1 = other._sha1s[other_idx]
791
842
self.add(name, new_parents, lines, sha1)
844
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
845
merged,processed,self._weave_name, time.time( )-time0))
794
850
def _imported_parents(self, other, other_idx):
795
851
"""Return list of parents in self corresponding to indexes in other."""
831
raise WeaveError("inconsistent parents for version {%s}: "
887
raise WeaveParentMismatch("inconsistent parents "
888
"for version {%s}: %s vs %s" % (name, n1, n2))
835
890
return True # ok!
894
def reweave(self, other):
895
"""Reweave self with other."""
896
new_weave = reweave(self, other)
897
for attr in self.__slots__:
898
setattr(self, attr, getattr(new_weave, attr))
902
"""Combine two weaves and return the result.
904
This works even if a revision R has different parents in
905
wa and wb. In the resulting weave all the parents are given.
907
This is done by just building up a new weave, maintaining ordering
908
of the versions in the two inputs. More efficient approaches
909
might be possible but it should only be necessary to do
910
this operation rarely, when a new previously ghost version is
915
queue_a = range(wa.numversions())
916
queue_b = range(wb.numversions())
917
# first determine combined parents of all versions
918
# map from version name -> all parent names
919
combined_parents = _reweave_parent_graphs(wa, wb)
920
mutter("combined parents: %r", combined_parents)
921
order = topo_sort(combined_parents.iteritems())
922
mutter("order to reweave: %r", order)
924
if name in wa._name_map:
925
lines = wa.get_lines(name)
926
if name in wb._name_map:
927
assert lines == wb.get_lines(name)
929
lines = wb.get_lines(name)
930
wr.add(name, combined_parents[name], lines)
934
def _reweave_parent_graphs(wa, wb):
935
"""Return combined parent ancestry for two weaves.
937
Returned as a list of (version_name, set(parent_names))"""
939
for weave in [wa, wb]:
940
for idx, name in enumerate(weave._names):
941
p = combined.setdefault(name, set())
942
p.update(map(weave.idx_to_name, weave._parents[idx]))
840
946
def weave_toc(w):
841
947
"""Show the weave's table-of-contents"""
1192
def lsprofile_main(argv):
1193
from bzrlib.lsprof import profile
1194
ret,stats = profile(main, argv)
1086
1200
if __name__ == '__main__':
1088
1202
if '--profile' in sys.argv:
1089
1203
args = sys.argv[:]
1090
1204
args.remove('--profile')
1091
1205
sys.exit(profile_main(args))
1206
elif '--lsprof' in sys.argv:
1208
args.remove('--lsprof')
1209
sys.exit(lsprofile_main(args))
1093
1211
sys.exit(main(sys.argv))