22
22
"""Weave - storage of related text file versions"""
24
# before intset (r923) 2000 versions in 41.5s
25
# with intset (r926) 2000 versions in 93s !!!
26
# better to just use plain sets.
28
# making _extract build and return a list, rather than being a generator
31
# with python -O, r923 does 2000 versions in 36.87s
33
# with optimizations to avoid mutating lists - 35.75! I guess copying
34
# all the elements every time costs more than the small manipulations.
35
# a surprisingly small change.
37
# r931, which avoids using a generator for extract, does 36.98s
39
# with memoized inclusions, takes 41.49s; not very good
41
# with slots, takes 37.35s; without takes 39.16, a bit surprising
43
# with the delta calculation mixed in with the add method, rather than
44
# separated, takes 36.78s
46
# with delta folded in and mutation of the list, 36.13s
48
# with all this and simplification of add code, 33s
54
# TODO: Perhaps have copy method for Weave instances?
25
56
# XXX: If we do weaves this way, will a merge still behave the same
26
57
# way if it's done in a different order? That's a pretty desirable
57
88
# variant that is passed a pre-cooked version of the single basis
60
# TODO: Have a way to go back and insert a revision V1 that is a parent
61
# of an already-stored revision V2. This means some lines previously
62
# counted as new in V2 will be discovered to have actually come from V1.
63
# It is probably necessary to insert V1, then compute a whole new diff
64
# from the mashed ancestors to V2. This must be repeated for every
65
# direct child of V1. The deltas from V2 to its descendents won't change,
66
# although their location within the weave may change. It may be possible
67
# to just adjust the location of those instructions rather than
68
# re-weaving the whole thing. This is expected to be a fairly rare
69
# operation, only used when inserting data that was previously a ghost.
75
94
from difflib import SequenceMatcher
97
from bzrlib.osutils import sha_strings
80
100
class WeaveError(Exception):
186
206
self._weave_name = weave_name
190
"""Return a deep copy of self.
192
The copy can be modified without affecting the original weave."""
194
other._weave = self._weave[:]
195
other._parents = self._parents[:]
196
other._sha1s = self._sha1s[:]
197
other._names = self._names[:]
198
other._name_map = self._name_map.copy()
199
other._weave_name = self._weave_name
202
209
def __eq__(self, other):
203
210
if not isinstance(other, Weave):
378
380
return new_version
380
def add_identical(self, old_rev_id, new_rev_id, parents):
381
"""Add an identical text to old_rev_id as new_rev_id."""
382
old_lines = self.get(self.lookup(old_rev_id))
383
self.add(new_rev_id, parents, old_lines)
385
383
def inclusions(self, versions):
386
384
"""Return set of all ancestors of given version(s)."""
769
def join(self, other):
770
"""Integrate versions from other into this weave.
772
The resulting weave contains all the history of both weaves;
773
any version you could retrieve from either self or other can be
774
retrieved from self after this call.
776
It is illegal for the two weaves to contain different values
779
if other.numversions() == 0:
780
return # nothing to update, easy
781
# work through in index order to make sure we get all dependencies
782
for other_idx, name in enumerate(other._names):
783
# TODO: If all the parents of the other version are already
784
# present then we can avoid some work by just taking the delta
785
# and adjusting the offsets.
786
if self._check_version_consistent(other, other_idx, name):
788
new_parents = self._imported_parents(other, other_idx)
789
lines = other.get_lines(other_idx)
790
sha1 = other._sha1s[other_idx]
791
self.add(name, new_parents, lines, sha1)
794
def _imported_parents(self, other, other_idx):
795
"""Return list of parents in self corresponding to indexes in other."""
797
for parent_idx in other._parents[other_idx]:
798
parent_name = other._names[parent_idx]
799
if parent_name not in self._names:
800
# should not be possible
801
raise WeaveError("missing parent {%s} of {%s} in %r"
802
% (parent_name, other._name_map[other_idx], self))
803
new_parents.append(self._name_map[parent_name])
806
def _check_version_consistent(self, other, other_idx, name):
807
"""Check if a version in consistent in this and other.
809
To be consistent it must have:
812
* the same direct parents (by name, not index, and disregarding
815
If present & correct return True;
816
if not present in self return False;
817
if inconsistent raise error."""
818
this_idx = self._name_map.get(name, -1)
820
if self._sha1s[this_idx] != other._sha1s[other_idx]:
821
raise WeaveError("inconsistent texts for version {%s} "
822
"when joining weaves"
824
self_parents = self._parents[this_idx]
825
other_parents = other._parents[other_idx]
826
n1 = [self._names[i] for i in self_parents]
827
n2 = [other._names[i] for i in other_parents]
831
raise WeaveError("inconsistent parents for version {%s}: "
840
773
def weave_toc(w):
854
def weave_stats(weave_file, pb):
787
def weave_stats(weave_file):
788
from bzrlib.progress import ProgressBar
855
789
from bzrlib.weavefile import read_weave
857
793
wf = file(weave_file, 'rb')
858
794
w = read_weave(wf)
859
795
# FIXME: doesn't work on pipes