22
22
"""Weave - storage of related text file versions"""
24
# before intset (r923) 2000 versions in 41.5s
25
# with intset (r926) 2000 versions in 93s !!!
26
# better to just use plain sets.
28
# making _extract build and return a list, rather than being a generator
31
# with python -O, r923 does 2000 versions in 36.87s
33
# with optimizations to avoid mutating lists - 35.75! I guess copying
34
# all the elements every time costs more than the small manipulations.
35
# a surprisingly small change.
37
# r931, which avoids using a generator for extract, does 36.98s
39
# with memoized inclusions, takes 41.49s; not very good
41
# with slots, takes 37.35s; without takes 39.16, a bit surprising
43
# with the delta calculation mixed in with the add method, rather than
44
# separated, takes 36.78s
46
# with delta folded in and mutation of the list, 36.13s
48
# with all this and simplification of add code, 33s
54
# TODO: Perhaps have copy method for Weave instances?
56
25
# XXX: If we do weaves this way, will a merge still behave the same
57
26
# way if it's done in a different order? That's a pretty desirable
84
53
# TODO: Perhaps the API should work only in names to hide the integer
85
54
# indexes from the user?
56
# TODO: Is there any potential performance win by having an add()
57
# variant that is passed a pre-cooked version of the single basis
60
# TODO: Have a way to go back and insert a revision V1 that is a parent
61
# of an already-stored revision V2. This means some lines previously
62
# counted as new in V2 will be discovered to have actually come from V1.
63
# It is probably necessary to insert V1, then compute a whole new diff
64
# from the mashed ancestors to V2. This must be repeated for every
65
# direct child of V1. The deltas from V2 to its descendents won't change,
66
# although their location within the weave may change. It may be possible
67
# to just adjust the location of those instructions rather than
68
# re-weaving the whole thing. This is expected to be a fairly rare
69
# operation, only used when inserting data that was previously a ghost.
75
from difflib import SequenceMatcher
183
170
For each name, the version number.
173
Descriptive name of this weave; typically the filename if known.
186
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map']
177
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
180
def __init__(self, weave_name=None):
190
182
self._parents = []
193
185
self._name_map = {}
186
self._weave_name = weave_name
190
"""Return a deep copy of self.
192
The copy can be modified without affecting the original weave."""
194
other._weave = self._weave[:]
195
other._parents = self._parents[:]
196
other._sha1s = self._sha1s[:]
197
other._names = self._names[:]
198
other._name_map = self._name_map.copy()
199
other._weave_name = self._weave_name
196
202
def __eq__(self, other):
197
203
if not isinstance(other, Weave):
205
211
return not self.__eq__(other)
214
def maybe_lookup(self, name_or_index):
215
"""Convert possible symbolic name to index, or pass through indexes."""
216
if isinstance(name_or_index, (int, long)):
219
return self.lookup(name_or_index)
208
222
def lookup(self, name):
223
"""Convert symbolic version name to index."""
210
225
return self._name_map[name]
212
raise WeaveError("name %s not present in weave" % name)
215
def add(self, name, parents, text):
227
raise WeaveError("name %r not present in weave %r" %
228
(name, self._weave_name))
231
def iter_names(self):
232
"""Yield a list of all names in this weave."""
233
return iter(self._names)
235
def idx_to_name(self, version):
236
return self._names[version]
239
def _check_repeated_add(self, name, parents, text, sha1):
240
"""Check that a duplicated add is OK.
242
If it is, return the (old) index; otherwise raise an exception.
244
idx = self.lookup(name)
245
if sorted(self._parents[idx]) != sorted(parents):
246
raise WeaveError("name \"%s\" already present in weave "
247
"with different parents" % name)
248
if sha1 != self._sha1s[idx]:
249
raise WeaveError("name \"%s\" already present in weave "
250
"with different text" % name)
255
def add(self, name, parents, text, sha1=None):
216
256
"""Add a single text on top of the weave.
218
258
Returns the index number of the newly added version.
225
265
List or set of direct parent version numbers.
228
Sequence of lines to be added in the new version."""
268
Sequence of lines to be added in the new version.
270
sha -- SHA-1 of the file, if known. This is trusted to be
273
from bzrlib.osutils import sha_strings
230
275
assert isinstance(name, basestring)
277
sha1 = sha_strings(text)
231
278
if name in self._name_map:
232
raise WeaveError("name %r already present in weave" % name)
279
return self._check_repeated_add(name, parents, text, sha1)
281
parents = map(self.maybe_lookup, parents)
234
282
self._check_versions(parents)
235
283
## self._check_lines(text)
236
284
new_version = len(self._parents)
243
287
# if we abort after here the (in-memory) weave will be corrupt because only
244
288
# some fields are updated
293
337
#print 'basis_lines:', basis_lines
294
338
#print 'new_lines: ', lines
296
from difflib import SequenceMatcher
297
340
s = SequenceMatcher(None, basis_lines, text)
299
342
# offset gives the number of lines that have been inserted
335
378
return new_version
380
def add_identical(self, old_rev_id, new_rev_id, parents):
381
"""Add an identical text to old_rev_id as new_rev_id."""
382
old_lines = self.get(self.lookup(old_rev_id))
383
self.add(new_rev_id, parents, old_lines)
338
385
def inclusions(self, versions):
339
386
"""Return set of all ancestors of given version(s)."""
340
387
i = set(versions)
345
# include all its parents
346
i.update(self._parents[v])
350
raise ValueError("version %d not present in weave" % v)
388
for v in xrange(max(versions), 0, -1):
390
# include all its parents
391
i.update(self._parents[v])
393
## except IndexError:
394
## raise ValueError("version %d not present in weave" % v)
397
def parents(self, version):
398
return self._parents[version]
353
401
def minimal_parents(self, version):
393
441
raise IndexError("invalid version number %r" % i)
396
def annotate(self, index):
397
return list(self.annotate_iter(index))
400
def annotate_iter(self, version):
444
def annotate(self, name_or_index):
445
return list(self.annotate_iter(name_or_index))
448
def annotate_iter(self, name_or_index):
401
449
"""Yield list of (index-id, line) pairs for the specified version.
403
451
The index indicates when the line originated in the weave."""
404
for origin, lineno, text in self._extract([version]):
452
incls = [self.maybe_lookup(name_or_index)]
453
for origin, lineno, text in self._extract(incls):
405
454
yield origin, text
504
def get_iter(self, version):
557
def get_iter(self, name_or_index):
505
558
"""Yield lines for the specified version."""
506
for origin, lineno, line in self._extract([version]):
559
incls = [self.maybe_lookup(name_or_index)]
560
for origin, lineno, line in self._extract(incls):
510
def get(self, index):
511
return list(self.get_iter(index))
564
def get_text(self, name_or_index):
565
return ''.join(self.get_iter(name_or_index))
566
assert isinstance(version, int)
569
def get_lines(self, name_or_index):
570
return list(self.get_iter(name_or_index))
514
576
def mash_iter(self, included):
515
577
"""Return composed version of multiple included versions."""
578
included = map(self.maybe_lookup, included)
516
579
for origin, lineno, text in self._extract(included):
769
def join(self, other):
770
"""Integrate versions from other into this weave.
772
The resulting weave contains all the history of both weaves;
773
any version you could retrieve from either self or other can be
774
retrieved from self after this call.
776
It is illegal for the two weaves to contain different values
779
if other.numversions() == 0:
780
return # nothing to update, easy
781
# work through in index order to make sure we get all dependencies
782
for other_idx, name in enumerate(other._names):
783
# TODO: If all the parents of the other version are already
784
# present then we can avoid some work by just taking the delta
785
# and adjusting the offsets.
786
if self._check_version_consistent(other, other_idx, name):
788
new_parents = self._imported_parents(other, other_idx)
789
lines = other.get_lines(other_idx)
790
sha1 = other._sha1s[other_idx]
791
self.add(name, new_parents, lines, sha1)
794
def _imported_parents(self, other, other_idx):
795
"""Return list of parents in self corresponding to indexes in other."""
797
for parent_idx in other._parents[other_idx]:
798
parent_name = other._names[parent_idx]
799
if parent_name not in self._names:
800
# should not be possible
801
raise WeaveError("missing parent {%s} of {%s} in %r"
802
% (parent_name, other._name_map[other_idx], self))
803
new_parents.append(self._name_map[parent_name])
806
def _check_version_consistent(self, other, other_idx, name):
807
"""Check if a version in consistent in this and other.
809
To be consistent it must have:
812
* the same direct parents (by name, not index, and disregarding
815
If present & correct return True;
816
if not present in self return False;
817
if inconsistent raise error."""
818
this_idx = self._name_map.get(name, -1)
820
if self._sha1s[this_idx] != other._sha1s[other_idx]:
821
raise WeaveError("inconsistent texts for version {%s} "
822
"when joining weaves"
824
self_parents = self._parents[this_idx]
825
other_parents = other._parents[other_idx]
826
n1 = [self._names[i] for i in self_parents]
827
n2 = [other._names[i] for i in other_parents]
831
raise WeaveError("inconsistent parents for version {%s}: "
712
840
def weave_toc(w):
726
def weave_stats(weave_file):
727
from bzrlib.progress import ProgressBar
854
def weave_stats(weave_file, pb):
728
855
from bzrlib.weavefile import read_weave
732
857
wf = file(weave_file, 'rb')
733
858
w = read_weave(wf)
734
859
# FIXME: doesn't work on pipes
849
982
sys.stdout.writelines(w.mash_iter(map(int, argv[3:])))
985
from difflib import unified_diff
988
v1, v2 = map(int, argv[3:5])
991
diff_gen = unified_diff(lines1, lines2,
992
'%s version %d' % (fn, v1),
993
'%s version %d' % (fn, v2))
994
sys.stdout.writelines(diff_gen)
851
996
elif cmd == 'annotate':
853
998
# newline is added to all lines regardless; too hard to get