22
22
"""Weave - storage of related text file versions"""
24
# before intset (r923) 2000 versions in 41.5s
25
# with intset (r926) 2000 versions in 93s !!!
26
# better to just use plain sets.
28
# making _extract build and return a list, rather than being a generator
31
# with python -O, r923 does 2000 versions in 36.87s
33
# with optimizations to avoid mutating lists - 35.75! I guess copying
34
# all the elements every time costs more than the small manipulations.
35
# a surprisingly small change.
37
# r931, which avoids using a generator for extract, does 36.98s
39
# with memoized inclusions, takes 41.49s; not very good
41
# with slots, takes 37.35s; without takes 39.16, a bit surprising
43
# with the delta calculation mixed in with the add method, rather than
44
# separated, takes 36.78s
46
# with delta folded in and mutation of the list, 36.13s
48
# with all this and simplification of add code, 33s
54
# TODO: Perhaps have copy method for Weave instances?
25
56
# XXX: If we do weaves this way, will a merge still behave the same
26
57
# way if it's done in a different order? That's a pretty desirable
53
84
# TODO: Perhaps the API should work only in names to hide the integer
54
85
# indexes from the user?
56
# TODO: Is there any potential performance win by having an add()
57
# variant that is passed a pre-cooked version of the single basis
60
# TODO: Have a way to go back and insert a revision V1 that is a parent
61
# of an already-stored revision V2. This means some lines previously
62
# counted as new in V2 will be discovered to have actually come from V1.
63
# It is probably necessary to insert V1, then compute a whole new diff
64
# from the mashed ancestors to V2. This must be repeated for every
65
# direct child of V1. The deltas from V2 to its descendents won't change,
66
# although their location within the weave may change. It may be possible
67
# to just adjust the location of those instructions rather than
68
# re-weaving the whole thing. This is expected to be a fairly rare
69
# operation, only used when inserting data that was previously a ghost.
75
from difflib import SequenceMatcher
170
183
For each name, the version number.
173
Descriptive name of this weave; typically the filename if known.
177
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
186
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map']
180
def __init__(self, weave_name=None):
182
190
self._parents = []
185
193
self._name_map = {}
186
self._weave_name = weave_name
190
"""Return a deep copy of self.
192
The copy can be modified without affecting the original weave."""
194
other._weave = self._weave[:]
195
other._parents = self._parents[:]
196
other._sha1s = self._sha1s[:]
197
other._names = self._names[:]
198
other._name_map = self._name_map.copy()
199
other._weave_name = self._weave_name
202
196
def __eq__(self, other):
203
197
if not isinstance(other, Weave):
211
205
return not self.__eq__(other)
214
def maybe_lookup(self, name_or_index):
215
"""Convert possible symbolic name to index, or pass through indexes."""
216
if isinstance(name_or_index, (int, long)):
219
return self.lookup(name_or_index)
222
208
def lookup(self, name):
223
"""Convert symbolic version name to index."""
225
210
return self._name_map[name]
227
raise WeaveError("name %r not present in weave %r" %
228
(name, self._weave_name))
231
def iter_names(self):
232
"""Yield a list of all names in this weave."""
233
return iter(self._names)
235
def idx_to_name(self, version):
236
return self._names[version]
239
def _check_repeated_add(self, name, parents, text, sha1):
240
"""Check that a duplicated add is OK.
242
If it is, return the (old) index; otherwise raise an exception.
244
idx = self.lookup(name)
245
if sorted(self._parents[idx]) != sorted(parents):
246
raise WeaveError("name \"%s\" already present in weave "
247
"with different parents" % name)
248
if sha1 != self._sha1s[idx]:
249
raise WeaveError("name \"%s\" already present in weave "
250
"with different text" % name)
255
def add(self, name, parents, text, sha1=None):
212
raise WeaveError("name %s not present in weave" % name)
215
def add(self, name, parents, text):
256
216
"""Add a single text on top of the weave.
258
218
Returns the index number of the newly added version.
265
225
List or set of direct parent version numbers.
268
Sequence of lines to be added in the new version.
270
sha -- SHA-1 of the file, if known. This is trusted to be
273
from bzrlib.osutils import sha_strings
228
Sequence of lines to be added in the new version."""
275
230
assert isinstance(name, basestring)
277
sha1 = sha_strings(text)
278
231
if name in self._name_map:
279
return self._check_repeated_add(name, parents, text, sha1)
281
parents = map(self.maybe_lookup, parents)
232
raise WeaveError("name %r already present in weave" % name)
282
234
self._check_versions(parents)
283
235
## self._check_lines(text)
284
236
new_version = len(self._parents)
287
243
# if we abort after here the (in-memory) weave will be corrupt because only
288
244
# some fields are updated
337
293
#print 'basis_lines:', basis_lines
338
294
#print 'new_lines: ', lines
296
from difflib import SequenceMatcher
340
297
s = SequenceMatcher(None, basis_lines, text)
342
299
# offset gives the number of lines that have been inserted
378
335
return new_version
380
def add_identical(self, old_rev_id, new_rev_id, parents):
381
"""Add an identical text to old_rev_id as new_rev_id."""
382
old_lines = self.get(self.lookup(old_rev_id))
383
self.add(new_rev_id, parents, old_lines)
385
338
def inclusions(self, versions):
386
339
"""Return set of all ancestors of given version(s)."""
387
340
i = set(versions)
388
for v in xrange(max(versions), 0, -1):
390
# include all its parents
391
i.update(self._parents[v])
393
## except IndexError:
394
## raise ValueError("version %d not present in weave" % v)
397
def parents(self, version):
398
return self._parents[version]
345
# include all its parents
346
i.update(self._parents[v])
350
raise ValueError("version %d not present in weave" % v)
401
353
def minimal_parents(self, version):
441
393
raise IndexError("invalid version number %r" % i)
444
def annotate(self, name_or_index):
445
return list(self.annotate_iter(name_or_index))
448
def annotate_iter(self, name_or_index):
396
def annotate(self, index):
397
return list(self.annotate_iter(index))
400
def annotate_iter(self, version):
449
401
"""Yield list of (index-id, line) pairs for the specified version.
451
403
The index indicates when the line originated in the weave."""
452
incls = [self.maybe_lookup(name_or_index)]
453
for origin, lineno, text in self._extract(incls):
404
for origin, lineno, text in self._extract([version]):
454
405
yield origin, text
557
def get_iter(self, name_or_index):
504
def get_iter(self, version):
558
505
"""Yield lines for the specified version."""
559
incls = [self.maybe_lookup(name_or_index)]
560
for origin, lineno, line in self._extract(incls):
506
for origin, lineno, line in self._extract([version]):
564
def get_text(self, name_or_index):
565
return ''.join(self.get_iter(name_or_index))
566
assert isinstance(version, int)
569
def get_lines(self, name_or_index):
570
return list(self.get_iter(name_or_index))
510
def get(self, index):
511
return list(self.get_iter(index))
576
514
def mash_iter(self, included):
577
515
"""Return composed version of multiple included versions."""
578
included = map(self.maybe_lookup, included)
579
516
for origin, lineno, text in self._extract(included):
769
def join(self, other):
770
"""Integrate versions from other into this weave.
772
The resulting weave contains all the history of both weaves;
773
any version you could retrieve from either self or other can be
774
retrieved from self after this call.
776
It is illegal for the two weaves to contain different values
779
if other.numversions() == 0:
780
return # nothing to update, easy
781
# work through in index order to make sure we get all dependencies
782
for other_idx, name in enumerate(other._names):
783
# TODO: If all the parents of the other version are already
784
# present then we can avoid some work by just taking the delta
785
# and adjusting the offsets.
786
if self._check_version_consistent(other, other_idx, name):
788
new_parents = self._imported_parents(other, other_idx)
789
lines = other.get_lines(other_idx)
790
sha1 = other._sha1s[other_idx]
791
self.add(name, new_parents, lines, sha1)
794
def _imported_parents(self, other, other_idx):
795
"""Return list of parents in self corresponding to indexes in other."""
797
for parent_idx in other._parents[other_idx]:
798
parent_name = other._names[parent_idx]
799
if parent_name not in self._names:
800
# should not be possible
801
raise WeaveError("missing parent {%s} of {%s} in %r"
802
% (parent_name, other._name_map[other_idx], self))
803
new_parents.append(self._name_map[parent_name])
806
def _check_version_consistent(self, other, other_idx, name):
807
"""Check if a version in consistent in this and other.
809
To be consistent it must have:
812
* the same direct parents (by name, not index, and disregarding
815
If present & correct return True;
816
if not present in self return False;
817
if inconsistent raise error."""
818
this_idx = self._name_map.get(name, -1)
820
if self._sha1s[this_idx] != other._sha1s[other_idx]:
821
raise WeaveError("inconsistent texts for version {%s} "
822
"when joining weaves"
824
self_parents = self._parents[this_idx]
825
other_parents = other._parents[other_idx]
826
n1 = [self._names[i] for i in self_parents]
827
n2 = [other._names[i] for i in other_parents]
831
raise WeaveError("inconsistent parents for version {%s}: "
840
712
def weave_toc(w):
854
def weave_stats(weave_file, pb):
726
def weave_stats(weave_file):
727
from bzrlib.progress import ProgressBar
855
728
from bzrlib.weavefile import read_weave
857
732
wf = file(weave_file, 'rb')
858
733
w = read_weave(wf)
859
734
# FIXME: doesn't work on pipes
982
849
sys.stdout.writelines(w.mash_iter(map(int, argv[3:])))
985
from difflib import unified_diff
988
v1, v2 = map(int, argv[3:5])
991
diff_gen = unified_diff(lines1, lines2,
992
'%s version %d' % (fn, v1),
993
'%s version %d' % (fn, v2))
994
sys.stdout.writelines(diff_gen)
996
851
elif cmd == 'annotate':
998
853
# newline is added to all lines regardless; too hard to get