22
22
"""Weave - storage of related text file versions"""
24
# before intset (r923) 2000 versions in 41.5s
25
# with intset (r926) 2000 versions in 93s !!!
26
# better to just use plain sets.
28
# making _extract build and return a list, rather than being a generator
31
# with python -O, r923 does 2000 versions in 36.87s
33
# with optimizations to avoid mutating lists - 35.75! I guess copying
34
# all the elements every time costs more than the small manipulations.
35
# a surprisingly small change.
37
# r931, which avoids using a generator for extract, does 36.98s
39
# with memoized inclusions, takes 41.49s; not very good
41
# with slots, takes 37.35s; without takes 39.16, a bit surprising
43
# with the delta calculation mixed in with the add method, rather than
44
# separated, takes 36.78s
46
# with delta folded in and mutation of the list, 36.13s
48
# with all this and simplification of add code, 33s
51
# TODO: Perhaps have copy method for Weave instances?
53
25
# XXX: If we do weaves this way, will a merge still behave the same
54
26
# way if it's done in a different order? That's a pretty desirable
74
44
# TODO: Parallel-extract that passes back each line along with a
75
45
# description of which revisions include it. Nice for checking all
46
# shas or calculating stats in parallel.
48
# TODO: Using a single _extract routine and then processing the output
49
# is probably inefficient. It's simple enough that we can afford to
50
# have slight specializations for different ways its used: annotate,
51
# basis for add, get, etc.
53
# TODO: Perhaps the API should work only in names to hide the integer
54
# indexes from the user?
56
# TODO: Is there any potential performance win by having an add()
57
# variant that is passed a pre-cooked version of the single basis
60
# TODO: Have a way to go back and insert a revision V1 that is a parent
61
# of an already-stored revision V2. This means some lines previously
62
# counted as new in V2 will be discovered to have actually come from V1.
63
# It is probably necessary to insert V1, then compute a whole new diff
64
# from the mashed ancestors to V2. This must be repeated for every
65
# direct child of V1. The deltas from V2 to its descendents won't change,
66
# although their location within the weave may change. It may be possible
67
# to just adjust the location of those instructions rather than
68
# re-weaving the whole thing. This is expected to be a fairly rare
69
# operation, only used when inserting data that was previously a ghost.
75
from difflib import SequenceMatcher
160
161
each version; the parent's parents are implied.
163
List of hex SHA-1 of each version, or None if not recorded.
164
List of hex SHA-1 of each version.
167
List of symbolic names for each version. Each should be unique.
170
For each name, the version number.
173
Descriptive name of this weave; typically the filename if known.
166
__slots__ = ['_weave', '_parents', '_sha1s']
177
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
180
def __init__(self, weave_name=None):
170
182
self._parents = []
186
self._weave_name = weave_name
190
"""Return a deep copy of self.
192
The copy can be modified without affecting the original weave."""
194
other._weave = self._weave[:]
195
other._parents = self._parents[:]
196
other._sha1s = self._sha1s[:]
197
other._names = self._names[:]
198
other._name_map = self._name_map.copy()
199
other._weave_name = self._weave_name
174
202
def __eq__(self, other):
175
203
if not isinstance(other, Weave):
177
205
return self._parents == other._parents \
178
and self._weave == other._weave
206
and self._weave == other._weave \
207
and self._sha1s == other._sha1s
181
210
def __ne__(self, other):
182
211
return not self.__eq__(other)
185
def add(self, parents, text):
214
def maybe_lookup(self, name_or_index):
215
"""Convert possible symbolic name to index, or pass through indexes."""
216
if isinstance(name_or_index, (int, long)):
219
return self.lookup(name_or_index)
222
def lookup(self, name):
223
"""Convert symbolic version name to index."""
225
return self._name_map[name]
227
raise WeaveError("name %r not present in weave %r" %
228
(name, self._weave_name))
231
def iter_names(self):
232
"""Yield a list of all names in this weave."""
233
return iter(self._names)
235
def idx_to_name(self, version):
236
return self._names[version]
239
def _check_repeated_add(self, name, parents, text, sha1):
240
"""Check that a duplicated add is OK.
242
If it is, return the (old) index; otherwise raise an exception.
244
idx = self.lookup(name)
245
if sorted(self._parents[idx]) != sorted(parents):
246
raise WeaveError("name \"%s\" already present in weave "
247
"with different parents" % name)
248
if sha1 != self._sha1s[idx]:
249
raise WeaveError("name \"%s\" already present in weave "
250
"with different text" % name)
255
def add(self, name, parents, text, sha1=None):
186
256
"""Add a single text on top of the weave.
188
258
Returns the index number of the newly added version.
261
Symbolic name for this version.
262
(Typically the revision-id of the revision that added it.)
191
265
List or set of direct parent version numbers.
194
Sequence of lines to be added in the new version."""
268
Sequence of lines to be added in the new version.
270
sha -- SHA-1 of the file, if known. This is trusted to be
273
from bzrlib.osutils import sha_strings
275
assert isinstance(name, basestring)
277
sha1 = sha_strings(text)
278
if name in self._name_map:
279
return self._check_repeated_add(name, parents, text, sha1)
281
parents = map(self.maybe_lookup, parents)
196
282
self._check_versions(parents)
197
283
## self._check_lines(text)
198
284
new_version = len(self._parents)
206
# if we abort after here the weave will be corrupt
207
self._parents.append(frozenset(parents))
287
# if we abort after here the (in-memory) weave will be corrupt because only
288
# some fields are updated
289
self._parents.append(parents[:])
208
290
self._sha1s.append(sha1)
291
self._names.append(name)
292
self._name_map[name] = new_version
287
371
# we don't destroy ourselves
289
373
self._weave[i:i] = ([('{', new_version)]
291
+ [('}', new_version)])
292
376
offset += 2 + (j2 - j1)
294
378
return new_version
380
def add_identical(self, old_rev_id, new_rev_id, parents):
381
"""Add an identical text to old_rev_id as new_rev_id."""
382
old_lines = self.get(self.lookup(old_rev_id))
383
self.add(new_rev_id, parents, old_lines)
297
385
def inclusions(self, versions):
298
386
"""Return set of all ancestors of given version(s)."""
299
387
i = set(versions)
304
# include all its parents
305
i.update(self._parents[v])
309
raise ValueError("version %d not present in weave" % v)
388
for v in xrange(max(versions), 0, -1):
390
# include all its parents
391
i.update(self._parents[v])
393
## except IndexError:
394
## raise ValueError("version %d not present in weave" % v)
397
def parents(self, version):
398
return self._parents[version]
312
401
def minimal_parents(self, version):
352
441
raise IndexError("invalid version number %r" % i)
355
def annotate(self, index):
356
return list(self.annotate_iter(index))
359
def annotate_iter(self, version):
444
def annotate(self, name_or_index):
445
return list(self.annotate_iter(name_or_index))
448
def annotate_iter(self, name_or_index):
360
449
"""Yield list of (index-id, line) pairs for the specified version.
362
451
The index indicates when the line originated in the weave."""
363
for origin, lineno, text in self._extract([version]):
452
incls = [self.maybe_lookup(name_or_index)]
453
for origin, lineno, text in self._extract(incls):
364
454
yield origin, text
464
def get_iter(self, version):
557
def get_iter(self, name_or_index):
465
558
"""Yield lines for the specified version."""
466
for origin, lineno, line in self._extract([version]):
559
incls = [self.maybe_lookup(name_or_index)]
560
for origin, lineno, line in self._extract(incls):
470
def get(self, index):
471
return list(self.get_iter(index))
564
def get_text(self, name_or_index):
565
return ''.join(self.get_iter(name_or_index))
566
assert isinstance(version, int)
569
def get_lines(self, name_or_index):
570
return list(self.get_iter(name_or_index))
474
576
def mash_iter(self, included):
475
577
"""Return composed version of multiple included versions."""
578
included = map(self.maybe_lookup, included)
476
579
for origin, lineno, text in self._extract(included):
674
"""Show some text information about the weave."""
675
print '%6s %40s %20s' % ('ver', 'sha1', 'parents')
676
for i in (6, 40, 20):
769
def join(self, other):
770
"""Integrate versions from other into this weave.
772
The resulting weave contains all the history of both weaves;
773
any version you could retrieve from either self or other can be
774
retrieved from self after this call.
776
It is illegal for the two weaves to contain different values
779
if other.numversions() == 0:
780
return # nothing to update, easy
781
# work through in index order to make sure we get all dependencies
782
for other_idx, name in enumerate(other._names):
783
# TODO: If all the parents of the other version are already
784
# present then we can avoid some work by just taking the delta
785
# and adjusting the offsets.
786
if self._check_version_consistent(other, other_idx, name):
788
new_parents = self._imported_parents(other, other_idx)
789
lines = other.get_lines(other_idx)
790
sha1 = other._sha1s[other_idx]
791
self.add(name, new_parents, lines, sha1)
794
def _imported_parents(self, other, other_idx):
795
"""Return list of parents in self corresponding to indexes in other."""
797
for parent_idx in other._parents[other_idx]:
798
parent_name = other._names[parent_idx]
799
if parent_name not in self._names:
800
# should not be possible
801
raise WeaveError("missing parent {%s} of {%s} in %r"
802
% (parent_name, other._name_map[other_idx], self))
803
new_parents.append(self._name_map[parent_name])
806
def _check_version_consistent(self, other, other_idx, name):
807
"""Check if a version in consistent in this and other.
809
If present & correct return True;
810
if not present in self return False;
811
if inconsistent raise error."""
812
this_idx = self._name_map.get(name, -1)
814
if self._sha1s[this_idx] != other._sha1s[other_idx]:
815
raise WeaveError("inconsistent texts for version {%s} in %r and %r"
816
% (name, self, other))
817
elif set(self._parents[this_idx]) != set(other._parents[other_idx]):
818
raise WeaveError("inconsistent parents for version {%s} in %r and %r"
819
% (name, self, other))
827
"""Show the weave's table-of-contents"""
828
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
829
for i in (6, 50, 10, 10):
679
832
for i in range(w.numversions()):
680
833
sha1 = w._sha1s[i]
681
print '%6d %40s %s' % (i, sha1, ' '.join(map(str, w._parents[i])))
685
def weave_stats(weave_file):
686
from bzrlib.progress import ProgressBar
835
parent_str = ' '.join(map(str, w._parents[i]))
836
print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
840
def weave_stats(weave_file, pb):
687
841
from bzrlib.weavefile import read_weave
691
843
wf = file(weave_file, 'rb')
692
844
w = read_weave(wf)
693
845
# FIXME: doesn't work on pipes
731
886
Display composite of all selected versions.
732
887
weave merge WEAVEFILE VERSION1 VERSION2 > OUT
733
888
Auto-merge two versions and display conflicts.
889
weave diff WEAVEFILE VERSION1 VERSION2
890
Show differences between two versions.
737
894
% weave init foo.weave
739
% weave add foo.weave < foo.txt
896
% weave add foo.weave ver0 < foo.txt
742
899
(create updated version)
744
901
% weave get foo.weave 0 | diff -u - foo.txt
745
% weave add foo.weave 0 < foo.txt
902
% weave add foo.weave ver1 0 < foo.txt
748
905
% weave get foo.weave 0 > foo.txt (create forked version)
750
% weave add foo.weave 0 < foo.txt
907
% weave add foo.weave ver2 0 < foo.txt
753
910
% weave merge foo.weave 1 2 > foo.txt (merge them)
754
911
% vi foo.txt (resolve conflicts)
755
% weave add foo.weave 1 2 < foo.txt (commit merged version)
912
% weave add foo.weave merged 1 2 < foo.txt (commit merged version)