22
22
"""Weave - storage of related text file versions"""
24
# before intset (r923) 2000 versions in 41.5s
25
# with intset (r926) 2000 versions in 93s !!!
26
# better to just use plain sets.
28
# making _extract build and return a list, rather than being a generator
31
# with python -O, r923 does 2000 versions in 36.87s
33
# with optimizations to avoid mutating lists - 35.75! I guess copying
34
# all the elements every time costs more than the small manipulations.
35
# a surprisingly small change.
37
# r931, which avoids using a generator for extract, does 36.98s
39
# with memoized inclusions, takes 41.49s; not very good
41
# with slots, takes 37.35s; without takes 39.16, a bit surprising
43
# with the delta calculation mixed in with the add method, rather than
44
# separated, takes 36.78s
46
# with delta folded in and mutation of the list, 36.13s
48
# with all this and simplification of add code, 33s
54
25
# TODO: Perhaps have copy method for Weave instances?
75
46
# TODO: Parallel-extract that passes back each line along with a
76
47
# description of which revisions include it. Nice for checking all
48
# shas or calculating stats in parallel.
79
50
# TODO: Using a single _extract routine and then processing the output
80
51
# is probably inefficient. It's simple enough that we can afford to
84
55
# TODO: Perhaps the API should work only in names to hide the integer
85
56
# indexes from the user?
58
# TODO: Is there any potential performance win by having an add()
59
# variant that is passed a pre-cooked version of the single basis
90
from cStringIO import StringIO
65
from difflib import SequenceMatcher
93
70
class WeaveError(Exception):
211
188
return not self.__eq__(other)
191
def maybe_lookup(self, name_or_index):
192
"""Convert possible symbolic name to index, or pass through indexes."""
193
if isinstance(name_or_index, (int, long)):
196
return self.lookup(name_or_index)
214
199
def lookup(self, name):
200
"""Convert symbolic version name to index."""
216
202
return self._name_map[name]
218
raise WeaveError("name %s not present in weave %s" %
204
raise WeaveError("name %r not present in weave %r" %
219
205
(name, self._weave_name))
222
def add(self, name, parents, text):
208
def idx_to_name(self, version):
209
return self._names[version]
212
def _check_repeated_add(self, name, parents, text, sha1):
213
"""Check that a duplicated add is OK.
215
If it is, return the (old) index; otherwise raise an exception.
217
idx = self.lookup(name)
218
if sorted(self._parents[idx]) != sorted(parents):
219
raise WeaveError("name \"%s\" already present in weave "
220
"with different parents" % name)
221
if sha1 != self._sha1s[idx]:
222
raise WeaveError("name \"%s\" already present in weave "
223
"with different text" % name)
228
def add(self, name, parents, text, sha1=None):
223
229
"""Add a single text on top of the weave.
225
231
Returns the index number of the newly added version.
232
238
List or set of direct parent version numbers.
235
Sequence of lines to be added in the new version."""
241
Sequence of lines to be added in the new version.
243
sha -- SHA-1 of the file, if known. This is trusted to be
246
from bzrlib.osutils import sha_strings
237
248
assert isinstance(name, basestring)
250
sha1 = sha_strings(text)
238
251
if name in self._name_map:
239
raise WeaveError("name %r already present in weave" % name)
252
return self._check_repeated_add(name, parents, text, sha1)
254
parents = map(self.maybe_lookup, parents)
241
255
self._check_versions(parents)
242
256
## self._check_lines(text)
243
257
new_version = len(self._parents)
250
260
# if we abort after here the (in-memory) weave will be corrupt because only
251
261
# some fields are updated
300
310
#print 'basis_lines:', basis_lines
301
311
#print 'new_lines: ', lines
303
from difflib import SequenceMatcher
304
313
s = SequenceMatcher(None, basis_lines, text)
306
315
# offset gives the number of lines that have been inserted
342
351
return new_version
353
def add_identical(self, old_rev_id, new_rev_id, parents):
354
"""Add an identical text to old_rev_id as new_rev_id."""
355
old_lines = self.get(self.lookup(old_rev_id))
356
self.add(new_rev_id, parents, old_lines)
345
358
def inclusions(self, versions):
346
359
"""Return set of all ancestors of given version(s)."""
347
360
i = set(versions)
352
# include all its parents
353
i.update(self._parents[v])
357
raise ValueError("version %d not present in weave" % v)
361
for v in xrange(max(versions), 0, -1):
363
# include all its parents
364
i.update(self._parents[v])
366
## except IndexError:
367
## raise ValueError("version %d not present in weave" % v)
370
def parents(self, version):
371
return self._parents[version]
360
374
def minimal_parents(self, version):
400
414
raise IndexError("invalid version number %r" % i)
403
def annotate(self, index):
404
return list(self.annotate_iter(index))
407
def annotate_iter(self, version):
417
def annotate(self, name_or_index):
418
return list(self.annotate_iter(name_or_index))
421
def annotate_iter(self, name_or_index):
408
422
"""Yield list of (index-id, line) pairs for the specified version.
410
424
The index indicates when the line originated in the weave."""
411
for origin, lineno, text in self._extract([version]):
425
incls = [self.maybe_lookup(name_or_index)]
426
for origin, lineno, text in self._extract(incls):
412
427
yield origin, text
515
def get_iter(self, version):
530
def get_iter(self, name_or_index):
516
531
"""Yield lines for the specified version."""
517
for origin, lineno, line in self._extract([version]):
532
incls = [self.maybe_lookup(name_or_index)]
533
for origin, lineno, line in self._extract(incls):
521
def get_text(self, version):
537
def get_text(self, name_or_index):
538
return ''.join(self.get_iter(name_or_index))
522
539
assert isinstance(version, int)
524
s.writelines(self.get_iter(version))
528
def get(self, index):
529
return list(self.get_iter(index))
542
def get_lines(self, name_or_index):
543
return list(self.get_iter(name_or_index))
532
549
def mash_iter(self, included):
533
550
"""Return composed version of multiple included versions."""
551
included = map(self.maybe_lookup, included)
534
552
for origin, lineno, text in self._extract(included):
744
def weave_stats(weave_file):
745
from bzrlib.progress import ProgressBar
762
def weave_stats(weave_file, pb):
746
763
from bzrlib.weavefile import read_weave
750
765
wf = file(weave_file, 'rb')
751
766
w = read_weave(wf)
752
767
# FIXME: doesn't work on pipes
757
772
for i in range(vers):
758
773
pb.update('checking sizes', i, vers)
759
for line in w.get_iter(i):
774
for origin, lineno, line in w._extract([i]):
760
775
total += len(line)
793
808
Display composite of all selected versions.
794
809
weave merge WEAVEFILE VERSION1 VERSION2 > OUT
795
810
Auto-merge two versions and display conflicts.
811
weave diff WEAVEFILE VERSION1 VERSION2
812
Show differences between two versions.
826
from weavefile import write_weave, read_weave
846
# in case we're run directly from the subdirectory
847
sys.path.append('..')
849
from bzrlib.weavefile import write_weave, read_weave
827
850
from bzrlib.progress import ProgressBar
867
890
sys.stdout.writelines(w.mash_iter(map(int, argv[3:])))
893
from difflib import unified_diff
896
v1, v2 = map(int, argv[3:5])
899
diff_gen = unified_diff(lines1, lines2,
900
'%s version %d' % (fn, v1),
901
'%s version %d' % (fn, v2))
902
sys.stdout.writelines(diff_gen)
869
904
elif cmd == 'annotate':
871
906
# newline is added to all lines regardless; too hard to get