22
22
"""Weave - storage of related text file versions"""
24
# before intset (r923) 2000 versions in 41.5s
25
# with intset (r926) 2000 versions in 93s !!!
26
# better to just use plain sets.
28
# making _extract build and return a list, rather than being a generator
31
# with python -O, r923 does 2000 versions in 36.87s
33
# with optimizations to avoid mutating lists - 35.75! I guess copying
34
# all the elements every time costs more than the small manipulations.
35
# a surprisingly small change.
37
# r931, which avoids using a generator for extract, does 36.98s
39
# with memoized inclusions, takes 41.49s; not very good
41
# with slots, takes 37.35s; without takes 39.16, a bit surprising
43
# with the delta calculation mixed in with the add method, rather than
44
# separated, takes 36.78s
46
# with delta folded in and mutation of the list, 36.13s
48
# with all this and simplification of add code, 33s
54
24
# TODO: Perhaps have copy method for Weave instances?
56
26
# XXX: If we do weaves this way, will a merge still behave the same
165
129
should be no way to get an earlier version deleting a later
169
Text of the weave; list of control instruction tuples and strings.
172
136
List of parents, indexed by version number.
173
137
It is only necessary to store the minimal set of parents for
174
138
each version; the parent's parents are implied.
177
List of hex SHA-1 of each version.
180
List of symbolic names for each version. Each should be unique.
183
For each name, the version number.
141
List of hex SHA-1 of each version, or None if not recorded.
186
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map']
188
143
def __init__(self):
196
149
def __eq__(self, other):
197
150
if not isinstance(other, Weave):
199
return self._parents == other._parents \
200
and self._weave == other._weave \
201
and self._sha1s == other._sha1s
152
return self._v == other._v \
153
and self._l == other._l
204
156
def __ne__(self, other):
205
157
return not self.__eq__(other)
208
def lookup(self, name):
210
return self._name_map[name]
212
raise WeaveError("name %s not present in weave" % name)
215
def add(self, name, parents, text):
160
def add(self, parents, text):
216
161
"""Add a single text on top of the weave.
218
163
Returns the index number of the newly added version.
221
Symbolic name for this version.
222
(Typically the revision-id of the revision that added it.)
225
166
List or set of direct parent version numbers.
228
169
Sequence of lines to be added in the new version."""
230
assert isinstance(name, basestring)
231
if name in self._name_map:
232
raise WeaveError("name %r already present in weave" % name)
234
self._check_versions(parents)
170
## self._check_versions(parents)
235
171
## self._check_lines(text)
236
new_version = len(self._parents)
240
178
sha1 = s.hexdigest()
243
# if we abort after here the (in-memory) weave will be corrupt because only
244
# some fields are updated
245
self._parents.append(parents[:])
182
ancestors = self.inclusions(parents)
183
delta = self._delta(ancestors, text)
185
# offset gives the number of lines that have been inserted
186
# into the weave up to the current point; if the original edit instruction
187
# says to change line A then we actually change (A+offset)
190
for i1, i2, newlines in delta:
193
assert i2 <= len(self._l)
195
# the deletion and insertion are handled separately.
196
# first delete the region.
198
self._l.insert(i1+offset, ('[', idx))
199
self._l.insert(i2+offset+1, (']', idx))
204
# there may have been a deletion spanning up to
205
# i2; we want to insert after this region to make sure
206
# we don't destroy ourselves
208
self._l[i:i] = [('{', idx)] \
211
offset += 2 + len(newlines)
213
self._addversion(parents)
215
# special case; adding with no parents revision; can do this
216
# more quickly by just appending unconditionally
217
self._l.append(('{', idx))
219
self._l.append(('}', idx))
221
self._addversion(None)
246
223
self._sha1s.append(sha1)
247
self._names.append(name)
248
self._name_map[name] = new_version
252
# special case; adding with no parents revision; can do
253
# this more quickly by just appending unconditionally.
254
# even more specially, if we're adding an empty text we
255
# need do nothing at all.
257
self._weave.append(('{', new_version))
258
self._weave.extend(text)
259
self._weave.append(('}', None))
263
if len(parents) == 1:
264
pv = list(parents)[0]
265
if sha1 == self._sha1s[pv]:
266
# special case: same as the single parent
270
ancestors = self.inclusions(parents)
274
# basis a list of (origin, lineno, line)
277
for origin, lineno, line in self._extract(ancestors):
278
basis_lineno.append(lineno)
279
basis_lines.append(line)
281
# another small special case: a merge, producing the same text
283
if text == basis_lines:
286
# add a sentinal, because we can also match against the final line
287
basis_lineno.append(len(self._weave))
289
# XXX: which line of the weave should we really consider
290
# matches the end of the file? the current code says it's the
291
# last line of the weave?
293
#print 'basis_lines:', basis_lines
294
#print 'new_lines: ', lines
296
from difflib import SequenceMatcher
297
s = SequenceMatcher(None, basis_lines, text)
299
# offset gives the number of lines that have been inserted
300
# into the weave up to the current point; if the original edit instruction
301
# says to change line A then we actually change (A+offset)
304
for tag, i1, i2, j1, j2 in s.get_opcodes():
305
# i1,i2 are given in offsets within basis_lines; we need to map them
306
# back to offsets within the entire weave
307
#print 'raw match', tag, i1, i2, j1, j2
311
i1 = basis_lineno[i1]
312
i2 = basis_lineno[i2]
314
assert 0 <= j1 <= j2 <= len(text)
316
#print tag, i1, i2, j1, j2
318
# the deletion and insertion are handled separately.
319
# first delete the region.
321
self._weave.insert(i1+offset, ('[', new_version))
322
self._weave.insert(i2+offset+1, (']', new_version))
326
# there may have been a deletion spanning up to
327
# i2; we want to insert after this region to make sure
328
# we don't destroy ourselves
330
self._weave[i:i] = ([('{', new_version)]
333
offset += 2 + (j2 - j1)
338
228
def inclusions(self, versions):
514
369
def mash_iter(self, included):
515
370
"""Return composed version of multiple included versions."""
371
included = frozenset(included)
516
372
for origin, lineno, text in self._extract(included):
520
376
def dump(self, to_file):
521
377
from pprint import pprint
522
print >>to_file, "Weave._weave = ",
523
pprint(self._weave, to_file)
524
print >>to_file, "Weave._parents = ",
525
pprint(self._parents, to_file)
378
print >>to_file, "Weave._l = ",
379
pprint(self._l, to_file)
380
print >>to_file, "Weave._v = ",
381
pprint(self._v, to_file)
529
385
def numversions(self):
530
l = len(self._parents)
531
387
assert l == len(self._sha1s)
536
return self.numversions()
539
391
def check(self, progress_bar=None):
540
392
# check no circular inclusions
541
393
for version in range(self.numversions()):
542
inclusions = list(self._parents[version])
394
inclusions = list(self._v[version])
544
396
inclusions.sort()
545
397
if inclusions[-1] >= version:
604
457
If line1=line2, this is a pure insert; if newlines=[] this is a
605
458
pure delete. (Similar to difflib.)
610
def plan_merge(self, ver_a, ver_b):
611
"""Return pseudo-annotation indicating how the two versions merge.
613
This is computed between versions a and b and their common
616
Weave lines present in none of them are skipped entirely.
618
inc_a = self.inclusions([ver_a])
619
inc_b = self.inclusions([ver_b])
620
inc_c = inc_a & inc_b
622
for lineno, insert, deleteset, line in self._walk():
623
if deleteset & inc_c:
624
# killed in parent; can't be in either a or b
625
# not relevant to our work
626
yield 'killed-base', line
627
elif insert in inc_c:
628
# was inserted in base
629
killed_a = bool(deleteset & inc_a)
630
killed_b = bool(deleteset & inc_b)
631
if killed_a and killed_b:
632
yield 'killed-both', line
634
yield 'killed-a', line
636
yield 'killed-b', line
638
yield 'unchanged', line
639
elif insert in inc_a:
640
if deleteset & inc_a:
641
yield 'ghost-a', line
645
elif insert in inc_b:
646
if deleteset & inc_b:
647
yield 'ghost-b', line
651
# not in either revision
652
yield 'irrelevant', line
654
yield 'unchanged', '' # terminator
658
def weave_merge(self, plan):
663
for state, line in plan:
664
if state == 'unchanged' or state == 'killed-both':
665
# resync and flush queued conflicts changes if any
666
if not lines_a and not lines_b:
668
elif ch_a and not ch_b:
670
for l in lines_a: yield l
671
elif ch_b and not ch_a:
672
for l in lines_b: yield l
673
elif lines_a == lines_b:
674
for l in lines_a: yield l
677
for l in lines_a: yield l
679
for l in lines_b: yield l
686
if state == 'unchanged':
689
elif state == 'killed-a':
692
elif state == 'killed-b':
695
elif state == 'new-a':
698
elif state == 'new-b':
702
assert state in ('irrelevant', 'ghost-a', 'ghost-b', 'killed-base',
713
"""Show the weave's table-of-contents"""
714
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
715
for i in (6, 50, 10, 10):
718
for i in range(w.numversions()):
721
parent_str = ' '.join(map(str, w._parents[i]))
722
print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
726
def weave_stats(weave_file):
727
from bzrlib.progress import ProgressBar
728
from bzrlib.weavefile import read_weave
732
wf = file(weave_file, 'rb')
460
# basis a list of (origin, lineno, line)
463
for origin, lineno, line in self._extract(included):
464
basis_lineno.append(lineno)
465
basis_lines.append(line)
467
# add a sentinal, because we can also match against the final line
468
basis_lineno.append(len(self._l))
470
# XXX: which line of the weave should we really consider
471
# matches the end of the file? the current code says it's the
472
# last line of the weave?
474
from difflib import SequenceMatcher
475
s = SequenceMatcher(None, basis_lines, lines)
477
# TODO: Perhaps return line numbers from composed weave as well?
479
for tag, i1, i2, j1, j2 in s.get_opcodes():
480
##print tag, i1, i2, j1, j2
485
# i1,i2 are given in offsets within basis_lines; we need to map them
486
# back to offsets within the entire weave
487
real_i1 = basis_lineno[i1]
488
real_i2 = basis_lineno[i2]
492
assert j2 <= len(lines)
494
yield real_i1, real_i2, lines[j1:j2]
498
def weave_info(filename, out):
499
"""Show some text information about the weave."""
500
from weavefile import read_weave
501
wf = file(filename, 'rb')
733
502
w = read_weave(wf)
734
503
# FIXME: doesn't work on pipes
735
504
weave_size = wf.tell()
505
print >>out, "weave file size %d bytes" % weave_size
506
print >>out, "weave contains %d versions" % len(w._v)
739
for i in range(vers):
740
pb.update('checking sizes', i, vers)
741
for line in w.get_iter(i):
746
print 'versions %9d' % vers
747
print 'weave file %9d bytes' % weave_size
748
print 'total contents %9d bytes' % total
749
print 'compression ratio %9.2fx' % (float(total) / float(weave_size))
752
print 'average size %9d bytes' % avg
753
print 'relative size %9.2fx' % (float(weave_size) / float(avg))
509
print '%6s %6s %8s %40s %20s' % ('ver', 'lines', 'bytes', 'sha1', 'parents')
510
for i in (6, 6, 8, 40, 20):
513
for i in range(len(w._v)):
516
bytes = sum((len(a) for a in text))
518
print '%6d %6d %8d %40s' % (i, lines, bytes, sha1),
524
print >>out, "versions total %d bytes" % total
525
print >>out, "compression ratio %.3f" % (float(total)/float(weave_size))