22
22
"""Weave - storage of related text file versions"""
24
# before intset (r923) 2000 versions in 41.5s
25
# with intset (r926) 2000 versions in 93s !!!
26
# better to just use plain sets.
28
# making _extract build and return a list, rather than being a generator
24
31
# TODO: Perhaps have copy method for Weave instances?
26
33
# XXX: If we do weaves this way, will a merge still behave the same
27
34
# way if it's done in a different order? That's a pretty desirable
30
# TODO: How to write these to disk? One option is cPickle, which
31
# would be fast but less friendly to C, and perhaps not portable. Another is
33
37
# TODO: Nothing here so far assumes the lines are really \n newlines,
34
38
# rather than being split up in some other way. We could accomodate
35
39
# binaries, perhaps by naively splitting on \n or perhaps using
36
40
# something like a rolling checksum.
38
# TODO: Perhaps track SHA-1 in the header for protection? This would
39
# be redundant with it being stored in the inventory, but perhaps
42
42
# TODO: Track version names as well as indexes.
44
# TODO: Probably do transitive expansion when specifying parents?
46
# TODO: Separate out some code to read and write weaves.
48
44
# TODO: End marker for each version so we can stop reading?
50
46
# TODO: Check that no insertion occurs inside a deletion that was
51
47
# active in the version of the insertion.
53
# TODO: Perhaps a special slower check() method that verifies more
54
# nesting constraints and the MD5 of each version?
62
from sets import Set, ImmutableSet
64
frozenset = ImmutableSet
49
# TODO: In addition to the SHA-1 check, perhaps have some code that
50
# checks structural constraints of the weave: ie that insertions are
51
# properly nested, that there is no text outside of an insertion, that
52
# insertions or deletions are not repeated, etc.
54
# TODO: Make the info command just show info, not extract everything:
55
# it can be much faster.
57
# TODO: Perhaps use long integers as sets instead of set objects; may
60
# TODO: Parallel-extract that passes back each line along with a
61
# description of which revisions include it. Nice for checking all
68
67
class WeaveError(Exception):
187
186
sha1 = s.hexdigest()
189
# TODO: It'd probably be faster to append things on to a new
190
# list rather than modifying the existing one, which is likely
191
# to cause a lot of copying.
191
194
ancestors = self.inclusions(parents)
192
195
delta = self._delta(ancestors, text)
310
313
yield origin, text
320
(lineno, insert, deletes, text)
321
for each literal line.
327
lineno = 0 # line of weave, 0-based
330
if isinstance(l, tuple):
343
raise WeaveFormatError('unexpected instruction %r'
346
assert isinstance(l, basestring)
348
yield lineno, istack[-1], dset, l
313
353
def _extract(self, versions):
314
354
"""Yield annotation of lines in included set.
378
423
def mash_iter(self, included):
379
424
"""Return composed version of multiple included versions."""
380
included = frozenset(included)
381
425
for origin, lineno, text in self._extract(included):
503
547
yield real_i1, real_i2, lines[j1:j2]
551
def plan_merge(self, ver_a, ver_b):
552
"""Return pseudo-annotation indicating how the two versions merge.
554
This is computed between versions a and b and their common
557
Weave lines present in none of them are skipped entirely.
559
inc_a = self.inclusions([ver_a])
560
inc_b = self.inclusions([ver_b])
561
inc_c = inc_a & inc_b
563
for lineno, insert, deleteset, line in self._walk():
564
if deleteset & inc_c:
565
# killed in parent; can't be in either a or b
566
# not relevant to our work
567
yield 'killed-base', line
568
elif insert in inc_c:
569
# was inserted in base
570
killed_a = bool(deleteset & inc_a)
571
killed_b = bool(deleteset & inc_b)
572
if killed_a and killed_b:
573
yield 'killed-both', line
575
yield 'killed-a', line
577
yield 'killed-b', line
579
yield 'unchanged', line
580
elif insert in inc_a:
581
if deleteset & inc_a:
582
yield 'ghost-a', line
586
elif insert in inc_b:
587
if deleteset & inc_b:
588
yield 'ghost-b', line
592
# not in either revision
593
yield 'irrelevant', line
595
yield 'unchanged', '' # terminator
599
def weave_merge(self, plan):
604
for state, line in plan:
605
if state == 'unchanged' or state == 'killed-both':
606
# resync and flush queued conflicts changes if any
607
if not lines_a and not lines_b:
609
elif ch_a and not ch_b:
611
for l in lines_a: yield l
612
elif ch_b and not ch_a:
613
for l in lines_b: yield l
614
elif lines_a == lines_b:
615
for l in lines_a: yield l
618
for l in lines_a: yield l
620
for l in lines_b: yield l
627
if state == 'unchanged':
630
elif state == 'killed-a':
633
elif state == 'killed-b':
636
elif state == 'new-a':
639
elif state == 'new-b':
643
assert state in ('irrelevant', 'ghost-a', 'ghost-b', 'killed-base',
507
653
def weave_info(filename, out):
508
654
"""Show some text information about the weave."""
652
798
print ' '.join(map(str, w._v[int(argv[3])]))
800
elif cmd == 'plan-merge':
802
for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
804
print '%14s | %s' % (state, line),
654
806
elif cmd == 'merge':
808
p = w.plan_merge(int(argv[3]), int(argv[4]))
809
sys.stdout.writelines(w.weave_merge(p))
811
elif cmd == 'mash-merge':
655
812
if len(argv) != 5: