46
46
# TODO: Separate out some code to read and write weaves.
48
# TODO: End marker for each version so we can stop reading?
48
# TODO: End marker for each version?
50
50
# TODO: Check that no insertion occurs inside a deletion that was
51
51
# active in the version of the insertion.
53
# TODO: Perhaps a special slower check() method that verifies more
54
# nesting constraints and the MD5 of each version?
149
145
versions included in those versions are included transitively.
150
146
So new versions created from nothing list []; most versions
151
147
have a single entry; some have more.
154
List of hex SHA-1 of each version, or None if not recorded.
156
149
def __init__(self):
162
155
def __eq__(self, other):
183
176
Sequence of lines to be added in the new version."""
184
## self._check_versions(parents)
185
## self._check_lines(text)
177
self._check_versions(parents)
178
self._check_lines(text)
186
180
idx = len(self._v)
196
183
delta = self._delta(self.inclusions(parents), text)
308
296
# the stack and the other that processes the results -- but
309
297
# I'm not sure it's really needed.
311
# TODO: In fact, I think we only need to store the *count* of
312
# active insertions and deletions, and we can maintain that by
313
# just counting as we go along.
315
299
WFE = WeaveFormatError
317
301
for l in self._l:
318
302
if isinstance(l, tuple):
319
isactive = None # recalculate
322
305
if istack and (istack[-1] >= v):
358
340
raise WFE("literal at top level on line %d"
361
isactive = (istack[-1] in included) \
362
and not included.intersection(dset)
342
isactive = (istack[-1] in included) \
343
and not included.intersection(dset)
364
345
origin = istack[-1]
365
346
yield origin, lineno, l
383
364
return list(self.get_iter(index))
386
def mash_iter(self, included):
367
def merge_iter(self, included):
387
368
"""Return composed version of multiple included versions."""
388
369
included = frozenset(included)
389
370
for origin, lineno, text in self._extract(included):
398
379
pprint(self._v, to_file)
402
def numversions(self):
404
assert l == len(self._sha1s)
409
# check no circular inclusions
410
for version in range(self.numversions()):
411
inclusions = list(self._v[version])
414
if inclusions[-1] >= version:
383
for vers_info in self._v:
385
for vi in vers_info[0]:
386
if vi < 0 or vi >= index:
415
387
raise WeaveFormatError("invalid included version %d for index %d"
416
% (inclusions[-1], version))
418
# try extracting all versions; this is a bit slow and parallel
419
# extraction could be used
421
for version in range(self.numversions()):
423
for l in self.get_iter(version):
426
expected = self._sha1s[version]
428
raise WeaveError("mismatched sha1 for version %d; "
429
"got %s, expected %s"
430
% (version, hd, expected))
434
def merge(self, merge_versions):
435
"""Automerge and mark conflicts between versions.
437
This returns a sequence, each entry describing alternatives
438
for a chunk of the file. Each of the alternatives is given as
441
If there is a chunk of the file where there's no disagreement,
442
only one alternative is given.
445
# approach: find the included versions common to all the
447
raise NotImplementedError()
390
raise WeaveFormatError("repeated included version %d for index %d"
467
412
If line1=line2, this is a pure insert; if newlines=[] this is a
468
413
pure delete. (Similar to difflib.)
416
self._check_versions(included)
418
##from pprint import pprint
420
# first get basis for comparison
421
# basis holds (lineno, origin, line)
470
427
# basis a list of (origin, lineno, line)
473
for origin, lineno, line in self._extract(included):
474
basis_lineno.append(lineno)
475
basis_lines.append(line)
428
basis = list(self._extract(included))
430
# now make a parallel list with only the text, to pass to the differ
431
basis_lines = [line for (origin, lineno, line) in basis]
477
433
# add a sentinel, because we can also match against the final line
478
basis_lineno.append(len(self._l))
434
basis.append((None, len(self._l), None))
480
436
# XXX: which line of the weave should we really consider
481
437
# matches the end of the file? the current code says it's the
484
440
from difflib import SequenceMatcher
485
441
s = SequenceMatcher(None, basis_lines, lines)
443
##print 'basis sequence:'
487
446
# TODO: Perhaps return line numbers from composed weave as well?
489
448
for tag, i1, i2, j1, j2 in s.get_opcodes():
495
454
# i1,i2 are given in offsets within basis_lines; we need to map them
496
455
# back to offsets within the entire weave
497
real_i1 = basis_lineno[i1]
498
real_i2 = basis_lineno[i2]
456
real_i1 = basis[i1][1]
457
real_i2 = basis[i2][1]
508
def weave_info(filename, out):
509
"""Show some text information about the weave."""
510
from weavefile import read_weave
511
wf = file(filename, 'rb')
513
# FIXME: doesn't work on pipes
514
weave_size = wf.tell()
515
print >>out, "weave file size %d bytes" % weave_size
516
print >>out, "weave contains %d versions" % len(w._v)
519
print ' %8s %8s %8s %s' % ('version', 'lines', 'bytes', 'sha1')
520
print ' -------- -------- -------- ----------------------------------------'
521
for i in range(len(w._v)):
524
bytes = sum((len(a) for a in text))
526
print ' %8d %8d %8d %s' % (i, lines, bytes, sha1)
529
print >>out, "versions total %d bytes" % total
530
print >>out, "compression ratio %.3f" % (float(total)/float(weave_size))
537
from weavefile import write_weave_v1, read_weave
471
from weavefile import write_weave_v1, read_weave_v1
540
w = read_weave(file(argv[2], 'rb'))
474
w = read_weave_v1(file(argv[2], 'rb'))
541
475
# at the moment, based on everything in the file
542
476
parents = set(range(len(w._v)))
543
477
lines = sys.stdin.readlines()
552
486
write_weave_v1(w, file(fn, 'wb'))
553
487
elif cmd == 'get':
554
w = read_weave(file(argv[2], 'rb'))
555
sys.stdout.writelines(w.get_iter(int(argv[3])))
488
w = read_weave_v1(file(argv[2], 'rb'))
489
sys.stdout.writelines(w.getiter(int(argv[3])))
556
490
elif cmd == 'annotate':
557
w = read_weave(file(argv[2], 'rb'))
491
w = read_weave_v1(file(argv[2], 'rb'))
558
492
# newline is added to all lines regardless; too hard to get
559
493
# reasonable formatting otherwise
566
500
print '%5d | %s' % (origin, text)
569
weave_info(argv[2], sys.stdout)
571
w = read_weave(file(argv[2], 'rb'))
574
503
raise ValueError('unknown command %r' % cmd)