269
199
return self._parents == other._parents \
270
200
and self._weave == other._weave \
271
201
and self._sha1s == other._sha1s
273
204
def __ne__(self, other):
    """Return the logical negation of ``self.__eq__(other)``."""
    is_equal = self.__eq__(other)
    return not is_equal
276
def _idx_to_name(self, version):
    """Return the entry of ``self._names`` stored at index ``version``."""
    names = self._names
    return names[version]
279
def _lookup(self, name):
280
"""Convert symbolic version name to index."""
281
self.check_not_reserved_id(name)
208
def lookup(self, name):
283
210
return self._name_map[name]
285
raise RevisionNotPresent(name, self._weave_name)
288
"""See VersionedFile.versions."""
289
return self._names[:]
291
def has_version(self, version_id):
    """See VersionedFile.has_version.

    True when ``version_id`` is a key of ``self._name_map``.
    """
    known_names = self._name_map
    return version_id in known_names
295
__contains__ = has_version
297
def get_record_stream(self, versions, ordering, include_delta_closure):
298
"""Get a stream of records for versions.
300
:param versions: The versions to include. Each version is a tuple
302
:param ordering: Either 'unordered' or 'topological'. A topologically
303
sorted stream has compression parents strictly before their
305
:param include_delta_closure: If True then the closure across any
306
compression parents will be included (in the opaque data).
307
:return: An iterator of ContentFactory objects, each of which is only
308
valid until the iterator is advanced.
310
if ordering == 'topological':
311
parents = self.get_parent_map(versions)
312
new_versions = topo_sort(parents)
313
new_versions.extend(set(versions).difference(set(parents)))
314
versions = new_versions
315
for version in versions:
317
yield WeaveContentFactory(version, self)
319
yield AbsentContentFactory((version,))
321
def get_parent_map(self, version_ids):
322
"""See VersionedFile.get_parent_map."""
324
for version_id in version_ids:
326
result[version_id] = tuple(
327
map(self._idx_to_name, self._parents[self._lookup(version_id)]))
328
except RevisionNotPresent:
332
def get_parents_with_ghosts(self, version_id):
    """Unsupported here: always raises NotImplementedError.

    The exception is constructed with ``self.get_parents_with_ghosts``
    as its argument.
    """
    unsupported = self.get_parents_with_ghosts
    raise NotImplementedError(unsupported)
335
def insert_record_stream(self, stream):
336
"""Insert a record stream into this versioned file.
338
:param stream: A stream of records to insert.
340
:seealso VersionedFile.get_record_stream:
343
for record in stream:
344
# Raise an error when a record is missing.
345
if record.storage_kind == 'absent':
346
raise RevisionNotPresent([record.key[0]], self)
347
# adapt to non-tuple interface
348
parents = [parent[0] for parent in record.parents]
349
if record.storage_kind == 'fulltext':
350
self.add_lines(record.key[0], parents,
351
split_lines(record.get_bytes_as('fulltext')))
353
adapter_key = record.storage_kind, 'fulltext'
355
adapter = adapters[adapter_key]
357
adapter_factory = adapter_registry.get(adapter_key)
358
adapter = adapter_factory(self)
359
adapters[adapter_key] = adapter
360
lines = split_lines(adapter.get_bytes(
361
record, record.get_bytes_as(record.storage_kind)))
363
self.add_lines(record.key[0], parents, lines)
364
except RevisionAlreadyPresent:
367
def _check_repeated_add(self, name, parents, text, sha1):
368
"""Check that a duplicated add is OK.
370
If it is, return the (old) index; otherwise raise an exception.
372
idx = self._lookup(name)
373
if sorted(self._parents[idx]) != sorted(parents) \
374
or sha1 != self._sha1s[idx]:
375
raise RevisionAlreadyPresent(name, self._weave_name)
378
def _add_lines(self, version_id, parents, lines, parent_texts,
               left_matching_blocks, nostore_sha, random_id, check_content):
    """See VersionedFile.add_lines.

    Parent names are converted to indexes with ``self._lookup`` before the
    text is handed to ``self._add``.  Returns a 3-tuple of
    ``sha_strings(lines)``, the summed length of ``lines``, and the value
    returned by ``self._add``.
    """
    parent_indexes = [self._lookup(parent) for parent in parents]
    idx = self._add(version_id, lines, parent_indexes,
                    nostore_sha=nostore_sha)
    sha1 = sha_strings(lines)
    text_length = sum(len(line) for line in lines)
    return sha1, text_length, idx
385
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
212
raise WeaveError("name %s not present in weave" % name)
215
def add(self, name, parents, text):
386
216
"""Add a single text on top of the weave.
388
218
Returns the index number of the newly added version.
391
221
Symbolic name for this version.
392
222
(Typically the revision-id of the revision that added it.)
395
225
List or set of direct parent version numbers.
398
Sequence of lines to be added in the new version.
400
:param nostore_sha: See VersionedFile.add_lines.
402
self._check_lines_not_unicode(lines)
403
self._check_lines_are_lines(lines)
405
sha1 = sha_strings(lines)
406
if sha1 == nostore_sha:
407
raise errors.ExistingContent
408
if version_id in self._name_map:
409
return self._check_repeated_add(version_id, parents, lines, sha1)
228
Sequence of lines to be added in the new version."""
230
assert isinstance(name, basestring)
231
if name in self._name_map:
232
raise WeaveError("name %r already present in weave" % name)
411
234
self._check_versions(parents)
412
## self._check_lines(lines)
235
## self._check_lines(text)
413
236
new_version = len(self._parents)
415
243
# if we abort after here the (in-memory) weave will be corrupt because only
416
244
# some fields are updated
417
# XXX: FIXME implement a succeed-or-fail of the rest of this routine.
418
# - Robert Collins 20060226
419
245
self._parents.append(parents[:])
420
246
self._sha1s.append(sha1)
421
self._names.append(version_id)
422
self._name_map[version_id] = new_version
247
self._names.append(name)
248
self._name_map[name] = new_version
579
419
lineno = 0 # line of weave, 0-based
581
421
for l in self._weave:
582
if l.__class__ == tuple:
422
if isinstance(l, tuple):
586
istack.append(self._names[v])
590
dset.add(self._names[v])
592
dset.remove(self._names[v])
594
raise WeaveFormatError('unexpected instruction %r' % v)
596
yield lineno, istack[-1], frozenset(dset), l
435
raise WeaveFormatError('unexpected instruction %r'
438
assert isinstance(l, basestring)
440
yield lineno, istack[-1], dset, l
445
def _extract(self, versions):
446
"""Yield annotation of lines in included set.
448
Yields a sequence of tuples (origin, lineno, text), where
449
origin is the origin version, lineno the index in the weave,
450
and text the text of the line.
452
The set typically but not necessarily corresponds to a version.
454
included = self.inclusions(versions)
459
lineno = 0 # line of weave, 0-based
465
WFE = WeaveFormatError
467
for l in self._weave:
468
if isinstance(l, tuple):
472
assert v not in istack
486
assert isinstance(l, basestring)
488
isactive = (not dset) and istack and (istack[-1] in included)
490
result.append((istack[-1], lineno, l))
600
raise WeaveFormatError("unclosed insertion blocks "
601
"at end of weave: %s" % istack)
494
raise WFE("unclosed insertion blocks at end of weave",
603
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
497
raise WFE("unclosed deletion blocks at end of weave",
504
def get_iter(self, version):
505
"""Yield lines for the specified version."""
506
for origin, lineno, line in self._extract([version]):
510
def get(self, index):
    """Return everything ``self.get_iter(index)`` yields, as a list."""
    line_iter = self.get_iter(index)
    return list(line_iter)
514
def mash_iter(self, included):
515
"""Return composed version of multiple included versions."""
516
for origin, lineno, text in self._extract(included):
520
def dump(self, to_file):
521
from pprint import pprint
522
print >>to_file, "Weave._weave = ",
523
pprint(self._weave, to_file)
524
print >>to_file, "Weave._parents = ",
525
pprint(self._parents, to_file)
529
def numversions(self):
530
l = len(self._parents)
531
assert l == len(self._sha1s)
536
return self.numversions()
539
def check(self, progress_bar=None):
540
# check no circular inclusions
541
for version in range(self.numversions()):
542
inclusions = list(self._parents[version])
545
if inclusions[-1] >= version:
546
raise WeaveFormatError("invalid included version %d for index %d"
547
% (inclusions[-1], version))
549
# try extracting all versions; this is a bit slow and parallel
550
# extraction could be used
551
nv = self.numversions()
552
for version in range(nv):
554
progress_bar.update('checking text', version, nv)
556
for l in self.get_iter(version):
559
expected = self._sha1s[version]
561
raise WeaveError("mismatched sha1 for version %d; "
562
"got %s, expected %s"
563
% (version, hd, expected))
565
# TODO: check insertions are properly nested, that there are
566
# no lines outside of insertion blocks, that deletions are
567
# properly paired, etc.
571
def merge(self, merge_versions):
572
"""Automerge and mark conflicts between versions.
574
This returns a sequence, each entry describing alternatives
575
for a chunk of the file. Each of the alternatives is given as
578
If there is a chunk of the file where there's no disagreement,
579
only one alternative is given.
582
# approach: find the included versions common to all the
584
raise NotImplementedError()
588
def _delta(self, included, lines):
589
"""Return changes from basis to new revision.
591
The old text for comparison is the union of included revisions.
593
This is used in inserting a new text.
595
Delta is returned as a sequence of
596
(weave1, weave2, newlines).
598
This indicates that weave1:weave2 of the old weave should be
599
replaced by the sequence of lines in newlines. Note that
600
these line numbers are positions in the total weave and don't
601
correspond to the lines in any extracted version, or even the
602
extracted union of included versions.
604
If line1=line2, this is a pure insert; if newlines=[] this is a
605
pure delete. (Similar to difflib.)
606
610
def plan_merge(self, ver_a, ver_b):
607
611
"""Return pseudo-annotation indicating how the two versions merge.
650
654
yield 'unchanged', '' # terminator
652
def _extract(self, versions):
653
"""Yield annotation of lines in included set.
655
Yields a sequence of tuples (origin, lineno, text), where
656
origin is the origin version, lineno the index in the weave,
657
and text the text of the line.
659
The set typically but not necessarily corresponds to a version.
662
if not isinstance(i, int):
665
included = self._inclusions(versions)
671
lineno = 0 # line of weave, 0-based
677
WFE = WeaveFormatError
680
# 449 0 4474.6820 2356.5590 bzrlib.weave:556(_extract)
681
# +285282 0 1676.8040 1676.8040 +<isinstance>
682
# 1.6 seconds in 'isinstance'.
683
# changing the first isinstance:
684
# 449 0 2814.2660 1577.1760 bzrlib.weave:556(_extract)
685
# +140414 0 762.8050 762.8050 +<isinstance>
686
# note that the inline time actually dropped (less function calls)
687
# and total processing time was halved.
688
# we're still spending ~1/4 of the method in isinstance though.
689
# so let's hard code the acceptable string classes we expect:
690
# 449 0 1202.9420 786.2930 bzrlib.weave:556(_extract)
691
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
693
# yay, down to ~1/4 the initial extract time, and our inline time
694
# has shrunk again, with isinstance no longer dominating.
695
# tweaking the stack inclusion test to use a set gives:
696
# 449 0 1122.8030 713.0080 bzrlib.weave:556(_extract)
697
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
699
# - a 5% win, or possibly just noise. However with large istacks that
700
# 'in' test could dominate, so I'm leaving this change in place -
701
# when it's fast enough to consider profiling big datasets we can review.
706
for l in self._weave:
707
if l.__class__ == tuple:
714
iset.remove(istack.pop())
658
def weave_merge(self, plan):
663
for state, line in plan:
664
if state == 'unchanged' or state == 'killed-both':
665
# resync and flush queued conflicts changes if any
666
if not lines_a and not lines_b:
668
elif ch_a and not ch_b:
670
for l in lines_a: yield l
671
elif ch_b and not ch_a:
672
for l in lines_b: yield l
673
elif lines_a == lines_b:
674
for l in lines_a: yield l
722
raise AssertionError()
725
isactive = (not dset) and istack and (istack[-1] in included)
727
result.append((istack[-1], lineno, l))
730
raise WeaveFormatError("unclosed insertion blocks "
731
"at end of weave: %s" % istack)
733
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
737
def _maybe_lookup(self, name_or_index):
738
"""Convert possible symbolic name to index, or pass through indexes.
742
if isinstance(name_or_index, (int, long)):
745
return self._lookup(name_or_index)
747
def get_lines(self, version_id):
748
"""See VersionedFile.get_lines()."""
749
int_index = self._maybe_lookup(version_id)
750
result = [line for (origin, lineno, line) in self._extract([int_index])]
751
expected_sha1 = self._sha1s[int_index]
752
measured_sha1 = sha_strings(result)
753
if measured_sha1 != expected_sha1:
754
raise errors.WeaveInvalidChecksum(
755
'file %s, revision %s, expected: %s, measured %s'
756
% (self._weave_name, version_id,
757
expected_sha1, measured_sha1))
760
def get_sha1s(self, version_ids):
    """See VersionedFile.get_sha1s().

    Returns a list with one ``self._sha1s`` entry per requested version,
    in the order the versions were given.
    """
    sha1s = []
    for version_id in version_ids:
        index = self._lookup(version_id)
        sha1s.append(self._sha1s[index])
    return sha1s
764
def num_versions(self):
765
"""How many versions are in this weave?"""
766
l = len(self._parents)
769
__len__ = num_versions
771
def check(self, progress_bar=None):
772
# TODO evaluate performance hit of using string sets in this routine.
773
# TODO: check no circular inclusions
774
# TODO: create a nested progress bar
775
for version in range(self.num_versions()):
776
inclusions = list(self._parents[version])
779
if inclusions[-1] >= version:
780
raise WeaveFormatError("invalid included version %d for index %d"
781
% (inclusions[-1], version))
783
# try extracting all versions; parallel extraction is used
784
nv = self.num_versions()
789
# For creating the ancestry, IntSet is much faster (3.7s vs 0.17s)
790
# The problem is that set membership is much more expensive
791
name = self._idx_to_name(i)
792
sha1s[name] = sha.new()
794
new_inc = set([name])
795
for p in self._parents[i]:
796
new_inc.update(inclusions[self._idx_to_name(p)])
798
if set(new_inc) != set(self.get_ancestry(name)):
799
raise AssertionError(
801
% (set(new_inc), set(self.get_ancestry(name))))
802
inclusions[name] = new_inc
804
nlines = len(self._weave)
806
update_text = 'checking weave'
808
short_name = os.path.basename(self._weave_name)
809
update_text = 'checking %s' % (short_name,)
810
update_text = update_text[:25]
812
for lineno, insert, deleteset, line in self._walk_internal():
814
progress_bar.update(update_text, lineno, nlines)
816
for name, name_inclusions in inclusions.items():
817
# The active inclusion must be an ancestor,
818
# and no ancestors must have deleted this line,
819
# because we don't support resurrection.
820
if (insert in name_inclusions) and not (deleteset & name_inclusions):
821
sha1s[name].update(line)
824
version = self._idx_to_name(i)
825
hd = sha1s[version].hexdigest()
826
expected = self._sha1s[i]
828
raise errors.WeaveInvalidChecksum(
829
"mismatched sha1 for version %s: "
830
"got %s, expected %s"
831
% (version, hd, expected))
833
# TODO: check insertions are properly nested, that there are
834
# no lines outside of insertion blocks, that deletions are
835
# properly paired, etc.
837
def _join(self, other, pb, msg, version_ids, ignore_missing):
838
"""Worker routine for join()."""
839
if not other.versions():
840
return # nothing to update, easy
843
# versions is never none, InterWeave checks this.
846
# two loops so that we do not change ourselves before verifying it
848
# work through in index order to make sure we get all dependencies
851
# get the selected versions only that are in other.versions.
852
version_ids = set(other.versions()).intersection(set(version_ids))
853
# pull in the referenced graph.
854
version_ids = other.get_ancestry(version_ids)
855
pending_parents = other.get_parent_map(version_ids)
856
pending_graph = pending_parents.items()
857
if len(pending_graph) != len(version_ids):
858
raise RevisionNotPresent(
859
set(version_ids) - set(pending_parents.keys()), self)
860
for name in topo_sort(pending_graph):
861
other_idx = other._name_map[name]
862
# returns True if we have it, False if we need it.
863
if not self._check_version_consistent(other, other_idx, name):
864
names_to_join.append((other_idx, name))
872
for other_idx, name in names_to_join:
873
# TODO: If all the parents of the other version are already
874
# present then we can avoid some work by just taking the delta
875
# and adjusting the offsets.
876
new_parents = self._imported_parents(other, other_idx)
877
sha1 = other._sha1s[other_idx]
882
pb.update(msg, merged, len(names_to_join))
884
lines = other.get_lines(other_idx)
885
self._add(name, lines, new_parents, sha1)
887
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
888
merged, processed, self._weave_name, time.time()-time0))
890
def _imported_parents(self, other, other_idx):
    """Return list of parents in self corresponding to indexes in other.

    :param other: The weave that version ``other_idx`` is taken from.
    :param other_idx: Index of the version in ``other`` whose parents are
        being translated.
    :return: A list holding, for each entry of
        ``other._parents[other_idx]`` in order, the index that
        ``self._name_map`` records for that parent's name.
    :raises WeaveError: If a parent's name is not in ``self._name_map``.
    """
    new_parents = []
    for parent_idx in other._parents[other_idx]:
        parent_name = other._names[parent_idx]
        if parent_name not in self._name_map:
            # should not be possible
            # _name_map is keyed by name, so the child's name must be read
            # from _names; indexing _name_map with an int would raise
            # KeyError and mask this error.
            raise WeaveError("missing parent {%s} of {%s} in %r"
                             % (parent_name, other._names[other_idx], self))
        new_parents.append(self._name_map[parent_name])
    return new_parents
902
def _check_version_consistent(self, other, other_idx, name):
903
"""Check if a version is consistent in this and other.
905
To be consistent it must have:
908
* the same direct parents (by name, not index, and disregarding
911
If present & correct return True;
912
if not present in self return False;
913
if inconsistent raise error."""
914
this_idx = self._name_map.get(name, -1)
916
if self._sha1s[this_idx] != other._sha1s[other_idx]:
917
raise errors.WeaveTextDiffers(name, self, other)
918
self_parents = self._parents[this_idx]
919
other_parents = other._parents[other_idx]
920
n1 = set([self._names[i] for i in self_parents])
921
n2 = set([other._names[i] for i in other_parents])
922
if not self._compatible_parents(n1, n2):
923
raise WeaveParentMismatch("inconsistent parents "
924
"for version {%s}: %s vs %s" % (name, n1, n2))
930
def _reweave(self, other, pb, msg):
    """Reweave self with other - internal helper for join().

    The module-level ``_reweave`` builds the combined weave, and its
    content is then copied into self with ``_copy_weave_content``.

    :param other: The other weave to merge
    :param pb: An optional progress bar, indicating how far done we are
    :param msg: An optional message for the progress
    """
    self._copy_weave_content(_reweave(self, other, pb=pb, msg=msg))
940
def _copy_weave_content(self, otherweave):
    """Absorb the content from otherweave.

    Every attribute named in ``self.__slots__`` except ``_weave_name`` is
    overwritten with ``copy(...)`` of the same attribute on otherweave.
    """
    for slot in self.__slots__:
        if slot == '_weave_name':
            # Keep our own name; only the content is taken over.
            continue
        value = getattr(otherweave, slot)
        setattr(self, slot, copy(value))
947
class WeaveFile(Weave):
948
"""A WeaveFile represents a Weave on disk and writes on change."""
950
WEAVE_SUFFIX = '.weave'
952
def __init__(self, name, transport, filemode=None, create=False, access_mode='w', get_scope=None):
953
"""Create a WeaveFile.
955
:param create: If not True, only open an existing knit.
957
super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope)
958
self._transport = transport
959
self._filemode = filemode
961
_read_weave_v5(self._transport.get(name + WeaveFile.WEAVE_SUFFIX), self)
962
except errors.NoSuchFile:
968
def _add_lines(self, version_id, parents, lines, parent_texts,
969
left_matching_blocks, nostore_sha, random_id, check_content):
970
"""Add a version and save the weave."""
971
self.check_not_reserved_id(version_id)
972
result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
973
parent_texts, left_matching_blocks, nostore_sha, random_id,
978
def copy_to(self, name, transport):
979
"""See VersionedFile.copy_to()."""
980
# as we are all in memory always, just serialise to the new place.
982
write_weave_v5(self, sio)
984
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
987
"""Save the weave."""
988
self._check_write_ok()
990
write_weave_v5(self, sio)
992
self._transport.put_file(self._weave_name + WeaveFile.WEAVE_SUFFIX,
998
"""See VersionedFile.get_suffixes()."""
999
return [WeaveFile.WEAVE_SUFFIX]
1001
def insert_record_stream(self, stream):
1002
super(WeaveFile, self).insert_record_stream(stream)
1005
@deprecated_method(one_five)
1006
def join(self, other, pb=None, msg=None, version_ids=None,
1007
ignore_missing=False):
1008
"""Join other into self and save."""
1009
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
1013
def _reweave(wa, wb, pb=None, msg=None):
1014
"""Combine two weaves and return the result.
1016
This works even if a revision R has different parents in
1017
wa and wb. In the resulting weave all the parents are given.
1019
This is done by just building up a new weave, maintaining ordering
1020
of the versions in the two inputs. More efficient approaches
1021
might be possible but it should only be necessary to do
1022
this operation rarely, when a new previously ghost version is
1025
:param pb: An optional progress bar, indicating how far done we are
1026
:param msg: An optional message for the progress
1030
queue_a = range(wa.num_versions())
1031
queue_b = range(wb.num_versions())
1032
# first determine combined parents of all versions
1033
# map from version name -> all parent names
1034
combined_parents = _reweave_parent_graphs(wa, wb)
1035
mutter("combined parents: %r", combined_parents)
1036
order = topo_sort(combined_parents.iteritems())
1037
mutter("order to reweave: %r", order)
1042
for idx, name in enumerate(order):
1044
pb.update(msg, idx, len(order))
1045
if name in wa._name_map:
1046
lines = wa.get_lines(name)
1047
if name in wb._name_map:
1048
lines_b = wb.get_lines(name)
1049
if lines != lines_b:
1050
mutter('Weaves differ on content. rev_id {%s}', name)
1051
mutter('weaves: %s, %s', wa._weave_name, wb._weave_name)
1053
lines = list(difflib.unified_diff(lines, lines_b,
1054
wa._weave_name, wb._weave_name))
1055
mutter('lines:\n%s', ''.join(lines))
1056
raise errors.WeaveTextDiffers(name, wa, wb)
1058
lines = wb.get_lines(name)
1059
wr._add(name, lines, [wr._lookup(i) for i in combined_parents[name]])
1062
def _reweave_parent_graphs(wa, wb):
    """Return combined parent ancestry for two weaves.

    :param wa: First weave; its ``_names``, ``_parents`` and
        ``_idx_to_name`` are read.
    :param wb: Second weave, read the same way.
    :return: A dict mapping each version name found in either weave to the
        set of its parent names.  A version present in both weaves gets
        the union of the parents recorded in each.
    """
    combined = {}
    for weave in (wa, wb):
        for idx, name in enumerate(weave._names):
            # setdefault keeps parents already collected from the first weave.
            parent_names = combined.setdefault(name, set())
            parent_names.update(map(weave._idx_to_name, weave._parents[idx]))
    return combined
677
for l in lines_a: yield l
679
for l in lines_b: yield l
686
if state == 'unchanged':
689
elif state == 'killed-a':
692
elif state == 'killed-b':
695
elif state == 'new-a':
698
elif state == 'new-b':
702
assert state in ('irrelevant', 'ghost-a', 'ghost-b', 'killed-base',
1074
712
def weave_toc(w):