268
277
def create_empty(self, name, transport, mode=None):
    """Return a KnitVersionedFile constructed with ``create=True``.

    The new object is built with this object's ``factory`` and ``delta``
    attributes.

    :param name: passed as the first positional argument to
        KnitVersionedFile.
    :param transport: passed as the second positional argument to
        KnitVersionedFile.
    :param mode: accepted, but not forwarded to the KnitVersionedFile
        constructor by this method.
    :return: the KnitVersionedFile instance that was constructed.
    """
    return KnitVersionedFile(name, transport, factory=self.factory,
                             delta=self.delta, create=True)
280
def _fix_parents(self, version, new_parents):
281
"""Fix the parents list for version.
283
This is done by appending a new version to the index
284
with identical data except for the parents list.
285
the parents list must be a superset of the current
288
current_values = self._index._cache[version]
289
assert set(current_values[4]).difference(set(new_parents)) == set()
290
self._index.add_version(version,
296
def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    :return: a dict built from the pairs produced by
        ``self._index.get_graph()``.
    """
    return dict(self._index.get_graph())
272
302
def get_suffixes():
    """See VersionedFile.get_suffixes().

    Takes no ``self``; presumably exposed as a staticmethod by a decorator
    that is not visible here -- TODO confirm.

    :return: the list ``[DATA_SUFFIX, INDEX_SUFFIX]``.
    """
    return [DATA_SUFFIX, INDEX_SUFFIX]
306
def has_ghost(self, version_id):
307
"""True if there is a ghost reference in the file to version_id."""
309
if self.has_version(version_id):
311
# optimisable if needed by memoising the _ghosts set.
312
items = self._index.get_graph()
313
for node, parents in items:
314
for parent in parents:
315
if parent not in self._index._cache:
316
if parent == version_id:
276
320
def versions(self):
    """See VersionedFile.versions.

    :return: whatever ``self._index.get_versions()`` returns.
    """
    return self._index.get_versions()
390
434
raise RevisionNotPresent(list(version_ids)[0], self.filename)
392
def add_lines(self, version_id, parents, lines):
436
def _add_lines_with_ghosts(self, version_id, parents, lines):
    """See VersionedFile.add_lines_with_ghosts().

    Runs ``self._check_add`` on the version id and lines, then delegates to
    ``self._add``. The parents are handed to ``_add`` without any presence
    check in this method.

    :param version_id: id of the version being added.
    :param parents: parent version ids, passed through unchanged.
    :param lines: the lines of the new version.
    :return: whatever ``self._add`` returns.
    """
    self._check_add(version_id, lines)
    # Hand _add a shallow copy so it never receives the caller's list object.
    return self._add(version_id, lines[:], parents, self.delta)
441
def _add_lines(self, version_id, parents, lines):
    """See VersionedFile.add_lines.

    Runs ``self._check_add`` on the version id and lines, then
    ``self._check_versions_present`` on the parents, and finally delegates
    to ``self._add``.

    :param version_id: id of the version being added.
    :param parents: parent version ids; checked with
        ``self._check_versions_present`` before anything is added.
    :param lines: the lines of the new version.
    :return: whatever ``self._add`` returns.
    """
    self._check_add(version_id, lines)
    self._check_versions_present(parents)
    # Hand _add a shallow copy so it never receives the caller's list object.
    return self._add(version_id, lines[:], parents, self.delta)
447
def _check_add(self, version_id, lines):
448
"""check that version_id and lines are safe to add."""
394
449
assert self.writable, "knit is not opened for write"
395
450
### FIXME escape. RBC 20060228
396
451
if contains_whitespace(version_id):
466
528
"""See VersionedFile.get_lines()."""
467
529
return self._get_content(version_id).text()
531
def iter_lines_added_or_present_in_versions(self, version_ids=None):
532
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
533
if version_ids is None:
534
version_ids = self.versions()
535
# we don't care about inclusions, the caller cares.
536
# but we need to setup a list of records to visit.
537
# we need version_id, position, length
538
version_id_records = []
539
requested_versions = list(version_ids)
540
# filter for available versions
541
for version_id in requested_versions:
542
if not self.has_version(version_id):
543
raise RevisionNotPresent(version_id, self.filename)
544
# get an in-component-order queue:
546
for version_id in self.versions():
547
if version_id in requested_versions:
548
version_ids.append(version_id)
549
data_pos, length = self._index.get_position(version_id)
550
version_id_records.append((version_id, data_pos, length))
552
pb = bzrlib.ui.ui_factory.nested_progress_bar()
554
total = len(version_id_records)
556
pb.update('Walking content.', count, total)
557
for version_id, data, sha_value in \
558
self._data.read_records_iter(version_id_records):
559
pb.update('Walking content.', count, total)
560
method = self._index.get_method(version_id)
561
version_idx = self._index.lookup(version_id)
562
assert method in ('fulltext', 'line-delta')
563
if method == 'fulltext':
564
content = self.factory.parse_fulltext(data, version_idx)
565
for line in content.text():
568
delta = self.factory.parse_line_delta(data, version_idx)
569
for start, end, count, lines in delta:
570
for origin, line in lines:
573
pb.update('Walking content.', total, total)
576
pb.update('Walking content.', total, total)
469
580
def num_versions(self):
    """See VersionedFile.num_versions().

    :return: whatever ``self._index.num_versions()`` returns.
    """
    return self._index.num_versions()
616
746
# so - wc -l of a knit index is != the number of unique names
618
748
self._history = []
749
pb = bzrlib.ui.ui_factory.nested_progress_bar()
620
fp = self._transport.get(self._filename)
621
self.check_header(fp)
622
for rec in self._iter_index(fp):
623
self._cache_version(rec[0], rec[1].split(','), int(rec[2]), int(rec[3]),
624
[self._history[int(i)] for i in rec[4:]])
625
except NoSuchFile, e:
626
if mode != 'w' or not create:
754
pb.update('read knit index', count, total)
755
fp = self._transport.get(self._filename)
756
self.check_header(fp)
757
for rec in self._iter_index(fp):
760
pb.update('read knit index', count, total)
761
parents = self._parse_parents(rec[4:])
762
self._cache_version(rec[0], rec[1].split(','), int(rec[2]), int(rec[3]),
764
except NoSuchFile, e:
765
if mode != 'w' or not create:
769
pb.update('read knit index', total, total)
772
def _parse_parents(self, compressed_parents):
773
"""convert a list of string parent values into version ids.
775
ints are looked up in the index.
776
.FOO values are ghosts and converted in to FOO.
779
for value in compressed_parents:
780
if value.startswith('.'):
781
result.append(value[1:])
783
assert isinstance(value, str)
784
result.append(self._history[int(value)])
630
787
def get_graph(self):
774
979
contents, digest).
777
class ContinuousRange:
778
def __init__(self, rec_id, pos, size):
780
self.end_pos = pos + size
781
self.versions = [(rec_id, pos, size)]
783
def add(self, rec_id, pos, size):
784
if self.end_pos != pos:
786
self.end_pos = pos + size
787
self.versions.append((rec_id, pos, size))
791
for rec_id, pos, size in self.versions:
792
yield rec_id, fp.read(size)
794
fp = self._open_file()
796
# Loop through all records and try to collect as large
797
# continuous region as possible to read.
799
record_id, pos, size = records.pop(0)
800
continuous_range = ContinuousRange(record_id, pos, size)
802
record_id, pos, size = records[0]
803
if continuous_range.add(record_id, pos, size):
807
fp.seek(continuous_range.start_pos, 0)
808
for record_id, data in continuous_range.split(fp):
983
for version_id, pos, size in records:
984
if version_id not in self._records:
985
needed_records.append((version_id, pos, size))
987
if len(needed_records):
988
# We take it that the transport optimizes the fetching as good
989
# as possible (ie, reads continuous ranges.)
990
response = self._transport.readv(self._filename,
991
[(pos, size) for version_id, pos, size in needed_records])
993
for (record_id, pos, size), (pos, data) in izip(iter(needed_records), response):
809
994
content, digest = self._parse_record(record_id, data)
810
yield record_id, content, digest
995
self._records[record_id] = (digest, content)
997
for version_id, pos, size in records:
998
yield version_id, copy(self._records[version_id][1]), copy(self._records[version_id][0])
814
1000
def read_records(self, records):
815
1001
"""Read records into a dictionary."""
850
1036
if not version_ids:
854
from bzrlib.progress import DummyProgress
857
version_ids = list(version_ids)
858
if None in version_ids:
859
version_ids.remove(None)
861
self.source_ancestry = set(self.source.get_ancestry(version_ids))
862
this_versions = set(self.target._index.get_versions())
863
needed_versions = self.source_ancestry - this_versions
864
cross_check_versions = self.source_ancestry.intersection(this_versions)
865
mismatched_versions = set()
866
for version in cross_check_versions:
867
# scan to include needed parents.
868
n1 = set(self.target.get_parents(version))
869
n2 = set(self.source.get_parents(version))
871
# FIXME TEST this check for cycles being introduced works
872
# the logic is we have a cycle if in our graph we are an
873
# ancestor of any of the n2 revisions.
879
parent_ancestors = self.source.get_ancestry(parent)
880
if version in parent_ancestors:
881
raise errors.GraphCycleError([parent, version])
882
# ensure this parent will be available later.
883
new_parents = n2.difference(n1)
884
needed_versions.update(new_parents.difference(this_versions))
885
mismatched_versions.add(version)
887
if not needed_versions and not cross_check_versions:
889
full_list = topo_sort(self.source._index.get_graph())
891
version_list = [i for i in full_list if (not self.target.has_version(i)
892
and i in needed_versions)]
895
for version_id in version_list:
896
data_pos, data_size = self.source._index.get_position(version_id)
897
records.append((version_id, data_pos, data_size))
900
for version_id, lines, digest \
901
in self.source._data.read_records_iter(records):
902
options = self.source._index.get_options(version_id)
903
parents = self.source._index.get_parents(version_id)
905
for parent in parents:
906
assert self.target.has_version(parent)
908
if self.target.factory.annotated:
909
# FIXME jrydberg: it should be possible to skip
910
# re-annotating components if we know that we are
911
# going to pull all revisions in the same order.
912
new_version_id = version_id
913
new_version_idx = self.target._index.num_versions()
914
if 'fulltext' in options:
915
lines = self.target._reannotate_fulltext(self.source, lines,
916
new_version_id, new_version_idx)
917
elif 'line-delta' in options:
918
lines = self.target._reannotate_line_delta(self.source, lines,
919
new_version_id, new_version_idx)
922
pb.update("Joining knit", count, len(version_list))
924
pos, size = self.target._data.add_record(version_id, digest, lines)
925
self.target._index.add_version(version_id, options, pos, size, parents)
927
for version in mismatched_versions:
928
n1 = set(self.target.get_parents(version))
929
n2 = set(self.source.get_parents(version))
930
# write a combined record to our history.
931
new_parents = self.target.get_parents(version) + list(n2.difference(n1))
932
current_values = self.target._index._cache[version]
933
self.target._index.add_version(version,
1039
pb = bzrlib.ui.ui_factory.nested_progress_bar()
1041
version_ids = list(version_ids)
1042
if None in version_ids:
1043
version_ids.remove(None)
1045
self.source_ancestry = set(self.source.get_ancestry(version_ids))
1046
this_versions = set(self.target._index.get_versions())
1047
needed_versions = self.source_ancestry - this_versions
1048
cross_check_versions = self.source_ancestry.intersection(this_versions)
1049
mismatched_versions = set()
1050
for version in cross_check_versions:
1051
# scan to include needed parents.
1052
n1 = set(self.target.get_parents_with_ghosts(version))
1053
n2 = set(self.source.get_parents_with_ghosts(version))
1055
# FIXME TEST this check for cycles being introduced works
1056
# the logic is we have a cycle if in our graph we are an
1057
# ancestor of any of the n2 revisions.
1063
parent_ancestors = self.source.get_ancestry(parent)
1064
if version in parent_ancestors:
1065
raise errors.GraphCycleError([parent, version])
1066
# ensure this parent will be available later.
1067
new_parents = n2.difference(n1)
1068
needed_versions.update(new_parents.difference(this_versions))
1069
mismatched_versions.add(version)
1071
if not needed_versions and not cross_check_versions:
1073
full_list = topo_sort(self.source.get_graph())
1075
version_list = [i for i in full_list if (not self.target.has_version(i)
1076
and i in needed_versions)]
1079
for version_id in version_list:
1080
data_pos, data_size = self.source._index.get_position(version_id)
1081
records.append((version_id, data_pos, data_size))
1084
for version_id, lines, digest \
1085
in self.source._data.read_records_iter(records):
1086
options = self.source._index.get_options(version_id)
1087
parents = self.source._index.get_parents_with_ghosts(version_id)
1089
for parent in parents:
1090
# if source has the parent, we must have grabbed it first.
1091
assert (self.target.has_version(parent) or not
1092
self.source.has_version(parent))
1094
if self.target.factory.annotated:
1095
# FIXME jrydberg: it should be possible to skip
1096
# re-annotating components if we know that we are
1097
# going to pull all revisions in the same order.
1098
new_version_id = version_id
1099
new_version_idx = self.target._index.num_versions()
1100
if 'fulltext' in options:
1101
lines = self.target._reannotate_fulltext(self.source, lines,
1102
new_version_id, new_version_idx)
1103
elif 'line-delta' in options:
1104
lines = self.target._reannotate_line_delta(self.source, lines,
1105
new_version_id, new_version_idx)
1108
pb.update("Joining knit", count, len(version_list))
1110
pos, size = self.target._data.add_record(version_id, digest, lines)
1111
self.target._index.add_version(version_id, options, pos, size, parents)
1113
for version in mismatched_versions:
1114
n1 = set(self.target.get_parents_with_ghosts(version))
1115
n2 = set(self.source.get_parents_with_ghosts(version))
1116
# write a combined record to our history preserving the current
1117
# parents as first in the list
1118
new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
1119
self.target.fix_parents(version, new_parents)
942
1126
# Hand the InterKnit class to InterVersionedFile.register_optimiser().
InterVersionedFile.register_optimiser(InterKnit)