1
# Copyright (C) 2005, 2006 by Canonical Ltd
2
# Written by Martin Pool.
3
# Modified by Johan Rydberg <jrydberg@gnu.org>
4
# Modified by Robert Collins <robert.collins@canonical.com>
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
11
# This program is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
"""Knit versionedfile implementation.
22
A knit is a versioned file implementation that supports efficient append only
26
lifeless: the data file is made up of "delta records". each delta record has a delta header
27
that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of
28
the -expanded data- (ie, the delta applied to the parent). the delta also ends with a
29
end-marker; simply "end VERSION"
31
delta can be line or full contents.
32
... the 8's there are the index number of the annotation.
33
version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e
37
8 e.set('executable', 'yes')
39
8 if elt.get('executable') == 'yes':
40
8 ie.executable = True
41
end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad
45
09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents
46
09:33 < jrydberg> lifeless: the parents are currently dictionary compressed
47
09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)
48
09:33 < lifeless> right
49
09:33 < jrydberg> lifeless: the position and size is the range in the data file
52
so the index sequence is the dictionary compressed sequence number used
53
in the deltas to provide line annotation
58
# 10:16 < lifeless> make partial index writes safe
59
# 10:16 < lifeless> implement 'knit.check()' like weave.check()
60
# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave
62
# move sha1 out of the content so that join is faster at verifying parents
63
# record content length ?
66
from cStringIO import StringIO
68
from difflib import SequenceMatcher
69
from gzip import GzipFile
72
import bzrlib.errors as errors
73
from bzrlib.errors import FileExists, NoSuchFile, KnitError, \
74
InvalidRevisionId, KnitCorrupt, KnitHeaderError, \
75
RevisionNotPresent, RevisionAlreadyPresent
76
from bzrlib.trace import mutter
77
from bzrlib.osutils import contains_whitespace, contains_linebreaks, \
79
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
80
from bzrlib.tsort import topo_sort
83
# TODO: Split out code specific to this format into an associated object.
85
# TODO: Can we put in some kind of value to check that the index and data
86
# files belong together?
88
# TODO: accommodate binaries, perhaps by storing a byte count
90
# TODO: function to check whole file
92
# TODO: atomically append data, then measure backwards from the cursor
93
# position after writing to work out where it was located. we may need to
94
# bypass python file buffering.
97
INDEX_SUFFIX = '.kndx'
100
# convenience factories for testing or use:
101
def AnnotatedKnitFactory(name, transport, mode=None):
102
"""Create a knit with path name in transport transport."""
103
return KnitVersionedFile(transport,
106
KnitAnnotateFactory(),
110
class KnitContent(object):
111
"""Content of a knit version to which deltas can be applied."""
113
def __init__(self, lines):
116
def annotate_iter(self):
117
"""Yield tuples of (origin, text) for each content line."""
118
for origin, text in self._lines:
122
"""Return a list of (origin, text) tuples."""
123
return list(self.annotate_iter())
125
def apply_delta(self, delta):
126
"""Apply delta to this content."""
128
for start, end, count, lines in delta:
129
self._lines[offset+start:offset+end] = lines
130
offset = offset + (start - end) + count
132
def line_delta_iter(self, new_lines):
133
"""Generate line-based delta from new_lines to this content."""
134
new_texts = [text for origin, text in new_lines._lines]
135
old_texts = [text for origin, text in self._lines]
136
s = difflib.SequenceMatcher(None, old_texts, new_texts)
137
for op in s.get_opcodes():
140
yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])
142
def line_delta(self, new_lines):
    """Return the delta from this content to new_lines as a list of hunks."""
    return [hunk for hunk in self.line_delta_iter(new_lines)]
146
return [text for origin, text in self._lines]
149
class _KnitFactory(object):
150
"""Base factory for creating content objects."""
152
def make(self, lines, version):
    """Wrap raw text lines in a KnitContent, attributing each to version."""
    annotated = [(version, line) for line in lines]
    return KnitContent(annotated)
157
class KnitAnnotateFactory(_KnitFactory):
158
"""Factory for creating annotated Content objects."""
162
def parse_fulltext(self, content, version):
165
origin, text = line.split(' ', 1)
166
lines.append((int(origin), text))
167
return KnitContent(lines)
169
def parse_line_delta_iter(self, lines):
171
header = lines.pop(0)
172
start, end, c = [int(n) for n in header.split(',')]
175
origin, text = lines.pop(0).split(' ', 1)
176
contents.append((int(origin), text))
177
yield start, end, c, contents
179
def parse_line_delta(self, lines, version):
    """Materialise the hunk iterator for the given serialised delta lines."""
    return [hunk for hunk in self.parse_line_delta_iter(lines)]
182
def lower_fulltext(self, content):
    """Serialise annotated content to '<origin> <text>' storage lines."""
    out = []
    for origin, text in content._lines:
        out.append('%d %s' % (origin, text))
    return out
185
def lower_line_delta(self, delta):
187
for start, end, c, lines in delta:
188
out.append('%d,%d,%d\n' % (start, end, c))
189
for origin, text in lines:
190
out.append('%d %s' % (origin, text))
194
class KnitPlainFactory(_KnitFactory):
195
"""Factory for creating plain Content objects."""
199
def parse_fulltext(self, content, version):
    """Plain fulltexts are stored verbatim; just annotate them on the way in."""
    parsed = self.make(content, version)
    return parsed
202
def parse_line_delta_iter(self, lines, version):
204
header = lines.pop(0)
205
start, end, c = [int(n) for n in header.split(',')]
206
yield start, end, c, zip([version] * c, lines[:c])
209
def parse_line_delta(self, lines, version):
    """Collect every hunk yielded by parse_line_delta_iter into a list."""
    return [hunk for hunk in self.parse_line_delta_iter(lines, version)]
212
def lower_fulltext(self, content):
    """Plain content serialises as its bare text lines, annotations dropped."""
    text_lines = content.text()
    return text_lines
215
def lower_line_delta(self, delta):
217
for start, end, c, lines in delta:
218
out.append('%d,%d,%d\n' % (start, end, c))
219
out.extend([text for origin, text in lines])
223
def make_empty_knit(transport, relpath):
224
"""Construct a empty knit at the specified location."""
225
k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)
229
class KnitVersionedFile(VersionedFile):
230
"""Weave-like structure with faster random access.
232
A knit stores a number of texts and a summary of the relationships
233
between them. Texts are identified by a string version-id. Texts
234
are normally stored and retrieved as a series of lines, but can
235
also be passed as single strings.
237
Lines are stored with the trailing newline (if any) included, to
238
avoid special cases for files with no final newline. Lines are
239
composed of 8-bit characters, not unicode. The combination of
240
these approaches should mean any 'binary' file can be safely
241
stored and retrieved.
244
def __init__(self, transport, relpath, mode, factory,
245
basis_knit=None, delta=True):
246
"""Construct a knit at location specified by relpath."""
247
assert mode in ('r', 'w'), "invalid mode specified"
248
assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \
251
self.transport = transport
252
self.filename = relpath
253
self.basis_knit = basis_knit
254
self.factory = factory
255
self.writable = (mode == 'w')
258
self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
260
self._data = _KnitData(transport, relpath + DATA_SUFFIX,
263
def create_empty(self, name, transport, mode=None):
    """Make a new, empty knit that shares this knit's factory and delta policy."""
    empty_knit = KnitVersionedFile(transport, name, 'w',
                                   self.factory, delta=self.delta)
    return empty_knit
267
"""See VersionedFile.versions."""
268
return self._index.get_versions()
270
def has_version(self, version_id):
    """See VersionedFile.has_version."""
    # The index holds the authoritative record of stored versions.
    present = self._index.has_version(version_id)
    return present

__contains__ = has_version
276
def _merge_annotations(self, content, parents):
277
"""Merge annotations for content. This is done by comparing
278
the annotations based on changed to the text."""
279
for parent_id in parents:
280
merge_content = self._get_content(parent_id)
281
seq = SequenceMatcher(None, merge_content.text(), content.text())
282
for i, j, n in seq.get_matching_blocks():
285
content._lines[j:j+n] = merge_content._lines[i:i+n]
287
def _get_components(self, version_id):
288
"""Return a list of (version_id, method, data) tuples that
289
makes up version specified by version_id of the knit.
291
The components should be applied in the order of the returned
294
The basis knit will be used to the largest extent possible
295
since it is assumed that accesses to it are faster.
297
# needed_revisions holds a list of (method, version_id) of
298
# versions that is needed to be fetched to construct the final
299
# version of the file.
301
# basis_revisions is a list of versions that needs to be
302
# fetched but exists in the basis knit.
304
basis = self.basis_knit
311
if basis and basis._index.has_version(cursor):
313
basis_versions.append(cursor)
314
method = picked_knit._index.get_method(cursor)
315
needed_versions.append((method, cursor))
316
if method == 'fulltext':
318
cursor = picked_knit.get_parents(cursor)[0]
323
for comp_id in basis_versions:
324
data_pos, data_size = basis._index.get_data_position(comp_id)
325
records.append((piece_id, data_pos, data_size))
326
components.update(basis._data.read_records(records))
329
for comp_id in [vid for method, vid in needed_versions
330
if vid not in basis_versions]:
331
data_pos, data_size = self._index.get_position(comp_id)
332
records.append((comp_id, data_pos, data_size))
333
components.update(self._data.read_records(records))
335
# get_data_records returns a mapping with the version id as
336
# index and the value as data. The order the components need
337
# to be applied is held by needed_versions (reversed).
339
for method, comp_id in reversed(needed_versions):
340
out.append((comp_id, method, components[comp_id]))
344
def _get_content(self, version_id):
345
"""Returns a content object that makes up the specified
347
if not self.has_version(version_id):
348
raise RevisionNotPresent(version_id, self.filename)
350
if self.basis_knit and version_id in self.basis_knit:
351
return self.basis_knit._get_content(version_id)
354
components = self._get_components(version_id)
355
for component_id, method, (data, digest) in components:
356
version_idx = self._index.lookup(component_id)
357
if method == 'fulltext':
358
assert content is None
359
content = self.factory.parse_fulltext(data, version_idx)
360
elif method == 'line-delta':
361
delta = self.factory.parse_line_delta(data, version_idx)
362
content.apply_delta(delta)
364
if 'no-eol' in self._index.get_options(version_id):
365
line = content._lines[-1][1].rstrip('\n')
366
content._lines[-1] = (content._lines[-1][0], line)
368
if sha_strings(content.text()) != digest:
369
raise KnitCorrupt(self.filename, 'sha-1 does not match')
373
def _check_versions_present(self, version_ids):
374
"""Check that all specified versions are present."""
375
version_ids = set(version_ids)
376
for r in list(version_ids):
377
if self._index.has_version(r):
378
version_ids.remove(r)
380
raise RevisionNotPresent(list(version_ids)[0], self.filename)
382
def add_lines(self, version_id, parents, lines):
383
"""See VersionedFile.add_lines."""
384
assert self.writable, "knit is not opened for write"
385
### FIXME escape. RBC 20060228
386
if contains_whitespace(version_id):
387
raise InvalidRevisionId(version_id)
388
if self.has_version(version_id):
389
raise RevisionAlreadyPresent(version_id, self.filename)
391
if True or __debug__:
393
assert '\n' not in l[:-1]
395
self._check_versions_present(parents)
396
return self._add(version_id, lines[:], parents, self.delta)
398
def _add(self, version_id, lines, parents, delta):
399
"""Add a set of lines on top of version specified by parents.
401
If delta is true, compress the text as a line-delta against
404
if delta and not parents:
407
digest = sha_strings(lines)
410
if lines[-1][-1] != '\n':
411
options.append('no-eol')
412
lines[-1] = lines[-1] + '\n'
414
lines = self.factory.make(lines, len(self._index))
415
if self.factory.annotated and len(parents) > 0:
416
# Merge annotations from parent texts if so is needed.
417
self._merge_annotations(lines, parents)
419
if parents and delta:
420
# To speed the extract of texts the delta chain is limited
421
# to a fixed number of deltas. This should minimize both
422
# I/O and the time spend applying deltas.
424
delta_parents = parents
426
parent = delta_parents[0]
427
method = self._index.get_method(parent)
428
if method == 'fulltext':
430
delta_parents = self._index.get_parents(parent)
432
if method == 'line-delta':
436
options.append('line-delta')
437
content = self._get_content(parents[0])
438
delta_hunks = content.line_delta(lines)
439
store_lines = self.factory.lower_line_delta(delta_hunks)
441
options.append('fulltext')
442
store_lines = self.factory.lower_fulltext(lines)
444
where, size = self._data.add_record(version_id, digest, store_lines)
445
self._index.add_version(version_id, options, where, size, parents)
447
def clone_text(self, new_version_id, old_version_id, parents):
    """See VersionedFile.clone_text()."""
    # FIXME RBC 20060228 make fast by only inserting an index with null delta.
    old_lines = self.get_lines(old_version_id)
    self.add_lines(new_version_id, parents, old_lines)
452
def get_lines(self, version_id):
    """See VersionedFile.get_lines()."""
    content = self._get_content(version_id)
    return content.text()
456
def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter."""
    # Origins are stored as index numbers; map each back to its
    # version id before handing the line to the caller.
    for origin, text in self._get_content(version_id).annotate_iter():
        yield self._index.idx_to_name(origin), text
462
def get_parents(self, version_id):
    """See VersionedFile.get_parents."""
    # Raises RevisionNotPresent (via the check) for unknown ids.
    self._check_versions_present([version_id])
    parents = self._index.get_parents(version_id)
    return list(parents)
467
def get_ancestry(self, versions):
468
"""See VersionedFile.get_ancestry."""
469
if isinstance(versions, basestring):
470
versions = [versions]
473
self._check_versions_present(versions)
474
return self._index.get_ancestry(versions)
476
def _reannotate_line_delta(self, other, lines, new_version_id,
478
"""Re-annotate line-delta and return new delta."""
480
for start, end, count, contents \
481
in self.factory.parse_line_delta_iter(lines):
483
for origin, line in contents:
484
old_version_id = other._index.idx_to_name(origin)
485
if old_version_id == new_version_id:
486
idx = new_version_idx
488
idx = self._index.lookup(old_version_id)
489
new_lines.append((idx, line))
490
new_delta.append((start, end, count, new_lines))
492
return self.factory.lower_line_delta(new_delta)
494
def _reannotate_fulltext(self, other, lines, new_version_id,
496
"""Re-annotate fulltext and return new version."""
497
content = self.factory.parse_fulltext(lines, new_version_idx)
499
for origin, line in content.annotate_iter():
500
old_version_id = other._index.idx_to_name(origin)
501
if old_version_id == new_version_id:
502
idx = new_version_idx
504
idx = self._index.lookup(old_version_id)
505
new_lines.append((idx, line))
507
return self.factory.lower_fulltext(KnitContent(new_lines))
509
def walk(self, version_ids):
510
"""See VersionedFile.walk."""
511
# We take the short path here, and extract all relevant texts
512
# and put them in a weave and let that do all the work. Far
513
# from optimal, but is much simpler.
514
# FIXME RB 20060228 this really is inefficient!
515
from bzrlib.weave import Weave
517
w = Weave(self.filename)
518
ancestry = self.get_ancestry(version_ids)
519
sorted_graph = topo_sort(self._index.get_graph())
520
version_list = [vid for vid in sorted_graph if vid in ancestry]
522
for version_id in version_list:
523
lines = self.get_lines(version_id)
524
w.add_lines(version_id, self.get_parents(version_id), lines)
526
for lineno, insert_id, dset, line in w.walk(version_ids):
527
yield lineno, insert_id, dset, line
530
class _KnitComponentFile(object):
531
"""One of the files used to implement a knit database"""
533
def __init__(self, transport, filename, mode):
534
self._transport = transport
535
self._filename = filename
538
def write_header(self):
539
old_len = self._transport.append(self._filename, StringIO(self.HEADER))
541
raise KnitCorrupt(self._filename, 'misaligned after writing header')
543
def check_header(self, fp):
544
line = fp.read(len(self.HEADER))
545
if line != self.HEADER:
546
raise KnitHeaderError(badline=line)
549
"""Commit is a nop."""
552
return '%s(%s)' % (self.__class__.__name__, self._filename)
555
class _KnitIndex(_KnitComponentFile):
556
"""Manages knit index file.
558
The index is already kept in memory and read on startup, to enable
559
fast lookups of revision information. The cursor of the index
560
file is always pointing to the end, making it easy to append
563
_cache is a cache for fast mapping from version id to a Index
566
_history is a cache for fast mapping from indexes to version ids.
568
The index data format is dictionary compressed when it comes to
569
parent references; an index entry may only have parents with a
lower index number. As a result, the index is topologically sorted.
572
Duplicate entries may be written to the index for a single version id
573
if this is done then the latter one completely replaces the former:
574
this allows updates to correct version and parent information.
575
Note that the two entries may share the delta, and that successive
576
annotations and references MUST point to the first entry.
579
HEADER = "# bzr knit index 7\n"
581
def _cache_version(self, version_id, options, pos, size, parents):
    """Record a version's index entry in the in-memory cache.

    Later entries for the same version id overwrite the cached value,
    while _history keeps only the first-seen position so existing index
    numbers stay stable.
    """
    val = (version_id, options, pos, size, parents)
    self._cache[version_id] = val
    # `x not in y` is the idiomatic form of `not x in y`.
    if version_id not in self._history:
        self._history.append(version_id)
587
def _iter_index(self, fp):
589
for l in lines.splitlines(False):
592
def __init__(self, transport, filename, mode):
593
_KnitComponentFile.__init__(self, transport, filename, mode)
595
# position in _history is the 'official' index for a revision
596
# but the values may have come from a newer entry.
597
# so - wc -l of a knit index is != the number of unique names
601
fp = self._transport.get(self._filename)
602
self.check_header(fp)
603
for rec in self._iter_index(fp):
604
self._cache_version(rec[0], rec[1].split(','), int(rec[2]), int(rec[3]),
605
[self._history[int(i)] for i in rec[4:]])
606
except NoSuchFile, e:
613
for version_id, index in self._cache.iteritems():
614
graph.append((version_id, index[4]))
617
def get_ancestry(self, versions):
618
"""See VersionedFile.get_ancestry."""
620
for version_id in versions:
621
version_idxs.append(self._history.index(version_id))
623
for v in xrange(max(version_idxs), 0, -1):
624
if self._history[v] in i:
625
# include all its parents
626
i.update(self._cache[self._history[v]][4])
629
def num_versions(self):
    """Return the number of distinct versions recorded in the index."""
    return len(self._history)

__len__ = num_versions
634
def get_versions(self):
637
def idx_to_name(self, idx):
    """Map an index number back to its version id."""
    return self._history[idx]
640
def lookup(self, version_id):
    """Return the index number assigned to version_id."""
    # _history.index gives the position of the first (authoritative)
    # occurrence, matching how parents are dictionary-compressed.
    assert version_id in self._cache
    return self._history.index(version_id)
644
def add_version(self, version_id, options, pos, size, parents):
645
"""Add a version record to the index."""
646
self._cache_version(version_id, options, pos, size, parents)
648
content = "%s %s %s %s %s\n" % (version_id,
652
' '.join([str(self.lookup(vid)) for
654
self._transport.append(self._filename, StringIO(content))
656
def has_version(self, version_id):
    """True if the version is in the index."""
    # dict.has_key() is deprecated (and removed in Python 3); the `in`
    # operator is the direct equivalent.
    return version_id in self._cache
660
def get_position(self, version_id):
    """Return data position and size of specified version."""
    record = self._cache[version_id]
    return (record[2], record[3])
665
def get_method(self, version_id):
666
"""Return compression method of specified version."""
667
options = self._cache[version_id][1]
668
if 'fulltext' in options:
671
assert 'line-delta' in options
674
def get_options(self, version_id):
    """Return the storage options recorded for version_id."""
    return self._cache[version_id][1]
677
def get_parents(self, version_id):
    """Return parents of specified version."""
    _, _, _, _, parents = self._cache[version_id]
    return parents
681
def check_versions_present(self, version_ids):
682
"""Check that all specified versions are present."""
683
version_ids = set(version_ids)
684
for version_id in list(version_ids):
685
if version_id in self._cache:
686
version_ids.remove(version_id)
688
raise RevisionNotPresent(list(version_ids)[0], self.filename)
691
class _KnitData(_KnitComponentFile):
692
"""Contents of the knit data file"""
694
HEADER = "# bzr knit data 7\n"
696
def __init__(self, transport, filename, mode):
697
_KnitComponentFile.__init__(self, transport, filename, mode)
699
self._checked = False
701
def _open_file(self):
702
if self._file is None:
704
self._file = self._transport.get(self._filename)
709
def add_record(self, version_id, digest, lines):
710
"""Write new text record to disk. Returns the position in the
711
file where it was written."""
713
data_file = GzipFile(None, mode='wb', fileobj=sio)
714
print >>data_file, "version %s %d %s" % (version_id, len(lines), digest)
715
data_file.writelines(lines)
716
print >>data_file, "end %s\n" % version_id
719
content = sio.getvalue()
720
start_pos = self._transport.append(self._filename, StringIO(content))
721
return start_pos, len(content)
723
def _parse_record(self, version_id, data):
724
df = GzipFile(mode='rb', fileobj=StringIO(data))
725
rec = df.readline().split()
727
raise KnitCorrupt(self._filename, 'unexpected number of records')
728
if rec[1] != version_id:
729
raise KnitCorrupt(self.file.name,
730
'unexpected version, wanted %r' % version_id)
732
record_contents = self._read_record_contents(df, lines)
734
if l != 'end %s\n' % version_id:
735
raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
737
return record_contents, rec[3]
739
def _read_record_contents(self, df, record_lines):
    """Read and return record_lines lines from the open data file df.

    :param df: file-like object positioned at the start of the record body.
    :param record_lines: number of lines the record header declared.
    :return: list of the lines read (newlines included).
    """
    # As shown, the accumulator was never initialised or returned;
    # restore the obvious list-accumulation contract.
    r = []
    for i in range(record_lines):
        r.append(df.readline())
    return r
746
def read_records_iter(self, records):
747
"""Read text records from data file and yield result.
749
Each passed record is a tuple of (version_id, pos, len) and
750
will be read in the given order. Yields (version_id,
754
class ContinuousRange:
755
def __init__(self, rec_id, pos, size):
757
self.end_pos = pos + size
758
self.versions = [(rec_id, pos, size)]
760
def add(self, rec_id, pos, size):
761
if self.end_pos != pos:
763
self.end_pos = pos + size
764
self.versions.append((rec_id, pos, size))
768
for rec_id, pos, size in self.versions:
769
yield rec_id, fp.read(size)
771
fp = self._open_file()
773
# Loop through all records and try to collect as large
774
# continuous region as possible to read.
776
record_id, pos, size = records.pop(0)
777
continuous_range = ContinuousRange(record_id, pos, size)
779
record_id, pos, size = records[0]
780
if continuous_range.add(record_id, pos, size):
784
fp.seek(continuous_range.start_pos, 0)
785
for record_id, data in continuous_range.split(fp):
786
content, digest = self._parse_record(record_id, data)
787
yield record_id, content, digest
791
def read_records(self, records):
    """Read records into a dictionary.

    :param records: iterable of (version_id, pos, size) tuples.
    :return: dict mapping version id -> (content, digest).
    """
    # As shown, `components` was never initialised or returned; restore
    # the obvious dict-accumulation contract around read_records_iter.
    components = {}
    for record_id, content, digest in self.read_records_iter(records):
        components[record_id] = (content, digest)
    return components
799
class InterKnit(InterVersionedFile):
800
"""Optimised code paths for knit to knit operations."""
802
_matching_file_factory = staticmethod(AnnotatedKnitFactory)
805
def is_compatible(source, target):
806
"""Be compatible with knits. """
808
return (isinstance(source, KnitVersionedFile) and
809
isinstance(target, KnitVersionedFile))
810
except AttributeError:
813
def join(self, pb=None, msg=None, version_ids=None):
814
"""See InterVersionedFile.join."""
815
assert isinstance(self.source, KnitVersionedFile)
816
assert isinstance(self.target, KnitVersionedFile)
818
if version_ids is None:
819
version_ids = self.source.versions()
824
from bzrlib.progress import DummyProgress
827
version_ids = list(version_ids)
828
if None in version_ids:
829
version_ids.remove(None)
831
self.source_ancestry = set(self.source.get_ancestry(version_ids))
832
this_versions = set(self.target._index.get_versions())
833
needed_versions = self.source_ancestry - this_versions
834
cross_check_versions = self.source_ancestry.intersection(this_versions)
835
mismatched_versions = set()
836
for version in cross_check_versions:
837
# scan to include needed parents.
838
n1 = set(self.target.get_parents(version))
839
n2 = set(self.source.get_parents(version))
841
# FIXME TEST this check for cycles being introduced works
842
# the logic is we have a cycle if in our graph we are an
843
# ancestor of any of the n2 revisions.
849
parent_ancestors = self.source.get_ancestry(parent)
850
if version in parent_ancestors:
851
raise errors.GraphCycleError([parent, version])
852
# ensure this parent will be available later.
853
new_parents = n2.difference(n1)
854
needed_versions.update(new_parents.difference(this_versions))
855
mismatched_versions.add(version)
857
if not needed_versions and not cross_check_versions:
859
full_list = topo_sort(self.source._index.get_graph())
861
version_list = [i for i in full_list if (not self.target.has_version(i)
862
and i in needed_versions)]
865
for version_id in version_list:
866
data_pos, data_size = self.source._index.get_position(version_id)
867
records.append((version_id, data_pos, data_size))
870
for version_id, lines, digest \
871
in self.source._data.read_records_iter(records):
872
options = self.source._index.get_options(version_id)
873
parents = self.source._index.get_parents(version_id)
875
for parent in parents:
876
assert self.target.has_version(parent)
878
if self.target.factory.annotated:
879
# FIXME jrydberg: it should be possible to skip
880
# re-annotating components if we know that we are
881
# going to pull all revisions in the same order.
882
new_version_id = version_id
883
new_version_idx = self.target._index.num_versions()
884
if 'fulltext' in options:
885
lines = self.target._reannotate_fulltext(self.source, lines,
886
new_version_id, new_version_idx)
887
elif 'line-delta' in options:
888
lines = self.target._reannotate_line_delta(self.source, lines,
889
new_version_id, new_version_idx)
892
pb.update(self.target.filename, count, len(version_list))
894
pos, size = self.target._data.add_record(version_id, digest, lines)
895
self.target._index.add_version(version_id, options, pos, size, parents)
897
for version in mismatched_versions:
898
n1 = set(self.target.get_parents(version))
899
n2 = set(self.source.get_parents(version))
900
# write a combined record to our history.
901
new_parents = self.target.get_parents(version) + list(n2.difference(n1))
902
current_values = self.target._index._cache[version]
903
self.target._index.add_version(version,
912
InterVersionedFile.register_optimiser(InterKnit)