~bzr-pqm/bzr/bzr.dev

5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
1
# Copyright (C) 2007-2011 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Knit-based pack repository formats."""
18
19
from bzrlib.lazy_import import lazy_import
20
lazy_import(globals(), """
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
21
from itertools import izip
22
import time
23
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
24
from bzrlib import (
25
    bzrdir,
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
26
    debug,
27
    errors,
5757.3.4 by Jelmer Vernooij
Fix some imports.
28
    knit,
29
    osutils,
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
30
    pack,
5757.3.2 by Jelmer Vernooij
Fix import.
31
    revision as _mod_revision,
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
32
    trace,
5757.3.4 by Jelmer Vernooij
Fix some imports.
33
    tsort,
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
34
    ui,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
35
    xml5,
36
    xml6,
37
    xml7,
38
    )
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
39
from bzrlib.knit import (
5757.4.1 by Jelmer Vernooij
Move knit-specific constructor to repofmt.knitpack_repo.
40
    _KnitGraphIndex,
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
41
    KnitPlainFactory,
42
    KnitVersionedFiles,
43
    )
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
44
""")
45
46
from bzrlib import (
47
    btree_index,
48
    )
49
from bzrlib.index import (
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
50
    CombinedGraphIndex,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
51
    GraphIndex,
5757.3.4 by Jelmer Vernooij
Fix some imports.
52
    GraphIndexPrefixAdapter,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
53
    InMemoryGraphIndex,
54
    )
5757.4.1 by Jelmer Vernooij
Move knit-specific constructor to repofmt.knitpack_repo.
55
from bzrlib.repofmt.knitrepo import (
56
    KnitRepository,
57
    )
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
58
from bzrlib.repofmt.pack_repo import (
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
59
    NewPack,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
60
    RepositoryFormatPack,
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
61
    ResumedPack,
5757.3.1 by Jelmer Vernooij
Move _pack_reconcile.
62
    Packer,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
63
    PackCommitBuilder,
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
64
    PackRepository,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
65
    PackRootCommitBuilder,
5757.4.1 by Jelmer Vernooij
Move knit-specific constructor to repofmt.knitpack_repo.
66
    RepositoryPackCollection,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
67
    )
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
68
from bzrlib.repository import (
69
    StreamSource,
70
    )
71
72
5757.6.11 by Jelmer Vernooij
Fix space issue.
73
class KnitPackRepository(PackRepository, KnitRepository):
    """Pack repository whose versioned files are KnitVersionedFiles.

    The inventories, revisions, signatures and texts are each built on top
    of the matching index of the repository's pack collection.  Formats that
    support chks are rejected by the constructor.
    """

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Create the pack collection and the knit-backed versioned files.

        :param _format: The repository format.  It provides
            index_builder_class and index_class for the pack collection.
        :param a_bzrdir: Passed unchanged to PackRepository.__init__.
        :param control_files: Passed unchanged to PackRepository.__init__.
        :param _commit_builder_class: Passed unchanged to
            PackRepository.__init__.
        :param _serializer: Passed unchanged to PackRepository.__init__.
        :raises AssertionError: If self._format.supports_chks is true.
        """
        PackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        index_transport = self._transport.clone('indices')
        upload_transport = self._transport.clone('upload')
        pack_transport = self._transport.clone('packs')
        self._pack_collection = KnitRepositoryPackCollection(
            self,
            self._transport,
            index_transport,
            upload_transport,
            pack_transport,
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=False,
            )

        def knit_files(aggregate_index, max_delta_chain, **index_options):
            # Wrap one of the pack collection's indices in a knit graph index
            # and build the versioned files object that reads/writes via it.
            graph_index = _KnitGraphIndex(aggregate_index.combined_index,
                add_callback=aggregate_index.add_callback,
                is_locked=self.is_locked, **index_options)
            return KnitVersionedFiles(graph_index,
                data_access=aggregate_index.data_access,
                max_delta_chain=max_delta_chain)

        collection = self._pack_collection
        self.inventories = knit_files(collection.inventory_index, 200,
            deltas=True, parents=True)
        self.revisions = knit_files(collection.revision_index, 0,
            deltas=False, parents=True, track_external_parent_refs=True)
        self.signatures = knit_files(collection.signature_index, 0,
            deltas=False, parents=False)
        self.texts = knit_files(collection.text_index, 200,
            deltas=True, parents=True)
        self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def _get_source(self, to_format):
        """Return the stream source to use when sending to to_format.

        A KnitPackStreamSource is used only when the target format has the
        same network name as this repository's format; every other case is
        delegated to PackRepository._get_source.
        """
        if to_format.network_name() != self._format.network_name():
            return PackRepository._get_source(self, to_format)
        return KnitPackStreamSource(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        """Run a KnitReconcilePacker over packs and return its pack() result.

        :param pb: Passed to the packer's pack() call.
        """
        reconciler = KnitReconcilePacker(collection, packs, extension, revs)
        return reconciler.pack(pb)
137
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
138
139
class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """The serializer for this format: xml5.serializer_v5."""
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        """Ask the bzrdir format registry for the 'pack-0.92' format."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        """Setter half of _matchingbzrdir; the assigned value is dropped."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"
169
170
171
class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """The serializer for this format: xml7.serializer_v7."""
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        """Ask the bzrdir format registry for 'pack-0.92-subtree'."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        """Setter half of _matchingbzrdir; the assigned value is dropped."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"
209
210
211
class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """The serializer for this format: xml6.serializer_v6."""
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        """Ask the bzrdir format registry for the 'rich-root-pack' format."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        """Setter half of _matchingbzrdir; the assigned value is dropped."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"
248
249
250
class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """The serializer for this format: xml5.serializer_v5."""
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        """Ask the bzrdir format registry for the '1.6' format."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        """Setter half of _matchingbzrdir; the assigned value is dropped."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"
285
286
287
class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """The serializer for this format: xml6.serializer_v6."""
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        """Ask the bzrdir format registry for the '1.6.1-rich-root' format."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        """Setter half of _matchingbzrdir; the assigned value is dropped."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"
324
325
326
class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """The serializer for this format: xml7.serializer_v7."""
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        """Build the '1.6.1-rich-root' bzrdir format pointing at this format.

        The format obtained from the registry has its repository_format
        replaced with self before it is returned.
        """
        registry = bzrdir.format_registry
        matching = registry.make_bzrdir('1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        """Setter half of _matchingbzrdir; the assigned value is dropped."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

    def is_deprecated(self):
        """This format is always reported as deprecated."""
        return True
374
375
376
class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        """The serializer for this format: xml5.serializer_v5."""
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        """Ask the bzrdir format registry for the '1.9' format."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        """Setter half of _matchingbzrdir; the assigned value is dropped."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"
409
410
411
class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        """The serializer for this format: xml6.serializer_v6."""
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        """Ask the bzrdir format registry for the '1.9-rich-root' format."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        """Setter half of _matchingbzrdir; the assigned value is dropped."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"
445
446
447
class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree.  It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        """The serializer for this format: xml7.serializer_v7."""
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        """Ask the bzrdir format registry for 'development5-subtree'."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        """Setter half of _matchingbzrdir; the assigned value is dropped."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
            "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
            "1.6.1-subtree with B+Tree indices.\n")
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
488
489
490
class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so we can stream inventory texts before text
           content)
        5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        """Initialise the source; text keys are unknown until streaming."""
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        # Assigned by the inventory stream once it has been fully consumed.
        self._text_keys = None
        self._text_fetch_order = 'unordered'

    def _get_filtered_inv_stream(self, revision_ids):
        """Return ('inventories', stream) for the given revisions.

        While the returned stream is consumed, the text keys referenced by
        each inventory record are collected.  When the stream is exhausted,
        self._text_keys is set to those keys minus the text keys referenced
        by the inventories of the parent revisions.

        :raises errors.NoSuchRevision: From the stream, if a requested
            inventory record is absent.
        :raises ValueError: From the stream, if a record has a storage kind
            other than 'knit-delta-gz' or 'knit-ft-gz'.
        """
        source_repo = self.from_repository
        parent_ids = source_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(parent_id,) for parent_id in parent_ids]
        find_text_keys = source_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            source_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        # Only used for its record parsing helper.
        record_parser = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()
        revision_keys = [(rev_id,) for rev_id in revision_ids]

        def find_text_keys_from_content(record):
            storage_kind = record.storage_kind
            if storage_kind == 'knit-delta-gz':
                get_lines = factory.get_linedelta_content
            elif storage_kind == 'knit-ft-gz':
                get_lines = factory.get_fulltext_content
            else:
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            revision_id = record.key[-1]
            # read the entire thing
            content, _ = record_parser._parse_record(revision_id, raw_data)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in get_lines(content)]))

        def _filtered_inv_stream():
            inventories = source_repo.inventories
            records = inventories.get_record_stream(revision_keys,
                                                    'unordered', False)
            for record in records:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(source_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            self._text_keys = content_text_keys - parent_text_keys

        return ('inventories', _filtered_inv_stream())

    def _get_text_stream(self):
        """Return ('texts', stream) for the keys held in self._text_keys."""
        # Note: We know we don't have to handle adding root keys, because both
        # the source and target are the identical network name.
        texts = self.from_repository.texts
        text_stream = texts.get_record_stream(
            self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        """Yield the substreams for the revisions selected by search.

        The revision text substreams come first, then the filtered inventory
        stream, then the text stream.
        """
        revision_ids = search.get_keys()
        for substream in self._fetch_revision_texts(revision_ids):
            yield substream
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()
560
5757.3.1 by Jelmer Vernooij
Move _pack_reconcile.
561
5757.4.2 by Jelmer Vernooij
Add specific KnitPacker class.
562
class KnitPacker(Packer):
563
    """Packer that works with knit packs."""
564
565
    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        """Create a packer; every argument is forwarded to Packer.__init__."""
        base = super(KnitPacker, self)
        base.__init__(pack_collection, packs, suffix,
                      revision_ids=revision_ids, reload_func=reload_func)
570
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
571
    def _pack_map_and_index_list(self, index_attribute):
572
        """Convert a list of packs to an index pack map and index list.
573
574
        :param index_attribute: The attribute that the desired index is found
575
            on.
576
        :return: A tuple (map, list) where map contains the dict from
577
            index:pack_tuple, and list contains the indices in the preferred
578
            access order.
579
        """
580
        indices = []
581
        pack_map = {}
582
        for pack_obj in self.packs:
583
            index = getattr(pack_obj, index_attribute)
584
            indices.append(index)
585
            pack_map[index] = pack_obj
586
        return pack_map, indices
587
588
    def _index_contents(self, indices, key_filter=None):
        """Return an iterable over the entries of the combined indices.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned;
            when None every entry is iterated.
        """
        combined = CombinedGraphIndex(indices)
        if key_filter is not None:
            return combined.iter_entries(key_filter)
        return combined.iter_all_entries()
599
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
600
    def _copy_nodes(self, nodes, index_map, writer, write_index,
        output_lines=None):
        """Copy knit nodes between packs with no graph references.

        The copy itself is done by _do_copy_nodes under a nested progress
        bar, which is finished whether or not the copy succeeds.

        :param output_lines: Output full texts of copied items.
        """
        progress = ui.ui_factory.nested_progress_bar()
        try:
            copied = self._do_copy_nodes(nodes, index_map, writer,
                write_index, progress, output_lines=output_lines)
        finally:
            progress.finished()
        return copied
612
613
    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
        output_lines=None):
        """Copy the raw records for nodes into writer and index them.

        :param nodes: Iterable of (index, key, value) entries.  Each value is
            split as a one-character flag followed by "offset length".
        :param index_map: Mapping from an index to the pack object that
            provides access_tuple() for reading it.
        :param writer: Receives each raw record via add_bytes_record.
        :param write_index: Receives an add_node call for each copied record.
        :param pb: Progress bar updated once per copied record.
        :param output_lines: Optional callable given the parsed content of
            each record; when None only the record header is checked.
        """
        # for record verification
        record_parser = KnitVersionedFiles(None, None)
        ordered_nodes = sorted(nodes)
        # plan a readv on each source pack, so group the entries by index.
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        requests_by_index = {}
        for index, key, value in ordered_nodes:
            requests_by_index.setdefault(index, []).append((key, value))
        copied = 0
        pb.update("Copied record", copied, len(ordered_nodes))
        for index, entries in requests_by_index.iteritems():
            readv_requests = []
            for key, value in entries:
                # ---- KnitGraphIndex.get_position
                fields = value[1:].split(' ')
                start, size_on_disk = int(fields[0]), int(fields[1])
                readv_requests.append((start, size_on_disk, (key, value[0])))
            # linear scan up the pack
            readv_requests.sort()
            # copy the data
            source_pack = index_map[index]
            transport, path = source_pack.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                    [request[0:2] for request in readv_requests])
            except errors.NoSuchFile:
                # Give the caller-supplied hook a chance to run before the
                # error propagates.
                if self._reload_func is not None:
                    self._reload_func()
                raise
            record_pairs = izip(reader.iter_records(), readv_requests)
            for (names, read_func), (_1, _2, (key, eol_flag)) in record_pairs:
                raw_data = read_func(None)
                if output_lines is None:
                    # check the header only
                    df, _ = record_parser._parse_record_header(key, raw_data)
                    df.close()
                else:
                    output_lines(
                        record_parser._parse_record(key[-1], raw_data)[0])
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", copied)
                copied += 1
663
664
    def _copy_nodes_graph(self, index_map, writer, write_index,
        readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        This is a generator: nothing is copied until it is iterated.  The
        work is done by _do_copy_nodes_graph under a nested progress bar.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        progress = ui.ui_factory.nested_progress_bar()
        try:
            copier = self._do_copy_nodes_graph(index_map, writer,
                write_index, output_lines, progress, readv_group_iter,
                total_items)
            for item in copier:
                yield item
        except Exception:
            # Python 2.4 does not permit try:finally: in a generator.
            # NOTE(review): exits that are not Exception subclasses (for
            # example the generator being closed early on newer Pythons)
            # skip this cleanup - consider try/finally if 2.4 support is
            # no longer needed.
            progress.finished()
            raise
        else:
            progress.finished()
682
683
    def _do_copy_nodes_graph(self, index_map, writer, write_index,
        output_lines, pb, readv_group_iter, total_items):
        """Copy grouped records into writer, indexing them with references.

        :param index_map: Mapping from an index to the pack object that
            provides access_tuple() for reading it.
        :param writer: Receives each raw record via add_bytes_record.
        :param write_index: Receives an add_node call (with references) for
            each copied record.
        :param output_lines: When true, each record is fully parsed and
            (line, key) pairs are yielded; otherwise only the record header
            is checked and nothing is yielded.
        :param pb: Progress bar updated once per copied record.
        :param readv_group_iter: Iterable of
            (index, readv_vector, node_vector) groups.
        :param total_items: Total passed to the initial progress update.
        """
        # for record verification
        record_parser = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        copied = 0
        pb.update("Copied record", copied, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            source_pack = index_map[index]
            transport, path = source_pack.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                # Give the caller-supplied hook a chance to run before the
                # error propagates.
                if self._reload_func is not None:
                    self._reload_func()
                raise
            record_pairs = izip(reader.iter_records(), node_vector)
            for (names, read_func), (key, eol_flag, references) in record_pairs:
                raw_data = read_func(None)
                if not output_lines:
                    # check the header only
                    df, _ = record_parser._parse_record_header(key, raw_data)
                    df.close()
                else:
                    # read the entire thing
                    content, _ = record_parser._parse_record(key[-1], raw_data)
                    # An empty last reference list selects fulltext parsing;
                    # anything else is parsed as a line delta.
                    if len(references[-1]) != 0:
                        line_iterator = factory.get_linedelta_content(content)
                    else:
                        line_iterator = factory.get_fulltext_content(content)
                    for line in line_iterator:
                        yield line, key
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size),
                    references)
                pb.update("Copied record", copied)
                copied += 1
722
723
    def _process_inventory_lines(self, inv_lines):
        """Use up the inv_lines generator and setup a text key filter.

        :param inv_lines: Inventory lines handed to the repository's
            _find_file_ids_from_xml_inventory_lines together with
            self.revision_keys. The resulting (fileid, file_revid) pairs
            are stored in self._text_filter.
        """
        repo = self._pack_collection.repo
        revisions_by_fileid = repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        wanted_text_keys = []
        for fileid, file_revids in revisions_by_fileid.iteritems():
            for file_revid in file_revids:
                wanted_text_keys.append((fileid, file_revid))
        self._text_filter = wanted_text_keys
732
733
    def _copy_inventory_texts(self):
        """Copy inventory records into the new pack and set the text filter.

        When specific revisions were requested the copied inventory lines
        are handed to _process_inventory_lines; otherwise the copy is simply
        run to completion and self._text_filter is set to None.
        """
        # Select inventory keys: currently the same keyspace as revisions.
        # Note that querying for keys here could introduce a bug where an
        # inventory item is missed, so do not change it to query separately
        # without cross checking like the text key check in
        # _copy_text_texts.
        inv_keys = self._revision_keys
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        want_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=want_lines)
        if not want_lines:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        else:
            self._process_inventory_lines(inv_lines)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)
764
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
765
    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        Rewrites self.packs so that the packs referenced by 'entries' come
        first, in order of first use, followed by the packs 'entries' never
        touches. Future requests can then avoid probing the unused packs.
        If every pack is used, self.packs is left alone.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        used_packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index in seen_indexes:
                continue
            used_packs.append(index_to_pack_map[index])
            seen_indexes.add(index)
        if len(used_packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        # Append the remaining packs, keeping their current relative order.
        reordered = used_packs
        seen_packs = set(reordered)
        for a_pack in self.packs:
            if a_pack in seen_packs:
                continue
            reordered.append(a_pack)
            seen_packs.add(a_pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in reordered]
            trace.mutter('Reordering packs\nfrom: %s\n  to: %s',
                   old_names, new_names)
        self.packs = reordered
798
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
799
    def _copy_revision_texts(self):
        """Copy revision records into the new pack.

        Also reorders self.packs through _update_pack_order and stores the
        selected keys in self._revision_keys (None when self.revision_ids
        is empty or unset).
        """
        # select revisions
        revision_keys = None
        if self.revision_ids:
            revision_keys = [(rev_id,) for rev_id in self.revision_ids]
        # select revision keys
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = list(
            self._index_contents(revision_indices, revision_keys))
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
        copier = self._copy_nodes_graph(revision_index_map,
            self.new_pack._writer, self.new_pack.revision_index,
            readv_group_iter, total_items)
        # The copy is a generator; exhaust it so that it actually runs.
        list(copier)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.revision_index.key_count(),
                time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys
823
824
    def _get_text_nodes(self):
        """Return the text index map and the text nodes to consider.

        :return: (text_index_map, nodes) where nodes is whatever
            _index_contents yields for the text indices restricted by
            self._text_filter.
        """
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        text_nodes = self._index_contents(text_indices, self._text_filter)
        return text_index_map, text_nodes
829
830
    def _copy_text_texts(self):
        """Copy text records into the new pack.

        :raises errors.RevisionNotPresent: if self._text_filter names a text
            key that none of the source text indices contain.
        """
        # select text keys
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing records.
            text_nodes = set(text_nodes)
            present_text_keys = set(a_node[1] for a_node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                trace.mutter("missing keys during fetch: %r", missing_text_keys)
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                    a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
        copier = self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items)
        # The copy is a generator; exhaust it so that it actually runs.
        list(copier)
        self._log_copied_texts()
856
857
    def _create_pack_from_packs(self):
        """Build a new pack from the source packs.

        Copies revisions, inventories, texts and signatures in that order,
        checks the new pack's references, and then either finishes and
        allocates the pack or aborts it.

        :return: The new pack, or None if _use_pack rejected it (in which
            case the pack has been aborted).
        """
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is None:
                rev_count = 'all'
            else:
                rev_count = len(self.revision_ids)
            trace.mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys; they share the revision keyspace
        signature_filter = self._revision_keys
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        new_pack._check_references()
        if self._use_pack(new_pack):
            self.pb.update("Finishing pack", 5)
            new_pack.finish()
            self._pack_collection.allocate(new_pack)
            return new_pack
        new_pack.abort()
        return None
901
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
902
    def _least_readv_node_readv(self, nodes):
903
        """Generate request groups for nodes using the least readv's.
904
905
        :param nodes: An iterable of graph index nodes.
906
        :return: Total node count and an iterator of the data needed to perform
907
            readvs to obtain the data for nodes. Each item yielded by the
908
            iterator is a tuple with:
909
            index, readv_vector, node_vector. readv_vector is a list ready to
910
            hand to the transport readv method, and node_vector is a list of
911
            (key, eol_flag, references) for the node retrieved by the
912
            matching readv_vector.
913
        """
914
        # group by pack so we do one readv per pack
915
        nodes = sorted(nodes)
916
        total = len(nodes)
917
        request_groups = {}
918
        for index, key, value, references in nodes:
919
            if index not in request_groups:
920
                request_groups[index] = []
921
            request_groups[index].append((key, value, references))
922
        result = []
923
        for index, items in request_groups.iteritems():
924
            pack_readv_requests = []
925
            for key, value, references in items:
926
                # ---- KnitGraphIndex.get_position
927
                bits = value[1:].split(' ')
928
                offset, length = int(bits[0]), int(bits[1])
929
                pack_readv_requests.append(
930
                    ((offset, length), (key, value[0], references)))
931
            # linear scan up the pack to maximum range combining.
932
            pack_readv_requests.sort()
933
            # split out the readv and the node data.
934
            pack_readv = [readv for readv, node in pack_readv_requests]
935
            node_vector = [node for readv, node in pack_readv_requests]
936
            result.append((index, pack_readv, node_vector))
937
        return total, result
938
939
    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This implementation applies no revision-specific ordering; it
        delegates straight to _least_readv_node_readv.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        readv_plan = self._least_readv_node_readv(revision_nodes)
        return readv_plan
947
5757.4.2 by Jelmer Vernooij
Add specific KnitPacker class.
948
949
class KnitReconcilePacker(KnitPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``bzr reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def __init__(self, *args, **kwargs):
        super(KnitReconcilePacker, self).__init__(*args, **kwargs)
        # Becomes True as soon as the output is known to differ from the
        # input; consulted by _use_pack.
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map rather for reconciling with."""
        serializer = self._pack_collection.repo._serializer
        self._text_refs = serializer._find_text_key_references(inv_lines)
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a full
        #    text.
        self._text_filter = None

    def _copy_text_texts(self):
        """generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = {}
        for _index, rev_key, _value, rev_refs in \
            self.new_pack.revision_index.iter_all_entries():
            ancestors[rev_key[0]] = tuple(ref[0] for ref in rev_refs[0])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        # Collected but not consulted again in this method.
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # node is (index, key, value, refs)
            node_index, node_key, node_value, node_refs = node
            try:
                ideal_parents = tuple(ideal_index[node_key])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
                continue
            if ideal_parents == (NULL_REVISION,):
                ideal_parents = ()
            current_parents = node_refs[0]
            if ideal_parents == current_parents:
                # no change needed.
                ok_nodes.append(node)
            elif ideal_parents[0:1] == current_parents[0:1]:
                # the left most parent is the same, or there are no parents
                # today. Either way, we can preserve the representation as
                # long as we change the refs to be inserted.
                self._data_changed = True
                ok_nodes.append((node_index, node_key, node_value,
                    (ideal_parents, node_refs[1])))
            else:
                # Reinsert this text completely
                bad_texts.append((node_key, ideal_parents))
                self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        copier = self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items)
        # The copy is a generator; exhaust it so that it actually runs.
        list(copier)
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(
            key=lambda bad_text: rev_order.get(bad_text[0][1], 0))
        # The next two results are not used further in this method; the
        # calls are kept exactly as they were.
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = knit._DirectPackAccess(
                {self.new_pack.text_index:self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        graph_index = _KnitGraphIndex(self.new_pack.text_index,
            add_callback=self.new_pack.text_index.add_nodes,
            deltas=True, parents=True, is_locked=repo.is_locked)
        output_texts = KnitVersionedFiles(graph_index,
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack to delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            record_stream = repo.texts.get_record_stream(
                [key], 'unordered', True)
            fulltext = record_stream.next().get_bytes_as('fulltext')
            text_lines = osutils.split_lines(fulltext)
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        inv_index = self._pack_collection.inventory_index.combined_index
        original_inventory_keys = set(
            entry[1] for entry in inv_index.iter_all_entries())
        new_inventory_keys = set(
            entry[1] for entry in new_pack.inventory_index.iter_all_entries())
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed
1089
1090
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
1091
class OptimisingKnitPacker(KnitPacker):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children. Each revision gets its own single-range request.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict, remembering the rest of each node by key
        parent_map = {}
        node_details = {}
        for index, key, value, references in revision_nodes:
            parent_map[key] = references[0]
            node_details[key] = (index, value, references)
        order = tsort.topo_sort(parent_map)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
            index, value, references = node_details[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(' ')
            offset, length = int(bits[0]), int(bits[1])
            readv_vector = [(offset, length)]
            node_vector = [(key, value[0], references)]
            requests.append((index, readv_vector, node_vector))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        for index_name in ('revision_index', 'inventory_index',
                           'text_index', 'signature_index'):
            getattr(new_pack, index_name).set_optimize(for_size=True)
        return new_pack
1133
1134
1135
class KnitRepositoryPackCollection(RepositoryPackCollection):
    """A knit pack collection."""

    # Classes used for packs in this collection.
    pack_factory = NewPack
    resumed_pack_factory = ResumedPack
    # Packer classes defined in this module.
    normal_packer_class = KnitPacker
    optimising_packer_class = OptimisingKnitPacker
1142
1143
5757.3.1 by Jelmer Vernooij
Move _pack_reconcile.
1144