~bzr-pqm/bzr/bzr.dev

5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
1
# Copyright (C) 2007-2011 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Knit-based pack repository formats."""
18
19
from bzrlib.lazy_import import lazy_import
20
lazy_import(globals(), """
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
21
from itertools import izip
22
import time
23
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
24
from bzrlib import (
25
    bzrdir,
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
26
    debug,
27
    errors,
5757.3.4 by Jelmer Vernooij
Fix some imports.
28
    knit,
29
    osutils,
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
30
    pack,
5757.3.2 by Jelmer Vernooij
Fix import.
31
    revision as _mod_revision,
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
32
    trace,
5757.3.4 by Jelmer Vernooij
Fix some imports.
33
    tsort,
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
34
    ui,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
35
    xml5,
36
    xml6,
37
    xml7,
38
    )
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
39
from bzrlib.knit import (
5757.4.1 by Jelmer Vernooij
Move knit-specific constructor to repofmt.knitpack_repo.
40
    _KnitGraphIndex,
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
41
    KnitPlainFactory,
42
    KnitVersionedFiles,
43
    )
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
44
""")
45
46
from bzrlib import (
47
    btree_index,
48
    )
49
from bzrlib.index import (
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
50
    CombinedGraphIndex,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
51
    GraphIndex,
5757.3.4 by Jelmer Vernooij
Fix some imports.
52
    GraphIndexPrefixAdapter,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
53
    InMemoryGraphIndex,
54
    )
5757.4.1 by Jelmer Vernooij
Move knit-specific constructor to repofmt.knitpack_repo.
55
from bzrlib.repofmt.knitrepo import (
56
    KnitRepository,
57
    )
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
58
from bzrlib.repofmt.pack_repo import (
5757.8.7 by Jelmer Vernooij
Merge moving of _DirectPackAccess.
59
    _DirectPackAccess,
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
60
    NewPack,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
61
    RepositoryFormatPack,
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
62
    ResumedPack,
5757.3.1 by Jelmer Vernooij
Move _pack_reconcile.
63
    Packer,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
64
    PackCommitBuilder,
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
65
    PackRepository,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
66
    PackRootCommitBuilder,
5757.4.1 by Jelmer Vernooij
Move knit-specific constructor to repofmt.knitpack_repo.
67
    RepositoryPackCollection,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
68
    )
5815.4.1 by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository.
69
from bzrlib.vf_repository import (
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
70
    StreamSource,
71
    )
72
73
5757.6.11 by Jelmer Vernooij
Fix space issue.
74
class KnitPackRepository(PackRepository, KnitRepository):
    """A pack repository whose record streams are stored in knit format.

    Combines the pack storage layout (PackRepository) with knit-format
    record handling (KnitRepository).  Knit pack formats never carry a
    chk index.
    """

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Create a KnitPackRepository.

        :param _format: The repository format; must not support chks.
        :param a_bzrdir: The bzrdir this repository lives in.
        :param control_files: Control/lock files for the repository.
        :param _commit_builder_class: Class used to build commits.
        :param _serializer: Serializer for revisions and inventories.
        """
        PackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # Knit packs have no chk index at all; refuse a format claiming one.
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        index_transport = self._transport.clone('indices')
        self._pack_collection = KnitRepositoryPackCollection(self,
            self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=False,
            )
        # Wire the four versioned-file stores over the pack collection's
        # indices.  Inventories and texts are delta-compressed
        # (deltas=True, max_delta_chain=200); revisions and signatures are
        # stored as fulltexts (deltas=False, max_delta_chain=0).
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        # No chk store for knit packs (see the assertion above).
        self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def _get_source(self, to_format):
        """Return a stream source for to_format.

        Uses the optimized same-format KnitPackStreamSource when the target
        format's network name matches ours; otherwise defers to the generic
        PackRepository implementation.
        """
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return PackRepository._get_source(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        """Repack `packs` via a KnitReconcilePacker as part of reconcile."""
        packer = KnitReconcilePacker(collection, packs, extension, revs)
        return packer.pack(pb)
138
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
139
140
class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    # Implementation classes used for repositories of this format.
    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    @property
    def _serializer(self):
        # A property rather than a class attribute because xml5 is brought
        # in via lazy_import at the top of this module.
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        """Return the bzrdir format that pairs with this repository format."""
        return bzrdir.format_registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        # Setter half of the _matchingbzrdir property: assignments are
        # deliberately ignored.
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"
170
171
172
class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    # Implementation classes used for repositories of this format.
    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    @property
    def _serializer(self):
        # A property rather than a class attribute because xml7 is brought
        # in via lazy_import at the top of this module.
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        """Return the bzrdir format that pairs with this repository format."""
        return bzrdir.format_registry.make_bzrdir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        # Setter half of the _matchingbzrdir property: assignments are
        # deliberately ignored.
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        # NOTE(review): trailing "\n" differs from most sibling formats'
        # descriptions — presumably historical; confirm before changing.
        return "Packs containing knits with subtree support\n"
210
211
212
class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    # Implementation classes used for repositories of this format.
    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False
    @property
    def _serializer(self):
        # A property rather than a class attribute because xml6 is brought
        # in via lazy_import at the top of this module.
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        """Return the bzrdir format that pairs with this repository format."""
        return bzrdir.format_registry.make_bzrdir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        # Setter half of the _matchingbzrdir property: assignments are
        # deliberately ignored.
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        # NOTE(review): trailing "\n" differs from most sibling formats'
        # descriptions — presumably historical; confirm before changing.
        return "Packs containing knits with rich root support\n"
249
250
251
class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    # Implementation classes used for repositories of this format.
    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        # A property rather than a class attribute because xml5 is brought
        # in via lazy_import at the top of this module.
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        """Return the bzrdir format that pairs with this repository format."""
        return bzrdir.format_registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        # Setter half of the _matchingbzrdir property: assignments are
        # deliberately ignored.
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"
286
287
288
class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    # Implementation classes used for repositories of this format.
    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        # A property rather than a class attribute because xml6 is brought
        # in via lazy_import at the top of this module.
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        """Return the bzrdir format that pairs with this repository format."""
        return bzrdir.format_registry.make_bzrdir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        # Setter half of the _matchingbzrdir property: assignments are
        # deliberately ignored.
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"
325
326
327
class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    # Implementation classes used for repositories of this format.
    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees

    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        # A property rather than a class attribute because xml7 is brought in
        # via lazy_import.  xml7 supporting subtrees is exactly the accident
        # described in the class docstring.
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        """Return the bzrdir format that pairs with this repository format."""
        matching = bzrdir.format_registry.make_bzrdir(
            '1.6.1-rich-root')
        # Point the matching bzrdir at this (deprecated) repository format
        # rather than the registry default — presumably so existing broken
        # repositories still resolve; confirm before changing.
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        # Setter half of the _matchingbzrdir property: assignments are
        # deliberately ignored.
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

    def is_deprecated(self):
        """This format is deprecated; see the class docstring for why."""
        return True
375
376
377
class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    # Implementation classes used for repositories of this format.
    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        # A property rather than a class attribute because xml5 is brought
        # in via lazy_import at the top of this module.
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        """Return the bzrdir format that pairs with this repository format."""
        return bzrdir.format_registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        # Setter half of the _matchingbzrdir property: assignments are
        # deliberately ignored.
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"
410
411
412
class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    # Implementation classes used for repositories of this format.
    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        # A property rather than a class attribute because xml6 is brought
        # in via lazy_import at the top of this module.
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        """Return the bzrdir format that pairs with this repository format."""
        return bzrdir.format_registry.make_bzrdir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        # Setter half of the _matchingbzrdir property: assignments are
        # deliberately ignored.
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"
446
447
448
class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree.  It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    # Implementation classes used for repositories of this format.
    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        # A property rather than a class attribute because xml7 is brought
        # in via lazy_import at the top of this module.
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        """Return the bzrdir format that pairs with this repository format."""
        return bzrdir.format_registry.make_bzrdir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        # Setter half of the _matchingbzrdir property: assignments are
        # deliberately ignored.
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
            "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
            "1.6.1-subtree with B+Tree indices.\n")
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
489
490
491
class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so we can stream inventory texts before text
           content)
        5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        # Text keys to stream later; filled in as a side effect of consuming
        # the inventory stream returned by _get_filtered_inv_stream.
        self._text_keys = None
        self._text_fetch_order = 'unordered'

    def _get_filtered_inv_stream(self, revision_ids):
        """Return an ('inventories', stream) pair for revision_ids.

        As the returned stream is consumed, the text keys referenced by the
        streamed inventories — minus those already referenced by their parent
        inventories — are accumulated into self._text_keys for the later
        text stream.
        """
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(p,) for p in parent_ids]
        find_text_keys = from_repo._serializer._find_text_key_references
        # Text keys already referenced by the parents do not need sending.
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        # Bare KnitVersionedFiles instance used only for record parsing.
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()
        def find_text_keys_from_content(record):
            # Extract the text keys referenced by one inventory record and
            # add them to content_text_keys.
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            # read the entire thing
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            if record.storage_kind == 'knit-delta-gz':
                line_iterator = factory.get_linedelta_content(content)
            elif record.storage_kind == 'knit-ft-gz':
                line_iterator = factory.get_fulltext_content(content)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in line_iterator]))
        revision_keys = [(r,) for r in revision_ids]
        def _filtered_inv_stream():
            source_vf = from_repo.inventories
            stream = source_vf.get_record_stream(revision_keys,
                                                 'unordered', False)
            for record in stream:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            # Only texts not already reachable via a parent inventory need
            # to be streamed.
            self._text_keys = content_text_keys - parent_text_keys
        return ('inventories', _filtered_inv_stream())

    def _get_text_stream(self):
        """Return a ('texts', stream) pair for the keys gathered earlier.

        Only valid after the inventory stream has been fully consumed, as
        that is what populates self._text_keys.
        """
        # Note: We know we don't have to handle adding root keys, because both
        # the source and target are the identical network name.
        text_stream = self.from_repository.texts.get_record_stream(
                        self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        """See StreamSource.get_stream().

        Yields the revision-related streams first, then the filtered
        inventory stream (which gathers self._text_keys as it is consumed),
        then the text stream derived from those keys.
        """
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()
561
5757.3.1 by Jelmer Vernooij
Move _pack_reconcile.
562
5757.4.2 by Jelmer Vernooij
Add specific KnitPacker class.
563
class KnitPacker(Packer):
564
    """Packer that works with knit packs."""
565
566
    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        """Create a KnitPacker.

        All parameters are passed straight through to Packer.__init__; this
        override adds no behaviour of its own.
        """
        super(KnitPacker, self).__init__(pack_collection, packs, suffix,
                                          revision_ids=revision_ids,
                                          reload_func=reload_func)
571
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
572
    def _pack_map_and_index_list(self, index_attribute):
        """Build an index->pack mapping plus the ordered index list.

        :param index_attribute: Name of the attribute on each pack object
            that holds the desired index.
        :return: A tuple (pack_map, indices): pack_map maps each index back
            to its pack object, and indices lists the indices in the
            preferred access order.
        """
        pack_map = {}
        indices = []
        for pack_obj in self.packs:
            idx = getattr(pack_obj, index_attribute)
            pack_map[idx] = pack_obj
            indices.append(idx)
        return pack_map, indices
588
589
    def _index_contents(self, indices, key_filter=None):
        """Iterate the combined contents of a list of indices.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
        """
        combined = CombinedGraphIndex(indices)
        if key_filter is not None:
            return combined.iter_entries(key_filter)
        return combined.iter_all_entries()
600
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
601
    def _copy_nodes(self, nodes, index_map, writer, write_index,
        output_lines=None):
        """Copy knit nodes between packs with no graph references.

        Thin wrapper around _do_copy_nodes that keeps a nested progress bar
        alive for the duration of the copy and always finishes it.

        :param output_lines: Output full texts of copied items.
        """
        progress = ui.ui_factory.nested_progress_bar()
        try:
            result = self._do_copy_nodes(nodes, index_map, writer,
                write_index, progress, output_lines=output_lines)
        finally:
            progress.finished()
        return result
613
614
    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
        output_lines=None):
        """Worker for _copy_nodes: group records by source pack and copy.

        :param nodes: Iterable of (index, key, value) index entries to copy.
        :param index_map: Maps each index to its source pack object.
        :param writer: Container writer for the new pack.
        :param write_index: Index builder given one node per copied record.
        :param pb: Progress bar, updated once per copied record.
        :param output_lines: Optional callable; when given, each record is
            fully parsed and its content handed to it, otherwise only the
            record header is verified.
        """
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
                # value[0] is the eol flag byte; the rest is "<offset> <length>..."
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0])))
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                    [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                # Give the caller's reload hook a chance to refresh the pack
                # list before the error propagates.
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (_1, _2, (key, eol_flag)) in \
                izip(reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # full parse when collecting lines, else check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1
664
665
    def _copy_nodes_graph(self, index_map, writer, write_index,
        readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        Generator wrapper around _do_copy_nodes_graph that keeps a nested
        progress bar alive for the duration of the copy.

        :param index_map: Maps each index to its source pack object.
        :param writer: Container writer for the new pack.
        :param write_index: Index builder receiving the copied nodes.
        :param readv_group_iter: Iterable of (index, readv_vector,
            node_vector) read groups.
        :param total_items: Total record count, for progress reporting.
        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for result in self._do_copy_nodes_graph(index_map, writer,
                write_index, output_lines, pb, readv_group_iter, total_items):
                yield result
        except Exception:
            # Python 2.4 does not permit try:finally: in a generator.
            pb.finished()
            raise
        else:
            pb.finished()
683
684
    def _do_copy_nodes_graph(self, index_map, writer, write_index,
        output_lines, pb, readv_group_iter, total_items):
        """Worker for _copy_nodes_graph: stream records group by group.

        Reads each readv group from its source pack, optionally yields the
        record content as (line, key) pairs, and appends every record (with
        its graph references) to the new pack and its index.
        """
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                # Give the caller's reload hook a chance to refresh the pack
                # list before the error propagates.
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (key, eol_flag, references) in \
                izip(reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    # Empty last-reference list => record stored as a
                    # fulltext, otherwise it is a line delta.
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1
723
724
    def _process_inventory_lines(self, inv_lines):
        """Consume inv_lines and build the text key filter.

        The (file_id, revision_id) pairs referenced by the copied
        inventories are recorded in self._text_filter for the later
        text-copy phase.
        """
        collection_repo = self._pack_collection.repo
        id_to_revids = collection_repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        self._text_filter = [
            (file_id, revid)
            for file_id, revids in id_to_revids.iteritems()
            for revid in revids]
    def _copy_inventory_texts(self):
        """Copy inventory texts for the selected revisions to the new pack."""
        # The inventory keyspace currently matches the revision keyspace; note
        # that querying the indices for keys here could introduce a bug where
        # an inventory item is missed, so do not change this to query
        # separately without cross checking like the text key check in
        # _copy_text_texts.
        inventory_keys = self._revision_keys
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inventory_nodes = self._index_contents(inventory_indices,
            inventory_keys)
        # XXX: Should be a helper function to allow different inv
        # representation at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(
            inventory_nodes)
        # Only ask for the content lines if we are actually going to process
        # them.
        want_lines = bool(self.revision_ids)
        copied_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=want_lines)
        if self.revision_ids:
            self._process_inventory_lines(copied_lines)
        else:
            # Drain the generator so the copy actually executes.
            list(copied_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)
    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them.  Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            # Every pack was used; keep the existing order.
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        seen_packs = set(packs)
        # Append the unused packs after the used ones, preserving their
        # existing relative order.  Note: the loop variable is named a_pack
        # (not 'pack') to avoid shadowing the lazily-imported ``pack`` module
        # used elsewhere in this class.
        for a_pack in self.packs:
            if a_pack not in seen_packs:
                packs.append(a_pack)
                seen_packs.add(a_pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            trace.mutter('Reordering packs\nfrom: %s\n  to: %s',
                   old_names, new_names)
        self.packs = packs
    def _copy_revision_texts(self):
        """Copy the revision texts for self.revision_ids to the new pack.

        A false-y self.revision_ids means "copy everything".
        """
        # Work out which revision keys to copy; None selects all revisions.
        revision_keys = None
        if self.revision_ids:
            revision_keys = [(revision_id,)
                for revision_id in self.revision_ids]
        # Locate the revision nodes in the packs being combined.
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = list(self._index_contents(revision_indices,
            revision_keys))
        self._update_pack_order(revision_nodes, revision_index_map)
        # Copy the revision records across.
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(
            revision_nodes)
        copier = self._copy_nodes_graph(revision_index_map,
            self.new_pack._writer, self.new_pack.revision_index,
            readv_group_iter, total_items)
        # The generator only does work as it is consumed.
        list(copier)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.revision_index.key_count(),
                time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys
    def _get_text_nodes(self):
        """Return the text index map and the nodes for self._text_filter."""
        index_map, indices = self._pack_map_and_index_list('text_index')
        nodes = self._index_contents(indices, self._text_filter)
        return index_map, nodes
    def _copy_text_texts(self):
        """Copy the file texts selected by self._text_filter to the new pack."""
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # Buffer the nodes so we can check for missing keys up front.
            # (We could return the keys copied as part of the return value
            # from _copy_nodes_graph, but that doesn't work all that well
            # with the need to get line output too.)  Checking beforehand
            # saves reading knit data over the wire when we know records are
            # missing.
            text_nodes = set(text_nodes)
            present_keys = set(node[1] for node in text_nodes)
            missing_keys = set(self._text_filter) - present_keys
            if missing_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                trace.mutter("missing keys during fetch: %r", missing_keys)
                file_id, revision_id = missing_keys.pop()
                raise errors.RevisionNotPresent(revision_id, file_id)
        # Copy the selected text records into the new pack.
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(
            text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()
    def _create_pack_from_packs(self):
        """Create self.new_pack by copying data from self.packs.

        Copies revisions, inventories, texts and signatures in that order,
        then either finishes the new pack and allocates it in the pack
        collection, or aborts it when _use_pack declines it.

        :return: The finished new pack, or None if it was aborted.
        """
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            trace.mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        # Verify references are satisfiable before committing to this pack.
        new_pack._check_references()
        if not self._use_pack(new_pack):
            # _use_pack decided the pack is not worth keeping (e.g. no data
            # was inserted); discard it.
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack
    def _least_readv_node_readv(self, nodes):
904
        """Generate request groups for nodes using the least readv's.
905
906
        :param nodes: An iterable of graph index nodes.
907
        :return: Total node count and an iterator of the data needed to perform
908
            readvs to obtain the data for nodes. Each item yielded by the
909
            iterator is a tuple with:
910
            index, readv_vector, node_vector. readv_vector is a list ready to
911
            hand to the transport readv method, and node_vector is a list of
912
            (key, eol_flag, references) for the node retrieved by the
913
            matching readv_vector.
914
        """
915
        # group by pack so we do one readv per pack
916
        nodes = sorted(nodes)
917
        total = len(nodes)
918
        request_groups = {}
919
        for index, key, value, references in nodes:
920
            if index not in request_groups:
921
                request_groups[index] = []
922
            request_groups[index].append((key, value, references))
923
        result = []
924
        for index, items in request_groups.iteritems():
925
            pack_readv_requests = []
926
            for key, value, references in items:
927
                # ---- KnitGraphIndex.get_position
928
                bits = value[1:].split(' ')
929
                offset, length = int(bits[0]), int(bits[1])
930
                pack_readv_requests.append(
931
                    ((offset, length), (key, value[0], references)))
932
            # linear scan up the pack to maximum range combining.
933
            pack_readv_requests.sort()
934
            # split out the readv and the node data.
935
            pack_readv = [readv for readv, node in pack_readv_requests]
936
            node_vector = [node for readv, node in pack_readv_requests]
937
            result.append((index, pack_readv, node_vector))
938
        return total, result
939
940
    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This base implementation simply delegates to
        _least_readv_node_readv; subclasses may impose a different order.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        readv_plan = self._least_readv_node_readv(revision_nodes)
        return readv_plan
class KnitReconcilePacker(KnitPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``bzr reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def __init__(self, *args, **kwargs):
        super(KnitReconcilePacker, self).__init__(*args, **kwargs)
        # Set to True whenever the copied data differs from the original, so
        # that _use_pack can tell whether reconcile changed anything.
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with."""
        repo = self._pack_collection.repo
        refs = repo._serializer._find_text_key_references(inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a full
        #    text.
        self._text_filter = None

    def _copy_text_texts(self):
        """generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # node is (index, key, value, refs)
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                # Not referenced by any inventory: drop it.
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key:rev_order.get(key[0][1], 0))
        # XXX: 'transaction' and 'file_id_index' appear unused below; they are
        # kept in case their construction has side effects relied upon
        # elsewhere — TODO confirm and remove.
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
                {self.new_pack.text_index:self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack to delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            # Validate that every graph parent matches the fileid of the key
            # being inserted.
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
            text_lines = osutils.split_lines(repo.texts.get_record_stream(
                [key], 'unordered', True).next().get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        # A differing inventory key set means reconcile changed something.
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed
class OptimisingKnitPacker(KnitPacker):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # Map each revision key to its parents, and separately remember the
        # details needed to issue a read for it.
        parent_map = {}
        node_details = {}
        for index, key, value, references in revision_nodes:
            parent_map[key] = references[0]
            node_details[key] = (index, value, references)
        sorted_keys = tsort.topo_sort(parent_map)
        total = len(sorted_keys)
        # Single IO per revision is pathological, but it will work as a
        # starting point.
        # TODO: combine requests in the same index that are in ascending
        # order.
        requests = []
        for key in reversed(sorted_keys):
            index, value, references = node_details[key]
            # value is "<eol_flag><offset> <length>"
            # (see KnitGraphIndex.get_position)
            offset_str, length_str = value[1:].split(' ')
            requests.append(
                (index, [(int(offset_str), int(length_str))],
                 [(key, value[0], references)]))
        return total, requests

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        for index in (new_pack.revision_index, new_pack.inventory_index,
                      new_pack.text_index, new_pack.signature_index):
            index.set_optimize(for_size=True)
        return new_pack
class KnitRepositoryPackCollection(RepositoryPackCollection):
    """A knit pack collection."""

    # Classes the base RepositoryPackCollection machinery instantiates for
    # this format: new/resumed packs and the packers used for normal and
    # optimising repacks.
    pack_factory = NewPack
    resumed_pack_factory = ResumedPack
    normal_packer_class = KnitPacker
    optimising_packer_class = OptimisingKnitPacker