~bzr-pqm/bzr/bzr.dev

5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
1
# Copyright (C) 2007-2011 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Knit-based pack repository formats."""
18
6379.6.1 by Jelmer Vernooij
Import absolute_import in a few places.
19
from __future__ import absolute_import
20
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
21
from bzrlib.lazy_import import lazy_import
22
lazy_import(globals(), """
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
23
from itertools import izip
24
import time
25
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
26
from bzrlib import (
6472.2.1 by Jelmer Vernooij
Use bzrdir.controldir for generic access to control directories.
27
    controldir,
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
28
    debug,
29
    errors,
5757.3.4 by Jelmer Vernooij
Fix some imports.
30
    knit,
31
    osutils,
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
32
    pack,
5757.3.2 by Jelmer Vernooij
Fix import.
33
    revision as _mod_revision,
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
34
    trace,
5757.3.4 by Jelmer Vernooij
Fix some imports.
35
    tsort,
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
36
    ui,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
37
    xml5,
38
    xml6,
39
    xml7,
40
    )
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
41
from bzrlib.knit import (
5757.4.1 by Jelmer Vernooij
Move knit-specific constructor to repofmt.knitpack_repo.
42
    _KnitGraphIndex,
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
43
    KnitPlainFactory,
44
    KnitVersionedFiles,
45
    )
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
46
""")
47
48
from bzrlib import (
49
    btree_index,
50
    )
51
from bzrlib.index import (
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
52
    CombinedGraphIndex,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
53
    GraphIndex,
5757.3.4 by Jelmer Vernooij
Fix some imports.
54
    GraphIndexPrefixAdapter,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
55
    InMemoryGraphIndex,
56
    )
5757.4.1 by Jelmer Vernooij
Move knit-specific constructor to repofmt.knitpack_repo.
57
from bzrlib.repofmt.knitrepo import (
58
    KnitRepository,
59
    )
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
60
from bzrlib.repofmt.pack_repo import (
5757.8.7 by Jelmer Vernooij
Merge moving of _DirectPackAccess.
61
    _DirectPackAccess,
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
62
    NewPack,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
63
    RepositoryFormatPack,
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
64
    ResumedPack,
5757.3.1 by Jelmer Vernooij
Move _pack_reconcile.
65
    Packer,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
66
    PackCommitBuilder,
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
67
    PackRepository,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
68
    PackRootCommitBuilder,
5757.4.1 by Jelmer Vernooij
Move knit-specific constructor to repofmt.knitpack_repo.
69
    RepositoryPackCollection,
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
70
    )
5815.4.1 by Jelmer Vernooij
Split versionedfile-specific stuff out into VersionedFileRepository.
71
from bzrlib.vf_repository import (
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
72
    StreamSource,
73
    )
74
75
5757.6.11 by Jelmer Vernooij
Fix space issue.
76
class KnitPackRepository(PackRepository, KnitRepository):
    """Pack repository whose versioned files are knits.

    The inventories, revisions, signatures and texts stores are each a
    KnitVersionedFiles built over the matching aggregate index of a
    KnitRepositoryPackCollection.  Formats that support chks are rejected.
    """

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Set up the pack collection and the four knit-backed stores.

        All arguments are forwarded unchanged to PackRepository.__init__.

        :raises AssertionError: if the format reports supports_chks.
        """
        PackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        transport = self._transport
        index_transport = transport.clone('indices')
        upload_transport = transport.clone('upload')
        pack_transport = transport.clone('packs')
        collection = KnitRepositoryPackCollection(self,
            transport,
            index_transport,
            upload_transport,
            pack_transport,
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=False,
            )
        self._pack_collection = collection

        def knit_store(aggregate, deltas, parents, max_delta_chain,
                       **index_options):
            # Wrap one aggregate index of the collection in a knit store.
            graph_index = _KnitGraphIndex(aggregate.combined_index,
                add_callback=aggregate.add_callback,
                deltas=deltas, parents=parents, is_locked=self.is_locked,
                **index_options)
            return KnitVersionedFiles(graph_index,
                data_access=aggregate.data_access,
                max_delta_chain=max_delta_chain)

        self.inventories = knit_store(collection.inventory_index,
            deltas=True, parents=True, max_delta_chain=200)
        self.revisions = knit_store(collection.revision_index,
            deltas=False, parents=True, max_delta_chain=0,
            track_external_parent_refs=True)
        self.signatures = knit_store(collection.signature_index,
            deltas=False, parents=False, max_delta_chain=0)
        self.texts = knit_store(collection.text_index,
            deltas=True, parents=True, max_delta_chain=200)
        self.chk_bytes = None
        # Non-zero while the repository object is 'write locked'; this is a
        # logical lock, distinct from the physical lock that is only taken
        # around changes to the pack-names list.  An alternative design is a
        # decorator around the control files object presenting logical locks
        # as physical ones - consider it if this gets ugly. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # Flags consulted by tests.
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def _get_source(self, to_format):
        """Return the stream source to use when sending to to_format.

        A KnitPackStreamSource is used only when to_format has the same
        network name as this repository's format; otherwise the choice is
        delegated to PackRepository.
        """
        same_format = (
            to_format.network_name() == self._format.network_name())
        if not same_format:
            return PackRepository._get_source(self, to_format)
        return KnitPackStreamSource(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        """Run a KnitReconcilePacker over packs and return its result."""
        reconcile_packer = KnitReconcilePacker(collection, packs, extension,
            revs)
        return reconcile_packer.pack(pb)
140
5757.1.2 by Jelmer Vernooij
Add separate file for knit pack repository formats.
141
142
class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """Parameterized Pack repository format without subtree support.

    Introduced in bzr 0.92.
    """

    # Index implementations used by repositories in this format.
    index_class = GraphIndex
    index_builder_class = InMemoryGraphIndex

    _commit_builder_class = PackCommitBuilder
    repository_class = KnitPackRepository

    @property
    def _serializer(self):
        """Return xml5.serializer_v5, looked up at access time."""
        serializer = xml5.serializer_v5
        return serializer

    def _get_matching_bzrdir(self):
        """Build the 'pack-0.92' format from the controldir registry."""
        registry = controldir.format_registry
        return registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""

    _matchingbzrdir = property(fget=_get_matching_bzrdir,
                               fset=_ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """Return this format's identifying string.

        See RepositoryFormat.get_format_string().
        """
        format_string = "Bazaar pack repository format 1 (needs bzr 0.92)\n"
        return format_string

    def get_format_description(self):
        """Return a short description of this format.

        See RepositoryFormat.get_format_description().
        """
        description = "Packs containing knits without subtree support"
        return description
173
174
175
class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """Return xml7.serializer_v7, looked up at access time."""
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        """Build the 'pack-0.92-subtree' format from the controldir registry."""
        return controldir.format_registry.make_bzrdir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        # The description is a one-line label, so it carries no trailing
        # newline (only the format string above ends with one).
        return "Packs containing knits with subtree support"
214
215
216
class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """Return xml6.serializer_v6, looked up at access time."""
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        """Build the 'rich-root-pack' format from the controldir registry."""
        return controldir.format_registry.make_bzrdir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        # The description is a one-line label, so it carries no trailing
        # newline (only the format string above ends with one).
        return "Packs containing knits with rich root support"
254
255
256
class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Pack repository format with external references, enabling stacking.

    New in release 1.6.

    Because external lookups are supported, ghosts are not truncated after
    reconcile, unlike the pack-0.92 formats.
    """

    # Index implementations used by repositories in this format.
    index_class = GraphIndex
    index_builder_class = InMemoryGraphIndex

    supports_external_lookups = True
    _commit_builder_class = PackCommitBuilder
    repository_class = KnitPackRepository

    @property
    def _serializer(self):
        """Return xml5.serializer_v5, looked up at access time."""
        serializer = xml5.serializer_v5
        return serializer

    def _get_matching_bzrdir(self):
        """Build the '1.6' format from the controldir registry."""
        registry = controldir.format_registry
        return registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""

    _matchingbzrdir = property(fget=_get_matching_bzrdir,
                               fset=_ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """Return this format's identifying string.

        See RepositoryFormat.get_format_string().
        """
        format_string = "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"
        return format_string

    def get_format_description(self):
        """Return a short description of this format.

        See RepositoryFormat.get_format_description().
        """
        description = "Packs 5 (adds stacking support, requires bzr 1.6)"
        return description
292
293
294
class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """Pack repository format with rich roots and stacking.

    New in release 1.6.1.

    Stacking on other repositories is supported, so data can be accessed
    without being stored locally.
    """

    # Index implementations used by repositories in this format.
    index_class = GraphIndex
    index_builder_class = InMemoryGraphIndex

    supports_external_lookups = True
    supports_tree_reference = False # no subtrees
    rich_root_data = True
    _commit_builder_class = PackRootCommitBuilder
    repository_class = KnitPackRepository

    @property
    def _serializer(self):
        """Return xml6.serializer_v6, looked up at access time."""
        serializer = xml6.serializer_v6
        return serializer

    def _get_matching_bzrdir(self):
        """Build the '1.6.1-rich-root' format from the controldir registry."""
        registry = controldir.format_registry
        return registry.make_bzrdir('1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""

    _matchingbzrdir = property(fget=_get_matching_bzrdir,
                               fset=_ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """Return this format's identifying string.

        See RepositoryFormat.get_format_string().
        """
        format_string = "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"
        return format_string

    def get_format_description(self):
        """Return a short description of this format."""
        description = (
            "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)")
        return description
332
333
334
class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """Deprecated pack format with rich roots and external references.

    New in release 1.6.

    Because external lookups are supported, ghosts are not truncated after
    reconcile, unlike the pack-0.92 formats.

    The format was deprecated because its serializer accidentally supported
    subtrees although the format was not meant to, which meant someone could
    accidentally fetch from an incorrect repository.
    """

    # Index implementations used by repositories in this format.
    index_class = GraphIndex
    index_builder_class = InMemoryGraphIndex

    supports_external_lookups = True
    supports_tree_reference = False # no subtrees
    rich_root_data = True
    _commit_builder_class = PackRootCommitBuilder
    repository_class = KnitPackRepository

    @property
    def _serializer(self):
        """Return xml7.serializer_v7, looked up at access time."""
        serializer = xml7.serializer_v7
        return serializer

    def _get_matching_bzrdir(self):
        """Build the '1.6.1-rich-root' format, repointed at this format.

        The registry-built object has its repository_format replaced by
        self before it is returned.
        """
        registry = controldir.format_registry
        bzrdir_format = registry.make_bzrdir('1.6.1-rich-root')
        bzrdir_format.repository_format = self
        return bzrdir_format

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""

    _matchingbzrdir = property(fget=_get_matching_bzrdir,
                               fset=_ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """Return this format's identifying string.

        See RepositoryFormat.get_format_string().
        """
        format_string = "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"
        return format_string

    def get_format_description(self):
        """Return a short description of this format."""
        description = (
            "Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
            " (deprecated)")
        return description

    def is_deprecated(self):
        """Report this format as deprecated (always True)."""
        return True
383
384
385
class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """Pack repository format with stacking and btree indexes.

    No rich roots or subtrees.  Equivalent to pack-1.6 with B+Tree indices.
    """

    # Index implementations used by repositories in this format.
    index_class = btree_index.BTreeGraphIndex
    index_builder_class = btree_index.BTreeBuilder

    supports_external_lookups = True
    _commit_builder_class = PackCommitBuilder
    repository_class = KnitPackRepository

    @property
    def _serializer(self):
        """Return xml5.serializer_v5, looked up at access time."""
        serializer = xml5.serializer_v5
        return serializer

    def _get_matching_bzrdir(self):
        """Build the '1.9' format from the controldir registry."""
        registry = controldir.format_registry
        return registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""

    _matchingbzrdir = property(fget=_get_matching_bzrdir,
                               fset=_ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """Return this format's identifying string.

        See RepositoryFormat.get_format_string().
        """
        format_string = "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"
        return format_string

    def get_format_description(self):
        """Return a short description of this format.

        See RepositoryFormat.get_format_description().
        """
        description = "Packs 6 (uses btree indexes, requires bzr 1.9)"
        return description
419
420
421
class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """Pack format with rich roots, stacking and btree indexes; no subtrees.

    1.6-rich-root with B+Tree indices.
    """

    # Index implementations used by repositories in this format.
    index_class = btree_index.BTreeGraphIndex
    index_builder_class = btree_index.BTreeBuilder

    supports_external_lookups = True
    supports_tree_reference = False # no subtrees
    rich_root_data = True
    _commit_builder_class = PackRootCommitBuilder
    repository_class = KnitPackRepository

    @property
    def _serializer(self):
        """Return xml6.serializer_v6, looked up at access time."""
        serializer = xml6.serializer_v6
        return serializer

    def _get_matching_bzrdir(self):
        """Build the '1.9-rich-root' format from the controldir registry."""
        registry = controldir.format_registry
        return registry.make_bzrdir('1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""

    _matchingbzrdir = property(fget=_get_matching_bzrdir,
                               fset=_ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """Return this format's identifying string.

        See RepositoryFormat.get_format_string().
        """
        format_string = "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"
        return format_string

    def get_format_description(self):
        """Return a short description of this format."""
        description = "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"
        return description
456
457
458
class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree.  It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        """Return xml7.serializer_v7, looked up at access time."""
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        """Build the 'development5-subtree' format from the registry."""
        return controldir.format_registry.make_bzrdir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
            "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        # The description is a one-line label, so it carries no trailing
        # newline (only the format string above ends with one).
        return ("Development repository format, currently the same as "
            "1.6.1-subtree with B+Tree indices.")
5757.2.1 by Jelmer Vernooij
Don't have CHK stream source depend on knit pack stream source.
500
501
502
class KnitPackStreamSource(StreamSource):
    """StreamSource for transfers between KnitPack repos of the same format.

    Assumptions made by this source:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so inventory texts can be streamed before text
           content)
        5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        """Initialise the base source; no text keys are known yet."""
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        self._text_fetch_order = 'unordered'
        # Populated once the filtered inventory stream has been exhausted.
        self._text_keys = None

    def _get_filtered_inv_stream(self, revision_ids):
        """Return ('inventories', stream) for the given revisions.

        While the stream is consumed, the text keys referenced by each
        inventory record are collected.  When it is exhausted,
        self._text_keys is set to those keys minus the keys referenced by
        the inventories of the revisions' parents.

        :raises errors.NoSuchRevision: (on iteration) for an absent record.
        :raises ValueError: (on iteration) for a record whose storage kind
            is neither 'knit-delta-gz' nor 'knit-ft-gz'.
        """
        source_repo = self.from_repository
        parent_keys = [(parent_id,) for parent_id in
            source_repo._find_parent_ids_of_revisions(revision_ids)]
        find_text_keys = source_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            source_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        # Only used for its _parse_record method.
        record_parser = KnitVersionedFiles(None, None)
        content_factory = KnitPlainFactory()
        line_readers = {
            'knit-delta-gz': content_factory.get_linedelta_content,
            'knit-ft-gz': content_factory.get_fulltext_content,
            }

        def find_text_keys_from_content(record):
            storage_kind = record.storage_kind
            read_lines = line_readers.get(storage_kind)
            if read_lines is None:
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (storage_kind,))
            # A knit record always has a _raw_record field, even when it was
            # reconstituted from a network stream.
            raw_record = record._raw_record
            # Parse the entire record.
            revision_id = record.key[-1]
            content, _ = record_parser._parse_record(revision_id, raw_record)
            annotated_lines = [(line, revision_id)
                               for line in read_lines(content)]
            content_text_keys.update(find_text_keys(annotated_lines))

        revision_keys = [(rev_id,) for rev_id in revision_ids]

        def _filtered_inv_stream():
            records = source_repo.inventories.get_record_stream(
                revision_keys, 'unordered', False)
            for record in records:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(source_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            self._text_keys = content_text_keys - parent_text_keys

        return ('inventories', _filtered_inv_stream())

    def _get_text_stream(self):
        """Return ('texts', stream) for the keys held in self._text_keys."""
        # Note: adding root keys does not need handling here, because the
        # source and target have the identical network name.
        texts = self.from_repository.texts
        record_stream = texts.get_record_stream(
            self._text_keys, self._text_fetch_order, False)
        return ('texts', record_stream)

    def get_stream(self, search):
        """Yield the (name, stream) pairs for the revisions in search.

        Order: everything from _fetch_revision_texts, then the filtered
        inventories stream, then the texts stream.
        """
        wanted_revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(wanted_revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in wanted_revision_ids]
        yield self._get_filtered_inv_stream(wanted_revision_ids)
        yield self._get_text_stream()
572
5757.3.1 by Jelmer Vernooij
Move _pack_reconcile.
573
5757.4.2 by Jelmer Vernooij
Add specific KnitPacker class.
574
class KnitPacker(Packer):
575
    """Packer that works with knit packs."""
576
577
    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        """Create a knit packer; every argument is passed to the base class."""
        base = super(KnitPacker, self)
        base.__init__(pack_collection, packs, suffix,
                      revision_ids=revision_ids, reload_func=reload_func)
582
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
583
    def _pack_map_and_index_list(self, index_attribute):
584
        """Convert a list of packs to an index pack map and index list.
585
586
        :param index_attribute: The attribute that the desired index is found
587
            on.
588
        :return: A tuple (map, list) where map contains the dict from
589
            index:pack_tuple, and list contains the indices in the preferred
590
            access order.
591
        """
592
        indices = []
593
        pack_map = {}
594
        for pack_obj in self.packs:
595
            index = getattr(pack_obj, index_attribute)
596
            indices.append(index)
597
            pack_map[index] = pack_obj
598
        return pack_map, indices
599
600
    def _index_contents(self, indices, key_filter=None):
        """Iterate the entries of several indices as one combined index.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
            When None, every entry of the combined index is returned.
        :return: The iterable produced by a CombinedGraphIndex built over
            ``indices``.
        """
        combined = CombinedGraphIndex(indices)
        if key_filter is not None:
            return combined.iter_entries(key_filter)
        return combined.iter_all_entries()
611
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
612
    def _copy_nodes(self, nodes, index_map, writer, write_index,
        output_lines=None):
        """Copy knit nodes between packs with no graph references.

        Runs _do_copy_nodes under a nested progress bar, which is always
        finished afterwards.

        :param output_lines: Output full texts of copied items.
        :return: Whatever _do_copy_nodes returns.
        """
        progress = ui.ui_factory.nested_progress_bar()
        try:
            outcome = self._do_copy_nodes(nodes, index_map, writer,
                write_index, progress, output_lines=output_lines)
        finally:
            progress.finished()
        return outcome
624
625
    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
        output_lines=None):
        """Copy graph-less knit records from their source packs to writer.

        Each node is an (index, key, value) tuple. The first character of
        ``value`` is the eol flag, and the rest is "offset length" locating
        the record in the source pack.

        :param nodes: Iterable of (index, key, value) nodes to copy.
        :param index_map: Mapping from index to the pack object holding it.
        :param writer: Pack writer receiving the raw records.
        :param write_index: Index that records where each copy was written.
        :param pb: Progress bar updated once per copied record.
        :param output_lines: Optional callable given the parsed content of
            each record. When None only the record header is checked.
        """
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        by_index = {}
        for index, key, value in nodes:
            by_index.setdefault(index, []).append((key, value))
        copied = 0
        pb.update("Copied record", copied, len(nodes))
        for index in by_index:
            requests = []
            for key, value in by_index[index]:
                # ---- KnitGraphIndex.get_position
                fields = value[1:].split(' ')
                requests.append(
                    (int(fields[0]), int(fields[1]), (key, value[0])))
            # linear scan up the pack
            requests.sort()
            # copy the data
            transport, path = index_map[index].access_tuple()
            readv_vector = [request[0:2] for request in requests]
            try:
                reader = pack.make_readv_reader(transport, path,
                    readv_vector)
            except errors.NoSuchFile:
                # The source pack may have been repacked away; give the
                # caller's reload hook a chance to run before re-raising.
                if self._reload_func is not None:
                    self._reload_func()
                raise
            paired = izip(reader.iter_records(), requests)
            for (names, read_func), (_1, _2, (key, eol_flag)) in paired:
                raw_data = read_func(None)
                if output_lines is None:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                else:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                pos, size = writer.add_bytes_record(raw_data, names)
                new_value = eol_flag + ("%d %d" % (pos, size))
                write_index.add_node(key, new_value)
                pb.update("Copied record", copied)
                copied += 1
675
676
    def _copy_nodes_graph(self, index_map, writer, write_index,
        readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        This is a generator: nothing is copied until it is iterated. It
        wraps _do_copy_nodes_graph in a nested progress bar and re-yields
        everything that generator yields.

        :param index_map: Mapping from index to the pack object holding it.
        :param writer: Pack writer receiving the raw records.
        :param write_index: Index that records where each copy was written.
        :param readv_group_iter: Iterable of (index, readv_vector,
            node_vector) groups to copy.
        :param total_items: Total number of records, for progress reporting.
        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        pb = ui.ui_factory.nested_progress_bar()
        # try/finally (rather than except Exception/else) so the progress
        # bar is also finished when the generator is closed before it is
        # exhausted, or when a non-Exception error such as KeyboardInterrupt
        # propagates through it.
        try:
            for result in self._do_copy_nodes_graph(index_map, writer,
                write_index, output_lines, pb, readv_group_iter, total_items):
                yield result
        finally:
            pb.finished()
694
695
    def _do_copy_nodes_graph(self, index_map, writer, write_index,
        output_lines, pb, readv_group_iter, total_items):
        """Copy knit records with graph references; the worker generator.

        For every (index, readv_vector, node_vector) group the records are
        read from the source pack, verified, written to ``writer`` and
        registered in ``write_index`` together with their references.

        :param output_lines: When true the full record is parsed and each
            of its lines is yielded as ``(line, key)``. Otherwise only the
            record header is checked and nothing is yielded.
        :param pb: Progress bar updated once per copied record.
        """
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        copied = 0
        pb.update("Copied record", copied, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            transport, path = index_map[index].access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                # Let the caller's reload hook run before re-raising.
                if self._reload_func is not None:
                    self._reload_func()
                raise
            paired = izip(reader.iter_records(), node_vector)
            for (names, read_func), (key, eol_flag, references) in paired:
                raw_data = read_func(None)
                if not output_lines:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                else:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) != 0:
                        line_iterator = factory.get_linedelta_content(content)
                    else:
                        # An empty last reference list selects full text.
                        line_iterator = factory.get_fulltext_content(content)
                    for line in line_iterator:
                        yield line, key
                pos, size = writer.add_bytes_record(raw_data, names)
                new_value = eol_flag + ("%d %d" % (pos, size))
                write_index.add_node(key, new_value, references)
                pb.update("Copied record", copied)
                copied += 1
734
735
    def _process_inventory_lines(self, inv_lines):
736
        """Use up the inv_lines generator and setup a text key filter."""
737
        repo = self._pack_collection.repo
738
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
739
            inv_lines, self.revision_keys)
740
        text_filter = []
741
        for fileid, file_revids in fileid_revisions.iteritems():
742
            text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
743
        self._text_filter = text_filter
744
745
    def _copy_inventory_texts(self):
        """Copy the inventory records into the new pack.

        When specific revisions were requested the copied inventory lines
        are handed to _process_inventory_lines. Otherwise the copy is simply
        run to completion and ``self._text_filter`` is set to None.
        """
        # select inventory keys
        # Inventories currently share the revision keyspace. Note that
        # querying for keys here could introduce a bug where an inventory
        # item is missed, so do not change it to query separately without
        # cross checking like the text key check in _copy_text_texts.
        inv_keys = self._revision_keys
        inventory_index_map, inventory_indices = \
            self._pack_map_and_index_list('inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        wants_lines = bool(self.revision_ids)
        destination = self.new_pack
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            destination._writer, destination.inventory_index,
            readv_group_iter, total_items, output_lines=wants_lines)
        if wants_lines:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            for _unused in inv_lines:
                pass
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)
776
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
777
    def _update_pack_order(self, entries, index_to_pack_map):
        """Reorder self.packs so the packs used by 'entries' come first.

        Used packs are placed at the front of self.packs, in the order of
        their first use in 'entries'. Packs that 'entries' never touches are
        moved to the end, so that future requests can avoid probing them.
        When every pack is used the list is left untouched.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        ordered = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index in seen_indexes:
                continue
            ordered.append(index_to_pack_map[index])
            seen_indexes.add(index)
        if len(ordered) == len(self.packs):
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        # Append the packs that were never referenced, keeping their
        # existing relative order.
        seen_packs = set(ordered)
        for candidate in self.packs:
            if candidate in seen_packs:
                continue
            ordered.append(candidate)
            seen_packs.add(candidate)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in ordered]
            trace.mutter('Reordering packs\nfrom: %s\n  to: %s',
                   old_names, new_names)
        self.packs = ordered
810
5757.7.1 by Jelmer Vernooij
Move Packer implementation to knitpack_repo.
811
    def _copy_revision_texts(self):
        """Copy the revision records into the new pack.

        Also reorders ``self.packs`` by first use and stores the selected
        revision keys (None when no revision ids were given) on
        ``self._revision_keys``.
        """
        # select revisions
        revision_keys = None
        if self.revision_ids:
            revision_keys = [(revision_id,)
                             for revision_id in self.revision_ids]
        # select revision keys
        revision_index_map, revision_indices = \
            self._pack_map_and_index_list('revision_index')
        # Materialised as a list because the nodes are consumed twice below.
        revision_nodes = list(
            self._index_contents(revision_indices, revision_keys))
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
        copier = self._copy_nodes_graph(revision_index_map,
            self.new_pack._writer, self.new_pack.revision_index,
            readv_group_iter, total_items)
        # Exhaust the generator so that the copy actually happens.
        list(copier)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.revision_index.key_count(),
                time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys
835
836
    def _get_text_nodes(self):
837
        text_index_map, text_indices = self._pack_map_and_index_list(
838
            'text_index')
839
        return text_index_map, self._index_contents(text_indices,
840
            self._text_filter)
841
842
    def _copy_text_texts(self):
843
        # select text keys
844
        text_index_map, text_nodes = self._get_text_nodes()
845
        if self._text_filter is not None:
846
            # We could return the keys copied as part of the return value from
847
            # _copy_nodes_graph but this doesn't work all that well with the
848
            # need to get line output too, so we check separately, and as we're
849
            # going to buffer everything anyway, we check beforehand, which
850
            # saves reading knit data over the wire when we know there are
851
            # mising records.
852
            text_nodes = set(text_nodes)
853
            present_text_keys = set(_node[1] for _node in text_nodes)
854
            missing_text_keys = set(self._text_filter) - present_text_keys
855
            if missing_text_keys:
856
                # TODO: raise a specific error that can handle many missing
857
                # keys.
858
                trace.mutter("missing keys during fetch: %r", missing_text_keys)
859
                a_missing_key = missing_text_keys.pop()
860
                raise errors.RevisionNotPresent(a_missing_key[1],
861
                    a_missing_key[0])
862
        # copy text keys and adjust values
863
        self.pb.update("Copying content texts", 3)
864
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
865
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
866
            self.new_pack.text_index, readv_group_iter, total_items))
867
        self._log_copied_texts()
868
869
    def _create_pack_from_packs(self):
        """Build a new pack out of the contents of self.packs.

        Revisions, inventories, texts and signatures are copied in that
        order, then the new pack's references are checked.

        :return: The finished new pack after it has been allocated in the
            pack collection, or None (with the new pack aborted) when
            _use_pack rejects it.
        """
        self.pb.update("Opening pack", 0, 5)
        new_pack = self.open_pack()
        self.new_pack = new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        tracing = 'pack' in debug.debug_flags
        if tracing:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is None:
                rev_count = 'all'
            else:
                rev_count = len(self.revision_ids)
            trace.mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys # same keyspace
        signature_index_map, signature_indices = \
            self._pack_map_and_index_list('signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        new_pack._check_references()
        if self._use_pack(new_pack):
            self.pb.update("Finishing pack", 5)
            new_pack.finish()
            self._pack_collection.allocate(new_pack)
            return new_pack
        # Nothing worth keeping was produced.
        new_pack.abort()
        return None
913
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
914
    def _least_readv_node_readv(self, nodes):
915
        """Generate request groups for nodes using the least readv's.
916
917
        :param nodes: An iterable of graph index nodes.
918
        :return: Total node count and an iterator of the data needed to perform
919
            readvs to obtain the data for nodes. Each item yielded by the
920
            iterator is a tuple with:
921
            index, readv_vector, node_vector. readv_vector is a list ready to
922
            hand to the transport readv method, and node_vector is a list of
923
            (key, eol_flag, references) for the node retrieved by the
924
            matching readv_vector.
925
        """
926
        # group by pack so we do one readv per pack
927
        nodes = sorted(nodes)
928
        total = len(nodes)
929
        request_groups = {}
930
        for index, key, value, references in nodes:
931
            if index not in request_groups:
932
                request_groups[index] = []
933
            request_groups[index].append((key, value, references))
934
        result = []
935
        for index, items in request_groups.iteritems():
936
            pack_readv_requests = []
937
            for key, value, references in items:
938
                # ---- KnitGraphIndex.get_position
939
                bits = value[1:].split(' ')
940
                offset, length = int(bits[0]), int(bits[1])
941
                pack_readv_requests.append(
942
                    ((offset, length), (key, value[0], references)))
943
            # linear scan up the pack to maximum range combining.
944
            pack_readv_requests.sort()
945
            # split out the readv and the node data.
946
            pack_readv = [readv for readv, node in pack_readv_requests]
947
            node_vector = [node for readv, node in pack_readv_requests]
948
            result.append((index, pack_readv, node_vector))
949
        return total, result
950
951
    def _revision_node_readv(self, revision_nodes):
952
        """Return the total revisions and the readv's to issue.
953
954
        :param revision_nodes: The revision index contents for the packs being
955
            incorporated into the new pack.
956
        :return: As per _least_readv_node_readv.
957
        """
958
        return self._least_readv_node_readv(revision_nodes)
959
5757.4.2 by Jelmer Vernooij
Add specific KnitPacker class.
960
961
class KnitReconcilePacker(KnitPacker):
5757.3.1 by Jelmer Vernooij
Move _pack_reconcile.
962
    """A packer which regenerates indices etc as it copies.
963
964
    This is used by ``bzr reconcile`` to cause parent text pointers to be
965
    regenerated.
966
    """
967
5757.4.2 by Jelmer Vernooij
Add specific KnitPacker class.
968
    def __init__(self, *args, **kwargs):
        """Create the packer; all arguments are passed to KnitPacker."""
        base = super(KnitReconcilePacker, self)
        base.__init__(*args, **kwargs)
        # Becomes True once the copy finds anything that had to be altered.
        self._data_changed = False
971
972
    def _process_inventory_lines(self, inv_lines):
973
        """Generate a text key reference map rather for reconciling with."""
974
        repo = self._pack_collection.repo
975
        refs = repo._serializer._find_text_key_references(inv_lines)
976
        self._text_refs = refs
977
        # during reconcile we:
978
        #  - convert unreferenced texts to full texts
979
        #  - correct texts which reference a text not copied to be full texts
980
        #  - copy all others as-is but with corrected parents.
981
        #  - so at this point we don't know enough to decide what becomes a full
982
        #    text.
983
        self._text_filter = None
984
985
    def _copy_text_texts(self):
        """Generate what texts we should have and then copy.

        Every text node from the source packs is compared against the ideal
        text index generated from ``self._text_refs``:

        - nodes whose key is absent from the ideal index are not copied;
        - nodes whose parents already match, or whose left-most parent
          matches, are bulk copied (with corrected references if needed);
        - all other nodes are re-inserted as new texts in topological
          order.

        ``self._data_changed`` is set whenever anything other than an
        unchanged copy happens.

        :raises errors.BzrError: when an ideal parent key has a different
            file id from the text it belongs to.
        :raises errors.BzrCheckError: when the new text index still has
            references outside its own keyspace.
        """
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # 0 - index
            # 1 - key
            # 2 - value
            # 3 - refs
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                # The key is absent from the ideal index: the node is
                # discarded by simply not copying it.
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key:rev_order.get(key[0][1], 0))
        # NOTE(review): 'transaction' and 'file_id_index' are not used again
        # in this method; they are kept because the calls that build them
        # live outside this file section - confirm they can be dropped.
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
                {self.new_pack.text_index:self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack to delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
            text_lines = osutils.split_lines(repo.texts.get_record_stream(
                [key], 'unordered', True).next().get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()
1087
1088
    def _use_pack(self, new_pack):
1089
        """Override _use_pack to check for reconcile having changed content."""
1090
        # XXX: we might be better checking this at the copy time.
1091
        original_inventory_keys = set()
1092
        inv_index = self._pack_collection.inventory_index.combined_index
1093
        for entry in inv_index.iter_all_entries():
1094
            original_inventory_keys.add(entry[1])
1095
        new_inventory_keys = set()
1096
        for entry in new_pack.inventory_index.iter_all_entries():
1097
            new_inventory_keys.add(entry[1])
1098
        if new_inventory_keys != original_inventory_keys:
1099
            self._data_changed = True
1100
        return new_pack.data_inserted() and self._data_changed
1101
1102
5757.7.3 by Jelmer Vernooij
Move more knitpack-specific functionality out of Packer.
1103
class OptimisingKnitPacker(KnitPacker):
1104
    """A packer which spends more time to create better disk layouts."""
1105
1106
    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        The requests are emitted in the reverse of the topological sort
        order, one single-record readv per revision, placing the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        details = {}
        for index, key, value, references in revision_nodes:
            ancestors[key] = references[0]
            details[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)

        def request_for(key):
            index, value, references = details[key]
            # ---- KnitGraphIndex.get_position
            fields = value[1:].split(' ')
            span = (int(fields[0]), int(fields[1]))
            return (index, [span], [(key, value[0], references)])

        # Single IO is pathological, but it will work as a starting point.
        requests = [request_for(key) for key in reversed(order)]
        # TODO: combine requests in the same index that are in ascending order.
        return len(order), requests
1135
1136
    def open_pack(self):
        """Open a pack for the pack we are creating.

        The pack comes from the base class; each of its four index builders
        is then switched to size optimisation.
        """
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        for index_name in ('revision_index', 'inventory_index',
                           'text_index', 'signature_index'):
            getattr(new_pack, index_name).set_optimize(for_size=True)
        return new_pack
1145
1146
1147
class KnitRepositoryPackCollection(RepositoryPackCollection):
    """Pack collection configured with the knit pack and packer classes."""

    # Classes used for new and resumed packs.
    pack_factory = NewPack
    resumed_pack_factory = ResumedPack

    # Packer classes for normal and optimising packs.
    normal_packer_class = KnitPacker
    optimising_packer_class = OptimisingKnitPacker
1154
1155
5757.3.1 by Jelmer Vernooij
Move _pack_reconcile.
1156