~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/repofmt/gc_repo.py

Bring the groupcompress plugin into the brisbane-core branch.

# groupcompress, a bzr plugin providing improved disk utilisation
# Copyright (C) 2008 Canonical Limited.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA


"""Repository formats using B+Tree indices and groupcompress compression."""

import md5
import time

from bzrlib import (
    debug,
    errors,
    knit,
    inventory,
    pack,
    repository,
    trace,
    ui,
    )
from bzrlib.btree_index import (
    BTreeBuilder,
    BTreeGraphIndex,
    )
from bzrlib.index import GraphIndex, GraphIndexBuilder
from bzrlib.repository import InterPackRepo
from bzrlib.groupcompress import (
    _GCGraphIndex,
    GroupCompressVersionedFiles,
    )
from bzrlib.osutils import rand_chars
from bzrlib.repofmt.pack_repo import (
    Pack,
    NewPack,
    KnitPackRepository,
    RepositoryPackCollection,
    RepositoryFormatKnitPack6,
    RepositoryFormatKnitPack6RichRoot,
    Packer,
    ReconcilePacker,
    OptimisingPacker,
    )
try:
    from bzrlib.repofmt.pack_repo import (
    CHKInventoryRepository,
    RepositoryFormatPackDevelopment5,
    RepositoryFormatPackDevelopment5Hash16,
##    RepositoryFormatPackDevelopment5Hash16b,
##    RepositoryFormatPackDevelopment5Hash63,
##    RepositoryFormatPackDevelopment5Hash127a,
##    RepositoryFormatPackDevelopment5Hash127b,
    RepositoryFormatPackDevelopment5Hash255,
    )
    from bzrlib import chk_map
    chk_support = True
except ImportError:
    chk_support = False


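# Monkey-patch Packer.open_pack (below) so that upstream Packer code creates
# new packs through the collection's pack_factory attribute (NewPack by
# default, GCPack for the GC collections defined later in this module).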
def open_pack(self):
    return self._pack_collection.pack_factory(self._pack_collection,
        upload_suffix=self.suffix,
        file_mode=self._pack_collection.repo.bzrdir._get_file_mode())


Packer.open_pack = open_pack


class GCPack(NewPack):

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a NewPack instance.

        :param pack_collection: A PackCollection into which this is being
            inserted.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g. '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        """
        # replaced from bzr.dev to:
        # - change inventory reference list length to 1
        # - change texts reference lists to 1
        # TODO: patch this to be parameterised upstream

        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        index_builder_class = pack_collection._index_builder_class
        if chk_support:
            # from brisbane-core
            if pack_collection.chk_index is not None:
                chk_index = index_builder_class(reference_lists=0)
            else:
                chk_index = None
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: We want to map compression only, but currently the
                # knit code hasn't been updated enough to understand that, so we
                # have a regular 2-list index giving parents and compression
                # source.
                index_builder_class(reference_lists=1),
                # Texts: compression and per file graph, for all fileids - so two
                # reference lists and two elements in the key tuple.
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                # CHK based storage - just blobs, no compression or parents.
                chk_index=chk_index
                )
        else:
            # from bzr.dev
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: compressed, with graph for compatibility with other
                # existing bzrlib code.
                index_builder_class(reference_lists=1),
                # Texts: per file graph:
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                )
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = md5.new()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = rand_chars(20) + upload_suffix
        # when was this pack started?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them.  Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            # buffer cap
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
                _write(bytes)
                _update(bytes)
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'


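# Give the stock RepositoryPackCollection a pack_factory attribute so the
# patched open_pack() above works against it too; the GC collection below
# overrides it with GCPack.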
RepositoryPackCollection.pack_factory = NewPack

class GCRepositoryPackCollection(RepositoryPackCollection):
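    """A pack collection that creates GCPack packs and B+Tree indices."""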
 
    pack_factory = GCPack

    def _make_index(self, name, suffix):
        """Overridden to use BTreeGraphIndex objects."""
        size_offset = self._suffix_offsets[suffix]
        index_name = name + suffix
        index_size = self._names[name][size_offset]
        return BTreeGraphIndex(
            self._index_transport, index_name, index_size)

    def _start_write_group(self):
        # Overridden to add 'self.pack_factory()'
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(self._new_pack.revision_index,
            self._new_pack)
        self.inventory_index.add_writable_index(self._new_pack.inventory_index,
            self._new_pack)
        self.text_index.add_writable_index(self._new_pack.text_index,
            self._new_pack)
        self.signature_index.add_writable_index(self._new_pack.signature_index,
            self._new_pack)
        if chk_support and self.chk_index is not None:
            self.chk_index.add_writable_index(self._new_pack.chk_index,
                self._new_pack)
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback

        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
        self.repo.signatures._index._add_callback = self.signature_index.add_callback
        self.repo.texts._index._add_callback = self.text_index.add_callback

    def _get_filtered_inv_stream(self, source_vf, keys, pb=None):
        """Filter the texts of inventories, to find the chk pages."""
        id_roots = []
        p_id_roots = []
        id_roots_set = set()
        p_id_roots_set = set()
        total_keys = len(keys)
        def _filter_inv_stream(stream):
            for idx, record in enumerate(stream):
                ### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
                bytes = record.get_bytes_as('fulltext')
                chk_inv = inventory.CHKInventory.deserialise(None, bytes, record.key)
                if pb is not None:
                    pb.update('inv', idx, total_keys)
                key = chk_inv.id_to_entry.key()
                if key not in id_roots_set:
                    id_roots.append(key)
                    id_roots_set.add(key)
                p_id_map = chk_inv.parent_id_basename_to_file_id
                if p_id_map is not None:
                    key = p_id_map.key()
                    if key not in p_id_roots_set:
                        p_id_roots_set.add(key)
                        p_id_roots.append(key)
                yield record
        stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
        return _filter_inv_stream(stream), id_roots, p_id_roots

    def _get_chk_stream(self, source_vf, keys, id_roots, p_id_roots, pb=None):
        # We want to stream the keys from 'id_roots', and things they
        # reference, and then stream things from p_id_roots and things they
        # reference, and then any remaining keys that we didn't get to.

        # We also group referenced texts together, so if one root references a
        # text with prefix 'a', and another root references a node with prefix
        # 'a', we want to yield those nodes before we yield the nodes for 'b'.
        # This keeps 'similar' nodes together.

        # Note: We probably actually want multiple streams here, to help the
        #       client understand that the different levels won't compress well
        #       against each other.
        #       Test the difference between using one Group per level, and
        #       using 1 Group per prefix. (so '' (root) would get a group, then
        #       all the references to search-key 'a' would get a group, etc.)
        total_keys = len(keys)
        remaining_keys = set(keys)
        counter = [0]
        def _get_referenced_stream(root_keys):
            cur_keys = root_keys
            while cur_keys:
                keys_by_search_prefix = {}
                remaining_keys.difference_update(cur_keys)
                next_keys = set()
                stream = source_vf.get_record_stream(cur_keys, 'as-requested',
                                                     True)
                def next_stream():
                    for record in stream:
                        bytes = record.get_bytes_as('fulltext')
                        # We don't care about search_key_func for this code,
                        # because we only care about external references.
                        node = chk_map._deserialise(bytes, record.key,
                                                    search_key_func=None)
                        common_base = node._search_prefix
                        if isinstance(node, chk_map.InternalNode):
                            for prefix, value in node._items.iteritems():
                                if not isinstance(value, tuple):
                                    raise AssertionError("value is %s when"
                                        " tuple expected" % (value.__class__))
                                if value not in next_keys:
                                    keys_by_search_prefix.setdefault(prefix,
                                        []).append(value)
                                    next_keys.add(value)
                        counter[0] += 1
                        if pb is not None:
                            pb.update('chk node', counter[0], total_keys)
                        yield record
                yield next_stream()
                # Double check that we won't be emitting any keys twice
                next_keys = next_keys.intersection(remaining_keys)
                cur_keys = []
                for prefix in sorted(keys_by_search_prefix):
                    cur_keys.extend(keys_by_search_prefix[prefix])
        for stream in _get_referenced_stream(id_roots):
            yield stream
        for stream in _get_referenced_stream(p_id_roots):
            yield stream
        if remaining_keys:
            trace.note('There were %d keys in the chk index, which were not'
                       ' referenced from inventories', len(remaining_keys))
            stream = source_vf.get_record_stream(remaining_keys, 'unordered',
                                                 True)
            yield stream

    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
                                 reload_func=None):
        """Execute a series of pack operations.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: The class of packer to use (default: Packer).
        :return: None.
        """
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if len(packs) == 0:
                continue
            # Create a new temp VersionedFile instance based on these packs,
            # and then just fetch everything into the target

            to_copy = [('revision_index', 'revisions'),
                       ('inventory_index', 'inventories'),
                       ('text_index', 'texts'),
                       ('signature_index', 'signatures'),
                      ]
            # TODO: This is a very non-optimal ordering for chk_bytes. The
            #       issue is that pages that are similar are not transmitted
            #       together. Perhaps get_record_stream('gc-optimal') should be
            #       taught about how to group chk pages?
            has_chk = False
            if getattr(self, 'chk_index', None) is not None:
                has_chk = True
                to_copy.insert(2, ('chk_index', 'chk_bytes'))

            # Shouldn't we start_write_group around this?
            if self._new_pack is not None:
                raise errors.BzrError('call to %s.pack() while another pack is'
                                      ' being written.'
                                      % (self.__class__.__name__,))
            new_pack = self.pack_factory(self, '.autopack',
                file_mode=self.repo.bzrdir._get_file_mode())
            new_pack.set_write_cache_size(1024*1024)
            # TODO: A better alternative is to probably use Packer.open_pack(), and
            #       then create a GroupCompressVersionedFiles() around the
            #       target pack to insert into.
            pb = ui.ui_factory.nested_progress_bar()
            try:
                for idx, (index_name, vf_name) in enumerate(to_copy):
                    pb.update('repacking %s' % (vf_name,), idx + 1, len(to_copy))
                    keys = set()
                    new_index = getattr(new_pack, index_name)
                    new_index.set_optimize(for_size=True)
                    for pack in packs:
                        source_index = getattr(pack, index_name)
                        keys.update(e[1] for e in source_index.iter_all_entries())
                    trace.mutter('repacking %s with %d keys',
                                 vf_name, len(keys))
                    source_vf = getattr(self.repo, vf_name)
                    target_access = knit._DirectPackAccess({})
                    target_access.set_writer(new_pack._writer, new_index,
                                             new_pack.access_tuple())
                    target_vf = GroupCompressVersionedFiles(
                        _GCGraphIndex(new_index,
                                      add_callback=new_index.add_nodes,
                                      parents=source_vf._index._parents,
                                      is_locked=self.repo.is_locked),
                        access=target_access,
                        delta=source_vf._delta)
                    stream = None
                    child_pb = ui.ui_factory.nested_progress_bar()
                    try:
                        if has_chk:
                            if vf_name == 'inventories':
                                stream, id_roots, p_id_roots = self._get_filtered_inv_stream(
                                    source_vf, keys, pb=child_pb)
                            elif vf_name == 'chk_bytes':
                                for stream in self._get_chk_stream(source_vf, keys,
                                                    id_roots, p_id_roots,
                                                    pb=child_pb):
                                    target_vf.insert_record_stream(stream)
                                # No more to copy
                                stream = []
                        if stream is None:
                            def pb_stream():
                                substream = source_vf.get_record_stream(keys, 'gc-optimal', True)
                                for idx, record in enumerate(substream):
                                    child_pb.update(vf_name, idx + 1, len(keys))
                                    yield record
                            stream = pb_stream()
                        target_vf.insert_record_stream(stream)
                    finally:
                        child_pb.finished()
                new_pack._check_references() # shouldn't be needed
            except:
                pb.finished()
                new_pack.abort()
                raise
            else:
                pb.finished()
                if not new_pack.data_inserted():
                    raise AssertionError('We copied from pack files,'
                                         ' but had no data copied')
                    # we need to abort somehow, because we don't want to remove
                    # the other packs
                new_pack.finish()
                self.allocate(new_pack)
            for pack in packs:
                self._remove_pack_from_memory(pack)
        # record the newly available packs and stop advertising the old
        # packs
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)


class GCPackRepository(KnitPackRepository):
    """GC customisation of KnitPackRepository."""

    # Note: I think the CHK support can be dropped from this class as it's
    # implemented via the GCCHKPackRepository class defined next. IGC 20090301

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Overridden to change pack collection class."""
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # and now replace everything it did :)
        index_transport = self._transport.clone('indices')
        if chk_support:
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
        else:
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class)
        self.inventories = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.inventory_index.data_access)
        self.revisions = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.revision_index.data_access,
            delta=False)
        self.signatures = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=self._pack_collection.signature_index.data_access,
            delta=False)
        self.texts = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.text_index.data_access)
        if chk_support and _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
        else:
            self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False


if chk_support:
    class GCCHKPackRepository(CHKInventoryRepository):
        """GC customisation of CHKInventoryRepository."""

        def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
            _serializer):
            """Overridden to change pack collection class."""
            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
                _commit_builder_class, _serializer)
            # and now replace everything it did :)
            index_transport = self._transport.clone('indices')
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
            self.inventories = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                    add_callback=self._pack_collection.inventory_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.inventory_index.data_access)
            self.revisions = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                    add_callback=self._pack_collection.revision_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.revision_index.data_access,
                delta=False)
            self.signatures = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                    add_callback=self._pack_collection.signature_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.signature_index.data_access,
                delta=False)
            self.texts = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.text_index.combined_index,
                    add_callback=self._pack_collection.text_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.text_index.data_access)
            assert _format.supports_chks
            # No parents, individual CHK pages don't have specific ancestry
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
            # True when the repository object is 'write locked' (as opposed to the
            # physical lock only taken out around changes to the pack-names list.)
            # Another way to represent this would be a decorator around the control
            # files object that presents logical locks as physical ones - if this
            # gets ugly consider that alternative design. RBC 20071011
            self._write_lock_count = 0
            self._transaction = None
            # for tests
            self._reconcile_does_inventory_gc = True
            self._reconcile_fixes_text_parents = True
            self._reconcile_backsup_inventory = False


class RepositoryFormatPackGCPlain(RepositoryFormatKnitPack6):
    """A pack repository using B+Tree indices and groupcompress."""

    repository_class = GCPackRepository
    rich_root_data = False
    # Note: We cannot unpack a delta that references a text we haven't
    # seen yet. There are 2 options, work in fulltexts, or require
    # topological sorting. Using fulltexts is more optimal for local
    # operations, because the source can be smart about extracting
    # multiple in-a-row (and sharing strings). Topological is better
    # for remote, because we access less data.
    _fetch_order = 'unordered'
    _fetch_gc_optimal = True
    _fetch_uses_deltas = False

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format - btree+gc "
            "(needs bzr.dev from 1.13)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - btree+groupcompress "
            ", interoperates with pack-0.92\n")


if chk_support:
    from bzrlib import chk_serializer
    class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
        """A hashed CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository
        rich_root_data = True
        # Note: We cannot unpack a delta that references a text we haven't
        # seen yet. There are 2 options, work in fulltexts, or require
        # topological sorting. Using fulltexts is more optimal for local
        # operations, because the source can be smart about extracting
        # multiple in-a-row (and sharing strings). Topological is better
        # for remote, because we access less data.
        _fetch_order = 'unordered'
        _fetch_gc_optimal = True
        _fetch_uses_deltas = False

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            return ('Bazaar development format - hash16chk+gc rich-root'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash16chk+groupcompress")


    class RepositoryFormatPackGCCHK255(RepositoryFormatPackDevelopment5Hash255):
        """A hashed CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository
        # Setting this to True causes us to use InterModel1And2, so for now set
        # it to False which uses InterDifferingSerializer. When IM1&2 is
        # removed (as it is in bzr.dev) we can set this back to True.
        rich_root_data = True

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            return ('Bazaar development format - hash255chk+gc rich-root'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash255chk+groupcompress")


    chk_serializer_255_bigpage = chk_serializer.CHKSerializer(65536, 'hash-255-way')
    class RepositoryFormatPackGCCHK255Big(RepositoryFormatPackGCCHK255):
        """A hashed CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository
        # For right now, setting this to True gives us InterModel1And2 rather
        # than InterDifferingSerializer
        rich_root_data = True
        _serializer = chk_serializer_255_bigpage
        # Note: We cannot unpack a delta that references a text we haven't
        # seen yet. There are 2 options, work in fulltexts, or require
        # topological sorting. Using fulltexts is more optimal for local
        # operations, because the source can be smart about extracting
        # multiple in-a-row (and sharing strings). Topological is better
        # for remote, because we access less data.
        _fetch_order = 'unordered'
        _fetch_gc_optimal = True
        _fetch_uses_deltas = False

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            return ('Bazaar development format - hash255chk+gc rich-root bigpage'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash255chk+groupcompress + bigpage")


def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code."""
    formats = (RepositoryFormatPackGCPlain,)
    if chk_support:
        formats = formats + (RepositoryFormatPackGCCHK16,
                             RepositoryFormatPackGCCHK255,
                             RepositoryFormatPackGCCHK255Big)
    if isinstance(source._format, formats) or isinstance(target._format, formats):
        return False
    else:
        return orig_method(source, target)


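# Divert InterPackRepo's compatibility check so the stock pack-to-pack fetch
# path is bypassed whenever either side uses one of the groupcompress formats
# defined above.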
InterPackRepo.is_compatible = staticmethod(pack_incompatible)