# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Repository formats using B+Tree indices and groupcompress compression."""

import time

from bzrlib import (
    bzrdir, chk_map, chk_serializer, debug, errors, index as _mod_index,
    inventory, knit, osutils, pack, revision as _mod_revision, trace, ui,
    )
from bzrlib.index import GraphIndex, GraphIndexBuilder
from bzrlib.groupcompress import (
    GroupCompressVersionedFiles,
    _GCGraphIndex,
    )
from bzrlib.repofmt.pack_repo import (
    NewPack,
    Packer,
    KnitPackRepository,
    PackRootCommitBuilder,
    RepositoryPackCollection,
    RepositoryFormatKnitPack6,
    CHKInventoryRepository,
    RepositoryFormatPackDevelopment5Hash16,
    RepositoryFormatPackDevelopment5Hash255,
    )


class GCPack(NewPack):

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a NewPack instance.

        :param pack_collection: A PackCollection into which this is being
            inserted.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g. '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        """
        # replaced from NewPack to:
        #  - change inventory reference list length to 1
        #  - change texts reference lists to 1
        # TODO: patch this to be parameterised

        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        index_builder_class = pack_collection._index_builder_class
        if pack_collection.chk_index is not None:
            chk_index = index_builder_class(reference_lists=0)
            # Revisions: parents list, no text compression.
            index_builder_class(reference_lists=1),
            # Inventory: a single reference list recording parents; with
            # groupcompress there is no separate compression-parent list to
            # maintain.
            index_builder_class(reference_lists=1),
            # Texts: per-file graph, for all fileids - so one reference list
            # and two elements in the key tuple.
            index_builder_class(reference_lists=1, key_elements=2),
            # Signatures: Just blobs to store, no compression, no parents
            index_builder_class(reference_lists=0),
            # CHK based storage - just blobs, no compression or parents.
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = osutils.md5()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = osutils.rand_chars(20) + upload_suffix
        # when was this pack started ?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them. Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.  (A standalone sketch of this pattern follows this class.)
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        # what state is the pack in? (open, finished, aborted)

    def _check_references(self):
        """Make sure our external references are present.

        Packs are allowed to have deltas whose base is not in the pack, but it
        must be present somewhere in this collection. It is not allowed to
        have deltas based on a fallback repository.
        (See <https://bugs.launchpad.net/bzr/+bug/288751>)
        """
        # Groupcompress packs don't have any external references
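

# A standalone sketch of the buffered-write closure described in
# GCPack.__init__ above.  The names here (make_pack_writer, cache_limit) are
# illustrative, not part of bzrlib; the point is that keeping the buffer in a
# mutable list lets the closure update it through fast local-variable lookups
# instead of attribute access on the pack object:
#
#   def make_pack_writer(write, update_hash, cache_limit):
#       buf = [[], 0]    # [list of byte chunks, total buffered length]
#       def write_data(bytes, flush=False, _buf=buf):
#           _buf[0].append(bytes)
#           _buf[1] += len(bytes)
#           if flush or _buf[1] > cache_limit:
#               data = ''.join(_buf[0])
#               _buf[:] = [[], 0]
#               update_hash(data)
#               write(data)
#       return write_data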


class GCCHKPacker(Packer):
    """This class understands what it takes to collect a GCCHK repo."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        super(GCCHKPacker, self).__init__(pack_collection, packs, suffix,
                                          revision_ids=revision_ids,
                                          reload_func=reload_func)
        self._pack_collection = pack_collection
        # ATM, we only support this for GCCHK repositories
        assert pack_collection.chk_index is not None
        self._gather_text_refs = False
        self._chk_id_roots = []
        self._chk_p_id_roots = []
        self._text_refs = None
        # set by .pack() if self.revision_ids is not None
        self.revision_keys = None

    def _get_progress_stream(self, source_vf, keys, message, pb):
        substream = source_vf.get_record_stream(keys, 'groupcompress', True)
        for idx, record in enumerate(substream):
            pb.update(message, idx + 1, len(keys))

    def _get_filtered_inv_stream(self, source_vf, keys, message, pb=None):
        """Filter the texts of inventories, to find the chk pages."""
        total_keys = len(keys)
        def _filtered_inv_stream():
            id_roots_set = set()
            p_id_roots_set = set()
            stream = source_vf.get_record_stream(keys, 'groupcompress', True)
            for idx, record in enumerate(stream):
                bytes = record.get_bytes_as('fulltext')
                chk_inv = inventory.CHKInventory.deserialise(None, bytes,
                    record.key)
                pb.update('inv', idx, total_keys)
                key = chk_inv.id_to_entry.key()
                if key not in id_roots_set:
                    self._chk_id_roots.append(key)
                    id_roots_set.add(key)
                p_id_map = chk_inv.parent_id_basename_to_file_id
                assert p_id_map is not None
                key = p_id_map.key()
                if key not in p_id_roots_set:
                    p_id_roots_set.add(key)
                    self._chk_p_id_roots.append(key)
                yield record
            # We have finished processing all of the inventory records, we
            # don't need these sets anymore
            id_roots_set.clear()
            p_id_roots_set.clear()
        return _filtered_inv_stream()

    def _get_chk_streams(self, source_vf, keys, pb=None):
        # We want to stream the keys from 'id_roots', and things they
        # reference, and then stream things from p_id_roots and things they
        # reference, and then any remaining keys that we didn't get to.
        #
        # We also group referenced texts together, so if one root references a
        # text with prefix 'a', and another root references a node with prefix
        # 'a', we want to yield those nodes before we yield the nodes for 'b'.
        # This keeps 'similar' nodes together.
        #
        # Note: We probably actually want multiple streams here, to help the
        # client understand that the different levels won't compress well
        # against each other.
        # Test the difference between using one Group per level, and
        # using 1 Group per prefix. (so '' (root) would get a group, then
        # all the references to search-key 'a' would get a group, etc.)
        total_keys = len(keys)
        remaining_keys = set(keys)
        if self._gather_text_refs:
            bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
            self._text_refs = set()
        def _get_referenced_stream(root_keys, parse_leaf_nodes=False):
            keys_by_search_prefix = {}
            remaining_keys.difference_update(cur_keys)
            def handle_internal_node(node):
                for prefix, value in node._items.iteritems():
                    # We don't want to request the same key twice, and we
                    # want to order it by the first time it is seen.
                    # Even further, we don't want to request a key which is
                    # not in this group of pack files (it should be in the
                    # repo, but it doesn't have to be in the group being
                    # packed.)
                    # TODO: consider how to treat externally referenced chk
                    # pages as 'external_references' so that we
                    # always fill them in for stacked branches
                    if value not in next_keys and value in remaining_keys:
                        keys_by_search_prefix.setdefault(prefix,
            def handle_leaf_node(node):
                # Store is None, because we know we have a LeafNode, and we
                # just want its entries
                for file_id, bytes in node.iteritems(None):
                    name_utf8, file_id, revision_id = bytes_to_info(bytes)
                    self._text_refs.add((file_id, revision_id))
            stream = source_vf.get_record_stream(cur_keys,
                'as-requested', True)
            for record in stream:
                bytes = record.get_bytes_as('fulltext')
                # We don't care about search_key_func for this code,
                # because we only care about external references.
                node = chk_map._deserialise(bytes, record.key,
                    search_key_func=None)
                common_base = node._search_prefix
                if isinstance(node, chk_map.InternalNode):
                    handle_internal_node(node)
                elif parse_leaf_nodes:
                    handle_leaf_node(node)
                pb.update('chk node', counter[0], total_keys)
            # Double check that we won't be emitting any keys twice
            # If we get rid of the pre-calculation of all keys, we could
            # turn this around and do
            # next_keys.difference_update(seen_keys)
            # However, we also may have references to chk pages in another
            # pack file during autopack. We filter earlier, so we should no
            # longer need to do this
            # next_keys = next_keys.intersection(remaining_keys)
            for prefix in sorted(keys_by_search_prefix):
                cur_keys.extend(keys_by_search_prefix.pop(prefix))
        for stream in _get_referenced_stream(self._chk_id_roots,
                                             self._gather_text_refs):
            yield stream
        del self._chk_id_roots
        # while it isn't really possible for chk_id_roots to not be in the
        # local group of packs, it is possible that the tree shape has not
        # changed recently, so we need to filter _chk_p_id_roots by the
        # remaining keys
        chk_p_id_roots = [key for key in self._chk_p_id_roots
                          if key in remaining_keys]
        del self._chk_p_id_roots
        for stream in _get_referenced_stream(chk_p_id_roots, False):
            yield stream
        trace.mutter('There were %d keys in the chk index, %d of which'
                     ' were not referenced', total_keys,
                     len(remaining_keys))
        if self.revision_ids is None:
            stream = source_vf.get_record_stream(remaining_keys,

    def _build_vf(self, index_name, parents, delta, for_write=False):
        """Build a VersionedFiles instance on top of this group of packs."""
        index_name = index_name + '_index'
        index_to_pack = {}
        access = knit._DirectPackAccess(index_to_pack)
        assert self.new_pack is not None
        index = getattr(self.new_pack, index_name)
        index_to_pack[index] = self.new_pack.access_tuple()
        index.set_optimize(for_size=True)
        access.set_writer(self.new_pack._writer, index,
            self.new_pack.access_tuple())
        add_callback = index.add_nodes
        indices = []
        for pack in self.packs:
            sub_index = getattr(pack, index_name)
            index_to_pack[sub_index] = pack.access_tuple()
            indices.append(sub_index)
        index = _mod_index.CombinedGraphIndex(indices)
        vf = GroupCompressVersionedFiles(
                add_callback=add_callback,
                is_locked=self._pack_collection.repo.is_locked),

    def _build_vfs(self, index_name, parents, delta):
        """Build the source and target VersionedFiles."""
        source_vf = self._build_vf(index_name, parents,
                                   delta, for_write=False)
        target_vf = self._build_vf(index_name, parents,
                                   delta, for_write=True)
        return source_vf, target_vf

    def _copy_stream(self, source_vf, target_vf, keys, message, vf_to_stream,
                     pb_offset):
        trace.mutter('repacking %d %s', len(keys), message)
        self.pb.update('repacking %s' % (message,), pb_offset)
        child_pb = ui.ui_factory.nested_progress_bar()
        stream = vf_to_stream(source_vf, keys, message, child_pb)
        for _ in target_vf._insert_record_stream(stream,

    def _copy_revision_texts(self):
        source_vf, target_vf = self._build_vfs('revision', True, False)
        if not self.revision_keys:
            # We are doing a full fetch, aka 'pack'
            self.revision_keys = source_vf.keys()
        self._copy_stream(source_vf, target_vf, self.revision_keys,
                          'revisions', self._get_progress_stream, 1)

    def _copy_inventory_texts(self):
        source_vf, target_vf = self._build_vfs('inventory', True, True)
        self._copy_stream(source_vf, target_vf, self.revision_keys,
                          'inventories', self._get_filtered_inv_stream, 2)

    def _copy_chk_texts(self):
        source_vf, target_vf = self._build_vfs('chk', False, False)
        # TODO: This is technically spurious... if it is a performance issue,
        #       remove it
        total_keys = source_vf.keys()
        trace.mutter('repacking chk: %d id_to_entry roots,'
                     ' %d p_id_map roots, %d total keys',
                     len(self._chk_id_roots), len(self._chk_p_id_roots),
                     len(total_keys))
        self.pb.update('repacking chk', 3)
        child_pb = ui.ui_factory.nested_progress_bar()
        for stream in self._get_chk_streams(source_vf, total_keys,
            for _ in target_vf._insert_record_stream(stream,

    def _copy_text_texts(self):
        source_vf, target_vf = self._build_vfs('text', True, True)
        # XXX: We don't walk the chk map to determine referenced (file_id,
        # revision_id) keys. We don't do it yet because you really need
        # to filter out the ones that are present in the parents of the
        # rev just before the ones you are copying, otherwise the filter
        # is grabbing too many keys...
        text_keys = source_vf.keys()
        self._copy_stream(source_vf, target_vf, text_keys,
                          'text', self._get_progress_stream, 4)

    def _copy_signature_texts(self):
        source_vf, target_vf = self._build_vfs('signature', False, False)
        signature_keys = source_vf.keys()
        signature_keys = signature_keys.intersection(self.revision_keys)
        self._copy_stream(source_vf, target_vf, signature_keys,
                          'signatures', self._get_progress_stream, 5)

    def _create_pack_from_packs(self):
        self.pb.update('repacking', 0, 7)
        self.new_pack = self.open_pack()
        # Is this necessary for GC ?
        self.new_pack.set_write_cache_size(1024*1024)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_chk_texts()
        self._copy_text_texts()
        self._copy_signature_texts()
        self.new_pack._check_references()
        if not self._use_pack(self.new_pack):
            self.new_pack.abort()
        self.pb.update('finishing repack', 6, 7)
        self.new_pack.finish()
        self._pack_collection.allocate(self.new_pack)
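

# Illustrative driver for the packer above (a sketch of what
# GCRepositoryPackCollection._execute_pack_operations below does; the
# pack_collection, packs, reload_func and pb values are assumed to be
# supplied by the caller):
#
#   packer = GCCHKPacker(pack_collection, packs, '.autopack',
#                        reload_func=reload_func)
#   new_pack = packer.pack(pb)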


class GCCHKReconcilePacker(GCCHKPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``bzr reconcile`` to cause parent text pointers to be
    correct.
    """

    def __init__(self, *args, **kwargs):
        super(GCCHKReconcilePacker, self).__init__(*args, **kwargs)
        self._data_changed = False
        self._gather_text_refs = True

    def _copy_inventory_texts(self):
        source_vf, target_vf = self._build_vfs('inventory', True, True)
        self._copy_stream(source_vf, target_vf, self.revision_keys,
                          'inventories', self._get_filtered_inv_stream, 2)
        if source_vf.keys() != self.revision_keys:
            self._data_changed = True

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy."""
        source_vf, target_vf = self._build_vfs('text', True, True)
        trace.mutter('repacking %d texts', len(self._text_refs))
        self.pb.update("repacking texts", 4)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        # We want the one we just wrote, so base it on self.new_pack
        revision_vf = self._build_vf('revision', True, False, for_write=True)
        ancestor_keys = revision_vf.get_parent_map(revision_vf.keys())
        # Strip keys back into revision_ids.
        ancestors = dict((k[0], tuple([p[0] for p in parents]))
                         for k, parents in ancestor_keys.iteritems())
        # TODO: _generate_text_key_index should be much cheaper to generate from
        # a chk repository, rather than the current implementation
        ideal_index = repo._generate_text_key_index(None, ancestors)
        file_id_parent_map = source_vf.get_parent_map(self._text_refs)
        # 2) generate a keys list that contains all the entries that can
        # be used as-is, with corrected parents.
        new_parent_keys = {} # (key, parent_keys)
        discarded_keys = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        for key in self._text_refs:
            try:
                ideal_parents = tuple(ideal_index[key])
            except KeyError:
                discarded_keys.append(key)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                source_parents = file_id_parent_map[key]
                if ideal_parents == source_parents:
                    # The text already has the correct parents.
                    pass
                else:
                    # We need to change the parent graph, but we don't need to
                    # re-insert the text (since we don't pun the compression
                    # parent with the parents list)
                    self._data_changed = True
                    new_parent_keys[key] = ideal_parents
        # we're finished with some data.
        del file_id_parent_map
        # 3) bulk copy the data, updating records that need it
        def _update_parents_for_texts():
            stream = source_vf.get_record_stream(self._text_refs,
                'groupcompress', False)
            for record in stream:
                if record.key in new_parent_keys:
                    record.parents = new_parent_keys[record.key]
                yield record
        target_vf.insert_record_stream(_update_parents_for_texts())

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        return new_pack.data_inserted() and self._data_changed


class GCRepositoryPackCollection(RepositoryPackCollection):

    pack_factory = GCPack

    def _already_packed(self):
        """Is the collection already packed?"""
        # Always repack GC repositories for now
        return False

    def _execute_pack_operations(self, pack_operations,
                                 _packer_class=GCCHKPacker,
                                 reload_func=None):
        """Execute a series of pack operations.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: The class of packer to use (default:
            GCCHKPacker).
        """
        # XXX: Copied across from RepositoryPackCollection simply because we
        #      want to override the _packer_class ... :(
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            packer = GCCHKPacker(self, packs, '.autopack',
                                 reload_func=reload_func)
            try:
                packer.pack()
            except errors.RetryWithNewPacks:
                # An exception is propagating out of this context, make sure
                # this packer has cleaned up. Packer() doesn't set its new_pack
                # state into the RepositoryPackCollection object, so we only
                # have access to it directly here.
                if packer.new_pack is not None:
                    packer.new_pack.abort()
                raise
            for pack in packs:
                self._remove_pack_from_memory(pack)
        # record the newly available packs and stop advertising the old
        # packs
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)


# XXX: This format is scheduled for termination
#
# class GCPackRepository(KnitPackRepository):
#     """GC customisation of KnitPackRepository."""
#
#     def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
#         """Overridden to change pack collection class."""
#         KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
#             _commit_builder_class, _serializer)
#         # and now replace everything it did :)
#         index_transport = self._transport.clone('indices')
#         self._pack_collection = GCRepositoryPackCollection(self,
#             self._transport, index_transport,
#             self._transport.clone('upload'),
#             self._transport.clone('packs'),
#             _format.index_builder_class,
#             _format.index_class,
#             use_chk_index=self._format.supports_chks,
#         self.inventories = GroupCompressVersionedFiles(
#             _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
#                 add_callback=self._pack_collection.inventory_index.add_callback,
#                 parents=True, is_locked=self.is_locked),
#             access=self._pack_collection.inventory_index.data_access)
#         self.revisions = GroupCompressVersionedFiles(
#             _GCGraphIndex(self._pack_collection.revision_index.combined_index,
#                 add_callback=self._pack_collection.revision_index.add_callback,
#                 parents=True, is_locked=self.is_locked),
#             access=self._pack_collection.revision_index.data_access,
#         self.signatures = GroupCompressVersionedFiles(
#             _GCGraphIndex(self._pack_collection.signature_index.combined_index,
#                 add_callback=self._pack_collection.signature_index.add_callback,
#                 parents=False, is_locked=self.is_locked),
#             access=self._pack_collection.signature_index.data_access,
#         self.texts = GroupCompressVersionedFiles(
#             _GCGraphIndex(self._pack_collection.text_index.combined_index,
#                 add_callback=self._pack_collection.text_index.add_callback,
#                 parents=True, is_locked=self.is_locked),
#             access=self._pack_collection.text_index.data_access)
#         if _format.supports_chks:
#             # No graph, no compression:- references from chks are between
#             # different objects not temporal versions of the same; and without
#             # some sort of temporal structure knit compression will just fail.
#             self.chk_bytes = GroupCompressVersionedFiles(
#                 _GCGraphIndex(self._pack_collection.chk_index.combined_index,
#                     add_callback=self._pack_collection.chk_index.add_callback,
#                     parents=False, is_locked=self.is_locked),
#                 access=self._pack_collection.chk_index.data_access)
#             self.chk_bytes = None
#         # True when the repository object is 'write locked' (as opposed to the
#         # physical lock only taken out around changes to the pack-names list.)
#         # Another way to represent this would be a decorator around the control
#         # files object that presents logical locks as physical ones - if this
#         # gets ugly consider that alternative design. RBC 20071011
#         self._write_lock_count = 0
#         self._transaction = None
#
#         self._reconcile_does_inventory_gc = True
#         self._reconcile_fixes_text_parents = True
#         self._reconcile_backsup_inventory = False
#
#     def suspend_write_group(self):
#         raise errors.UnsuspendableWriteGroup(self)
#
#     def _resume_write_group(self, tokens):
#         raise errors.UnsuspendableWriteGroup(self)
#
#     def _reconcile_pack(self, collection, packs, extension, revs, pb):
#         return packer.pack(pb)


class GCCHKPackRepository(CHKInventoryRepository):
    """GC customisation of CHKInventoryRepository."""

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Overridden to change pack collection class."""
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # and now replace everything it did :)
        index_transport = self._transport.clone('indices')
        self._pack_collection = GCRepositoryPackCollection(self,
            self._transport, index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            use_chk_index=self._format.supports_chks,
        self.inventories = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.inventory_index.data_access)
        self.revisions = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.revision_index.data_access,
        self.signatures = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=self._pack_collection.signature_index.data_access,
        self.texts = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.text_index.data_access)
        # No parents, individual CHK pages don't have specific ancestry
        self.chk_bytes = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                add_callback=self._pack_collection.chk_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=self._pack_collection.chk_index.data_access)
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None

        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def suspend_write_group(self):
        raise errors.UnsuspendableWriteGroup(self)

    def _resume_write_group(self, tokens):
        raise errors.UnsuspendableWriteGroup(self)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        # assert revs is None
        packer = GCCHKReconcilePacker(collection, packs, extension)
        return packer.pack(pb)


# This format has been disabled for now. It is not expected that this will be a
# useful next-generation format.
#
# class RepositoryFormatPackGCPlain(RepositoryFormatKnitPack6):
#     """A B+Tree index using pack repository."""
#
#     repository_class = GCPackRepository
#     rich_root_data = False
#     # Note: We cannot unpack a delta that references a text we haven't
#     # seen yet. There are 2 options, work in fulltexts, or require
#     # topological sorting. Using fulltexts is more optimal for local
#     # operations, because the source can be smart about extracting
#     # multiple in-a-row (and sharing strings). Topological is better
#     # for remote, because we access less data.
#     _fetch_order = 'unordered'
#     _fetch_uses_deltas = False
#
#     def _get_matching_bzrdir(self):
#         return bzrdir.format_registry.make_bzrdir('gc-no-rich-root')
#
#     def _ignore_setting_bzrdir(self, format):
#     _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
#
#     def get_format_string(self):
#         """See RepositoryFormat.get_format_string()."""
#         return ("Bazaar development format - btree+gc "
#             "(needs bzr.dev from 1.13)\n")
#
#     def get_format_description(self):
#         """See RepositoryFormat.get_format_description()."""
#         return ("Development repository format - btree+groupcompress "
#             ", interoperates with pack-0.92\n")


class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
    """A hashed CHK+group compress pack repository."""

    repository_class = GCCHKPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_external_lookups = True
    supports_tree_reference = True

    # Note: We cannot unpack a delta that references a text we haven't
    # seen yet. There are 2 options, work in fulltexts, or require
    # topological sorting. Using fulltexts is more optimal for local
    # operations, because the source can be smart about extracting
    # multiple in-a-row (and sharing strings). Topological is better
    # for remote, because we access less data.
    _fetch_order = 'unordered'
    _fetch_uses_deltas = False

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('gc-chk16')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ('Bazaar development format - hash16chk+gc rich-root'
                ' (needs bzr.dev from 1.13)\n')

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - hash16chk+groupcompress")

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)


class RepositoryFormatPackGCCHK255(RepositoryFormatPackDevelopment5Hash255):
    """A hashed CHK+group compress pack repository."""

    repository_class = GCCHKPackRepository
    # Setting this to True causes us to use InterModel1And2, so for now set
    # it to False which uses InterDifferingSerializer. When IM1&2 is
    # removed (as it is in bzr.dev) we can set this back to True.
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('gc-chk255')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ('Bazaar development format - hash255chk+gc rich-root'
                ' (needs bzr.dev from 1.13)\n')

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - hash255chk+groupcompress")

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)


class RepositoryFormatPackGCCHK255Big(RepositoryFormatPackGCCHK255):
    """A hashed CHK+group compress pack repository."""

    repository_class = GCCHKPackRepository
    # For right now, setting this to True gives us InterModel1And2 rather
    # than InterDifferingSerializer
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    _serializer = chk_serializer.chk_serializer_255_bigpage
    # Note: We cannot unpack a delta that references a text we haven't
    # seen yet. There are 2 options, work in fulltexts, or require
    # topological sorting. Using fulltexts is more optimal for local
    # operations, because the source can be smart about extracting
    # multiple in-a-row (and sharing strings). Topological is better
    # for remote, because we access less data.
    _fetch_order = 'unordered'
    _fetch_uses_deltas = False

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('gc-chk255-big')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ('Bazaar development format - hash255chk+gc rich-root bigpage'
                ' (needs bzr.dev from 1.13)\n')

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - hash255chk+groupcompress + bigpage")

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)