# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from itertools import izip
import time

from bzrlib import (
    debug,
    graph,
    osutils,
    pack,
    ui,
    )
from bzrlib.index import (
    CombinedGraphIndex,
    GraphIndexPrefixAdapter,
    )
from bzrlib.knit import (
    KnitPlainFactory,
    KnitVersionedFiles,
    _KnitGraphIndex,
    _DirectPackAccess,
    )
from bzrlib import tsort
""")
from bzrlib import errors

from bzrlib.decorators import needs_write_lock
from bzrlib.btree_index import (
    BTreeGraphIndex,
    BTreeBuilder,
    )
from bzrlib.index import (
    GraphIndex,
    InMemoryGraphIndex,
    )
from bzrlib.repofmt.knitrepo import KnitRepository
from bzrlib.repository import (
    CommitBuilder,
    MetaDirRepositoryFormat,
    RootCommitBuilder,
    )
import bzrlib.revision as _mod_revision
from bzrlib.trace import (
    mutter,
    )


class PackCommitBuilder(CommitBuilder):
    """A subclass of CommitBuilder to add texts with pack semantics.

    Specifically this uses one knit object rather than one knit object per
    added text, reducing memory and object pressure.
    """

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        CommitBuilder.__init__(self, repository, parents, config,
            timestamp=timestamp, timezone=timezone, committer=committer,
            revprops=revprops, revision_id=revision_id)
        self._file_graph = graph.Graph(
            repository._pack_collection.text_index.combined_index)

    def _heads(self, file_id, revision_ids):
        keys = [(file_id, revision_id) for revision_id in revision_ids]
        return set([key[1] for key in self._file_graph.heads(keys)])
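
    # Illustrative note (not part of the original module): _heads() works on
    # the per-file graph, whose keys are (file_id, revision_id) tuples, so for
    # a builder obtained from a write-locked pack repository:
    #
    #   builder._heads('file-id', ['rev-1', 'rev-2'])
    #   # looks up ('file-id', 'rev-1') and ('file-id', 'rev-2') in
    #   # self._file_graph and returns only the revision ids of the heads.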


class PackRootCommitBuilder(RootCommitBuilder):
    """A subclass of RootCommitBuilder to add texts with pack semantics.

    Specifically this uses one knit object rather than one knit object per
    added text, reducing memory and object pressure.
    """

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        CommitBuilder.__init__(self, repository, parents, config,
            timestamp=timestamp, timezone=timezone, committer=committer,
            revprops=revprops, revision_id=revision_id)
        self._file_graph = graph.Graph(
            repository._pack_collection.text_index.combined_index)

    def _heads(self, file_id, revision_ids):
        keys = [(file_id, revision_id) for revision_id in revision_ids]
        return set([key[1] for key in self._file_graph.heads(keys)])


class Pack(object):
    """An in memory proxy for a pack and its indices.

    This is a base class that is not directly used, instead the classes
    ExistingPack and NewPack are used.
    """

    def __init__(self, revision_index, inventory_index, text_index,
        signature_index):
        """Create a pack instance.

        :param revision_index: A GraphIndex for determining what revisions are
            present in the Pack and accessing the locations of their texts.
        :param inventory_index: A GraphIndex for determining what inventories are
            present in the Pack and accessing the locations of their
            texts.
        :param text_index: A GraphIndex for determining what file texts
            are present in the pack and accessing the locations of their
            texts/deltas (via (fileid, revisionid) tuples).
        :param signature_index: A GraphIndex for determining what signatures are
            present in the Pack and accessing the locations of their texts.
        """
        self.revision_index = revision_index
        self.inventory_index = inventory_index
        self.text_index = text_index
        self.signature_index = signature_index

    def access_tuple(self):
        """Return a tuple (transport, name) for the pack content."""
        return self.pack_transport, self.file_name()

    def file_name(self):
        """Get the file name for the pack on disk."""
        return self.name + '.pack'

    def get_revision_count(self):
        return self.revision_index.key_count()

    def inventory_index_name(self, name):
        """The inv index is the name + .iix."""
        return self.index_name('inventory', name)

    def revision_index_name(self, name):
        """The revision index is the name + .rix."""
        return self.index_name('revision', name)

    def signature_index_name(self, name):
        """The signature index is the name + .six."""
        return self.index_name('signature', name)

    def text_index_name(self, name):
        """The text index is the name + .tix."""
        return self.index_name('text', name)
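
    # Illustrative note (not part of the original module): a pack named by
    # its md5, e.g. '0123abcd', is stored as '0123abcd.pack' alongside four
    # index files derived from the same name via index_name() (provided by
    # NewPack below):
    #
    #   pack.revision_index_name('0123abcd')   # -> '0123abcd.rix'
    #   pack.inventory_index_name('0123abcd')  # -> '0123abcd.iix'
    #   pack.text_index_name('0123abcd')       # -> '0123abcd.tix'
    #   pack.signature_index_name('0123abcd')  # -> '0123abcd.six'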


class ExistingPack(Pack):
    """An in memory proxy for an existing .pack and its disk indices."""

    def __init__(self, pack_transport, name, revision_index, inventory_index,
        text_index, signature_index):
        """Create an ExistingPack object.

        :param pack_transport: The transport where the pack file resides.
        :param name: The name of the pack on disk in the pack_transport.
        """
        Pack.__init__(self, revision_index, inventory_index, text_index,
            signature_index)
        self.name = name
        self.pack_transport = pack_transport
        if None in (revision_index, inventory_index, text_index,
                signature_index, name, pack_transport):
            raise AssertionError()

    def __eq__(self, other):
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "<bzrlib.repofmt.pack_repo.Pack object at 0x%x, %s, %s" % (
            id(self), self.pack_transport, self.name)


class NewPack(Pack):
    """An in memory proxy for a pack which is being created."""

    # A map of index 'type' to the file extension and position in the
    # index_sizes array.
    index_definitions = {
        'revision': ('.rix', 0),
        'inventory': ('.iix', 1),
        'text': ('.tix', 2),
        'signature': ('.six', 3),
        }

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a NewPack instance.

        :param pack_collection: A PackCollection into which this is being inserted.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g '.autopack'
        :param file_mode: Unix permissions for newly created file.
        """
        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        index_builder_class = pack_collection._index_builder_class
        Pack.__init__(self,
            # Revisions: parents list, no text compression.
            index_builder_class(reference_lists=1),
            # Inventory: We want to map compression only, but currently the
            # knit code hasn't been updated enough to understand that, so we
            # have a regular 2-list index giving parents and compression
            index_builder_class(reference_lists=2),
            # Texts: compression and per file graph, for all fileids - so two
            # reference lists and two elements in the key tuple.
            index_builder_class(reference_lists=2, key_elements=2),
            # Signatures: Just blobs to store, no compression, no parents
            index_builder_class(reference_lists=0),
            )
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = osutils.md5()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = osutils.rand_chars(20) + upload_suffix
        # when was this pack started ?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them. Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
                _write(bytes)
                _update(bytes)
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'

    def abort(self):
        """Cancel creating this pack."""
        self._state = 'aborted'
        self.write_stream.close()
        # Remove the temporary pack file.
        self.upload_transport.delete(self.random_name)
        # The indices have no state on disk.

    def access_tuple(self):
        """Return a tuple (transport, name) for the pack content."""
        if self._state == 'finished':
            return Pack.access_tuple(self)
        elif self._state == 'open':
            return self.upload_transport, self.random_name
        else:
            raise AssertionError(self._state)
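
    # Illustrative note (not part of the original module): while a NewPack is
    # still 'open' its bytes live under a random temporary name on the upload
    # transport; only after finish() renames it does access_tuple() fall back
    # to the Pack behaviour of (pack_transport, '<md5>.pack'). A sketch:
    #
    #   new_pack = NewPack(collection, upload_suffix='.autopack')
    #   new_pack.access_tuple()  # (upload_transport, new_pack.random_name)
    #   new_pack.finish()
    #   new_pack.access_tuple()  # (pack_transport, new_pack.name + '.pack')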

    def _check_references(self):
        """Make sure our external references are present.

        Packs are allowed to have deltas whose base is not in the pack, but it
        must be present somewhere in this collection. It is not allowed to
        have deltas based on a fallback repository.
        (See <https://bugs.launchpad.net/bzr/+bug/288751>)
        """
        missing_items = {}
        for (index_name, external_refs, index) in [
            ('texts',
                self.text_index._external_references(),
                self._pack_collection.text_index.combined_index),
            ('inventories',
                self.inventory_index._external_references(),
                self._pack_collection.inventory_index.combined_index),
            ]:
            missing = external_refs.difference(
                k for (idx, k, v, r) in
                index.iter_entries(external_refs))
            if missing:
                missing_items[index_name] = sorted(list(missing))
        if missing_items:
            from pprint import pformat
            raise errors.BzrCheckError(
                "Newly created pack file %r has delta references to "
                "items not in its repository:\n%s"
                % (self, pformat(missing_items)))

    def data_inserted(self):
        """True if data has been added to this pack."""
        return bool(self.get_revision_count() or
            self.inventory_index.key_count() or
            self.text_index.key_count() or
            self.signature_index.key_count())

    def finish(self):
        """Finish the new pack.

         - finalises the content
         - assigns a name (the md5 of the content, currently)
         - writes out the associated indices
         - renames the pack into place.
         - stores the index size tuple for the pack in the index_sizes
           attribute.
        """
        self._writer.end()
        if self._buffer[1]:
            self._write_data('', flush=True)
        self.name = self._hash.hexdigest()
        self._check_references()
        # XXX: It'd be better to write them all to temporary names, then
        # rename them all into place, so that the window when only some are
        # visible is smaller. On the other hand none will be seen until
        # they're in the names list.
        self.index_sizes = [None, None, None, None]
        self._write_index('revision', self.revision_index, 'revision')
        self._write_index('inventory', self.inventory_index, 'inventory')
        self._write_index('text', self.text_index, 'file texts')
        self._write_index('signature', self.signature_index,
            'revision signatures')
        self.write_stream.close()
        # Note that this will clobber an existing pack with the same name,
        # without checking for hash collisions. While this is undesirable this
        # is something that can be rectified in a subsequent release. One way
        # to rectify it may be to leave the pack at the original name, writing
        # its pack-names entry as something like 'HASH: index-sizes
        # temporary-name'. Allocate that and check for collisions, if it is
        # collision free then rename it into place. If clients know this scheme
        # they can handle missing-file errors by:
        #  - try for HASH.pack
        #  - try for temporary-name
        #  - refresh the pack-list to see if the pack is now absent
        self.upload_transport.rename(self.random_name,
                '../packs/' + self.name + '.pack')
        self._state = 'finished'
        if 'pack' in debug.debug_flags:
            # XXX: size might be interesting?
            mutter('%s: create_pack: pack renamed into place: %s%s->%s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                self.pack_transport, self.name,
                time.time() - self.start_time)

    def flush(self):
        """Flush any current data."""
        if self._buffer[1]:
            bytes = ''.join(self._buffer[0])
            self.write_stream.write(bytes)
            self._hash.update(bytes)
            self._buffer[:] = [[], 0]

    def index_name(self, index_type, name):
        """Get the disk name of an index type for pack name 'name'."""
        return name + NewPack.index_definitions[index_type][0]

    def index_offset(self, index_type):
        """Get the position in an index_sizes array for a given index type."""
        return NewPack.index_definitions[index_type][1]

    def _replace_index_with_readonly(self, index_type):
        setattr(self, index_type + '_index',
            self.index_class(self.index_transport,
                self.index_name(index_type, self.name),
                self.index_sizes[self.index_offset(index_type)]))

    def set_write_cache_size(self, size):
        self._cache_limit = size

    def _write_index(self, index_type, index, label):
        """Write out an index.

        :param index_type: The type of index to write - e.g. 'revision'.
        :param index: The index object to serialise.
        :param label: What label to give the index e.g. 'revision'.
        """
        index_name = self.index_name(index_type, self.name)
        self.index_sizes[self.index_offset(index_type)] = \
            self.index_transport.put_file(index_name, index.finish(),
            mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            # XXX: size might be interesting?
            mutter('%s: create_pack: wrote %s index: %s%s t+%6.3fs',
                time.ctime(), label, self.upload_transport.base,
                self.random_name, time.time() - self.start_time)
        # Replace the writable index on this object with a readonly,
        # presently unloaded index. We should alter
        # the index layer to make its finish() error if add_node is
        # subsequently used. RBC
        self._replace_index_with_readonly(index_type)


class AggregateIndex(object):
    """An aggregated index for the RepositoryPackCollection.

    AggregateIndex is responsible for managing the PackAccess object,
    Index-To-Pack mapping, and all indices list for a specific type of index
    such as 'revision index'.

    A CombinedIndex provides an index on a single key space built up
    from several on-disk indices. The AggregateIndex builds on this
    to provide a knit access layer, and allows having up to one writable
    index within the collection.
    """
    # XXX: Probably 'can be written to' could/should be separated from 'acts
    # like a knit index' -- mbp 20071024

    def __init__(self, reload_func=None):
        """Create an AggregateIndex.

        :param reload_func: A function to call if we find we are missing an
            index. Should have the form reload_func() => True if the list of
            active pack files has changed.
        """
        self._reload_func = reload_func
        self.index_to_pack = {}
        self.combined_index = CombinedGraphIndex([], reload_func=reload_func)
        self.data_access = _DirectPackAccess(self.index_to_pack,
                                             reload_func=reload_func)
        self.add_callback = None

    def replace_indices(self, index_to_pack, indices):
        """Replace the current mappings with fresh ones.

        Eventually this should probably not be used; incremental addition and
        removal of indices is preferable. It has been added during refactoring
        of existing code.

        :param index_to_pack: A mapping from index objects to
            (transport, name) tuples for the pack file data.
        :param indices: A list of indices.
        """
        # refresh the revision pack map dict without replacing the instance.
        self.index_to_pack.clear()
        self.index_to_pack.update(index_to_pack)
        # XXX: API break - clearly a 'replace' method would be good?
        self.combined_index._indices[:] = indices
        # the current add nodes callback for the current writable index if
        # any.
        self.add_callback = None

    def add_index(self, index, pack):
        """Add index to the aggregate, which is an index for Pack pack.

        Future searches on the aggregate index will search this new index
        before all previously inserted indices.

        :param index: An Index for the pack.
        :param pack: A Pack instance.
        """
        # expose it to the index map
        self.index_to_pack[index] = pack.access_tuple()
        # put it at the front of the linear index list
        self.combined_index.insert_index(0, index)
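
    # Illustrative note (not part of the original module): indices are
    # inserted at position 0, so the most recently added pack is probed first
    # by the combined index. A sketch, assuming three existing packs:
    #
    #   agg = AggregateIndex()
    #   for pack in (oldest, older, newest):
    #       agg.add_index(pack.revision_index, pack)
    #   # agg.combined_index now searches newest, older, oldest in that order.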

    def add_writable_index(self, index, pack):
        """Add an index which is able to have data added to it.

        There can be at most one writable index at any time. Any
        modifications made to the knit are put into this index.

        :param index: An index from the pack parameter.
        :param pack: A Pack instance.
        """
        if self.add_callback is not None:
            raise AssertionError(
                "%s already has a writable index through %s" % \
                (self, self.add_callback))
        # allow writing: queue writes to a new index
        self.add_index(index, pack)
        # Updates the index to packs mapping as a side effect,
        self.data_access.set_writer(pack._writer, index, pack.access_tuple())
        self.add_callback = index.add_nodes

    def clear(self):
        """Reset all the aggregate data to nothing."""
        self.data_access.set_writer(None, None, (None, None))
        self.index_to_pack.clear()
        del self.combined_index._indices[:]
        self.add_callback = None

    def remove_index(self, index, pack):
        """Remove index from the indices used to answer queries.

        :param index: An index from the pack parameter.
        :param pack: A Pack instance.
        """
        del self.index_to_pack[index]
        self.combined_index._indices.remove(index)
        if (self.add_callback is not None and
            getattr(index, 'add_nodes', None) == self.add_callback):
            self.add_callback = None
            self.data_access.set_writer(None, None, (None, None))


class Packer(object):
    """Create a pack from packs."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None):
        """Create a Packer.

        :param pack_collection: A RepositoryPackCollection object where the
            new pack is being written to.
        :param packs: The packs to combine.
        :param suffix: The suffix to use on the temporary files for the pack.
        :param revision_ids: Revision ids to limit the pack to.
        """
        self.packs = packs
        self.suffix = suffix
        self.revision_ids = revision_ids
        # The pack object we are creating.
        self.new_pack = None
        self._pack_collection = pack_collection
        # The index layer keys for the revisions being copied. None for 'all
        # revisions'.
        self._revision_keys = None
        # What text keys to copy. None for 'all texts'. This is set by
        # _copy_inventory_texts
        self._text_filter = None
        self._extra_init()

    def _extra_init(self):
        """A template hook to allow extending the constructor trivially."""

    def _pack_map_and_index_list(self, index_attribute):
        """Convert a list of packs to an index pack map and index list.

        :param index_attribute: The attribute that the desired index is found
            on.
        :return: A tuple (map, list) where map contains the dict from
            index:pack_tuple, and list contains the indices in the preferred
            access order.
        """
        indices = []
        pack_map = {}
        for pack_obj in self.packs:
            index = getattr(pack_obj, index_attribute)
            indices.append(index)
            pack_map[index] = pack_obj
        return pack_map, indices
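
    # Illustrative note (not part of the original module): with two
    # hypothetical source packs p1 and p2, asking for their revision indices
    # yields both views needed by the copy loops:
    #
    #   pack_map, indices = packer._pack_map_and_index_list('revision_index')
    #   # pack_map == {p1.revision_index: p1, p2.revision_index: p2}
    #   # indices  == [p1.revision_index, p2.revision_index]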

    def _index_contents(self, indices, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
        """
        all_index = CombinedGraphIndex(indices)
        if key_filter is None:
            return all_index.iter_all_entries()
        else:
            return all_index.iter_entries(key_filter)

    def pack(self, pb=None):
        """Create a new pack by reading data from other packs.

        This does little more than a bulk copy of data. One key difference
        is that data with the same item key across multiple packs is elided
        from the output. The new pack is written into the current pack store
        along with its indices, and the name added to the pack names. The
        source packs are not altered and are not required to be in the current
        pack collection.

        :param pb: An optional progress bar to use. A nested bar is created if
            this is None.
        :return: A Pack object, or None if nothing was copied.
        """
        # open a pack - using the same name as the last temporary file
        # - which has already been flushed, so it's safe.
        # XXX: - duplicate code warning with start_write_group; fix before
        #      considering 'done'.
        if self._pack_collection._new_pack is not None:
            raise errors.BzrError('call to create_pack_from_packs while '
                'another pack is being written.')
        if self.revision_ids is not None:
            if len(self.revision_ids) == 0:
                # silly fetch request.
                return None
            self.revision_ids = frozenset(self.revision_ids)
            self.revision_keys = frozenset((revid,) for revid in
                self.revision_ids)
        if pb is None:
            self.pb = ui.ui_factory.nested_progress_bar()
        else:
            self.pb = pb
        try:
            return self._create_pack_from_packs()
        finally:
            if pb is None:
                self.pb.finished()

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        return NewPack(self._pack_collection, upload_suffix=self.suffix,
            file_mode=self._pack_collection.repo.bzrdir._get_file_mode())

    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them. Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                mutter('Not changing pack list, all packs used.')
            return
        seen_packs = set(packs)
        for pack in self.packs:
            if pack not in seen_packs:
                packs.append(pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            mutter('Reordering packs\nfrom: %s\n to: %s',
                   old_names, new_names)
        self.packs = packs

    def _copy_revision_texts(self):
        """Copy revision data to the new pack."""
        if self.revision_ids:
            revision_keys = [(revision_id,) for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = self._index_contents(revision_indices, revision_keys)
        revision_nodes = list(revision_nodes)
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
            self.new_pack.revision_index, readv_group_iter, total_items))
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.revision_index.key_count(),
                time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys

    def _copy_inventory_texts(self):
        """Copy the inventory texts to the new pack.

        self._revision_keys is used to determine what inventories to copy.

        Sets self._text_filter appropriately.
        """
        # select inventory keys
        inv_keys = self._revision_keys # currently the same keyspace, and note that
        # querying for keys here could introduce a bug where an inventory item
        # is missed, so do not change it to query separately without cross
        # checking like the text key check below.
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        output_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=output_lines)
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)

    def _copy_text_texts(self):
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing records.
            text_nodes = set(text_nodes)
            present_text_keys = set(_node[1] for _node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                    a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()

    def _create_pack_from_packs(self):
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack

    def _copy_nodes(self, nodes, index_map, writer, write_index):
        """Copy knit nodes between packs with no graph references."""
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_copy_nodes(nodes, index_map, writer,
                write_index, pb)
        finally:
            pb.finished()

    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0])))
            # linear scan up the pack
            pack_readv_requests.sort()
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            reader = pack.make_readv_reader(transport, path,
                [offset[0:2] for offset in pack_readv_requests])
            for (names, read_func), (_1, _2, (key, eol_flag)) in \
                izip(reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                df, _ = knit._parse_record_header(key, raw_data)
                df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1

    def _copy_nodes_graph(self, index_map, writer, write_index,
        readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for result in self._do_copy_nodes_graph(index_map, writer,
                write_index, output_lines, pb, readv_group_iter, total_items):
                yield result
        except Exception:
            # Python 2.4 does not permit try:finally: in a generator.
            pb.finished()
            raise
        else:
            pb.finished()

    def _do_copy_nodes_graph(self, index_map, writer, write_index,
        output_lines, pb, readv_group_iter, total_items):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            reader = pack.make_readv_reader(transport, path, readv_vector)
            for (names, read_func), (key, eol_flag, references) in \
                izip(reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1

    def _get_text_nodes(self):
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        return text_index_map, self._index_contents(text_indices,
            self._text_filter)

    def _least_readv_node_readv(self, nodes):
        """Generate request groups for nodes using the least readv's.

        :param nodes: An iterable of graph index nodes.
        :return: Total node count and an iterator of the data needed to perform
            readvs to obtain the data for nodes. Each item yielded by the
            iterator is a tuple with:
            index, readv_vector, node_vector. readv_vector is a list ready to
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
        result = []
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0], references)))
            # linear scan up the pack to maximum range combining.
            pack_readv_requests.sort()
            # split out the readv and the node data.
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result
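
    # Illustrative note (not part of the original module): each source pack
    # contributes a single grouped readv. For example, two nodes that live in
    # the same pack at offsets 0 and 100 come back as one request group:
    #
    #   total, groups = packer._least_readv_node_readv(nodes)
    #   # groups == [(index, [(0, 82), (100, 41)],
    #   #             [(key1, ' ', refs1), (key2, ' ', refs2)])]
    #
    # The offsets, lengths and flag characters above are made-up values; the
    # shape of the result is what matters.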

    def _log_copied_texts(self):
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.text_index.key_count(),
                time.time() - self.new_pack.start_time)

    def _process_inventory_lines(self, inv_lines):
        """Use up the inv_lines generator and setup a text key filter."""
        repo = self._pack_collection.repo
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        text_filter = []
        for fileid, file_revids in fileid_revisions.iteritems():
            text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
        self._text_filter = text_filter

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        return self._least_readv_node_readv(revision_nodes)

    def _use_pack(self, new_pack):
        """Return True if new_pack should be used.

        :param new_pack: The pack that has just been created.
        :return: True if the pack should be used.
        """
        return new_pack.data_inserted()


class OptimisingPacker(Packer):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
            ancestors[key] = references[0]
            by_key[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
            index, value, references = by_key[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(' ')
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0], references)]))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests
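
    # Illustrative note (not part of the original module): given a tiny
    # ancestry {A: (), B: (A,), C: (B,)}, topo_sort returns [A, B, C] and the
    # reversed iteration above emits one single-range readv per revision in
    # the order C, B, A, so newer revisions end up near the front of the new
    # pack.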

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        new_pack.revision_index.set_optimize(for_size=True)
        new_pack.inventory_index.set_optimize(for_size=True)
        new_pack.text_index.set_optimize(for_size=True)
        new_pack.signature_index.set_optimize(for_size=True)
        return new_pack


class ReconcilePacker(Packer):
    """A packer which regenerates indices etc as it copies.

    This is used by ``bzr reconcile`` to cause parent text pointers to be
    corrected.
    """

    def _extra_init(self):
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with, rather
        than a text key filter."""
        repo = self._pack_collection.repo
        refs = repo._find_text_key_references_from_xml_inventory_lines(
            inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a full
        #    text.
        self._text_filter = None

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # node is (index, key, value, refs)
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                    self._data_changed = True
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key:rev_order[key[0][1]])
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index:self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack to delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(repo.texts.get_record_stream(
                [key], 'unordered', True).next().get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed


class RepositoryPackCollection(object):
    """Management of packs within a repository.

    :ivar _names: map of {pack_name: (index_size,)}
    """

    def __init__(self, repo, transport, index_transport, upload_transport,
                 pack_transport, index_builder_class, index_class):
        """Create a new RepositoryPackCollection.

        :param repo: The repository this collection manages packs for.
        :param transport: Addresses the repository base directory
            (typically .bzr/repository/).
        :param index_transport: Addresses the directory containing indices.
        :param upload_transport: Addresses the directory into which packs are written
            while they're being created.
        :param pack_transport: Addresses the directory of existing complete packs.
        :param index_builder_class: The index builder class to use.
        :param index_class: The index class to use.
        """
        self.repo = repo
        self.transport = transport
        self._index_transport = index_transport
        self._upload_transport = upload_transport
        self._pack_transport = pack_transport
        self._index_builder_class = index_builder_class
        self._index_class = index_class
        self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3}
        self.packs = []
        self._names = None
        self._packs_by_name = {}
        # the previous pack-names content
        self._packs_at_load = None
        # when a pack is being created by this object, the state of that pack.
        self._new_pack = None
        # aggregated revision index data
        self.revision_index = AggregateIndex(self.reload_pack_names)
        self.inventory_index = AggregateIndex(self.reload_pack_names)
        self.text_index = AggregateIndex(self.reload_pack_names)
        self.signature_index = AggregateIndex(self.reload_pack_names)

    def add_pack_to_memory(self, pack):
        """Make a Pack object available to the repository to satisfy queries.

        :param pack: A Pack object.
        """
        if pack.name in self._packs_by_name:
            raise AssertionError()
        self.packs.append(pack)
        self._packs_by_name[pack.name] = pack
        self.revision_index.add_index(pack.revision_index, pack)
        self.inventory_index.add_index(pack.inventory_index, pack)
        self.text_index.add_index(pack.text_index, pack)
        self.signature_index.add_index(pack.signature_index, pack)

    def all_packs(self):
        """Return a list of all the Pack objects this repository has.

        Note that an in-progress pack being created is not returned.

        :return: A list of Pack objects for all the packs in the repository.
        """
        result = []
        for name in self.names():
            result.append(self.get_pack_by_name(name))
        return result

    def autopack(self):
        """Pack the pack collection incrementally.

        This will not attempt global reorganisation or recompression,
        rather it will just ensure that the total number of packs does
        not grow without bound. It uses the _max_pack_count method to
        determine if autopacking is needed, and the pack_distribution
        method to determine the number of revisions in each pack.

        If autopacking takes place then the packs name collection will have
        been flushed to disk - packing requires updating the name collection
        in synchronisation with certain steps. Otherwise the names collection
        is not flushed.

        :return: True if packing took place.
        """
        # XXX: Should not be needed when the management of indices is sane.
        total_revisions = self.revision_index.combined_index.key_count()
        total_packs = len(self._names)
        if self._max_pack_count(total_revisions) >= total_packs:
            return False
        # XXX: the following may want to be a class, to pack with a given
        # policy.
        # determine which packs need changing
        pack_distribution = self.pack_distribution(total_revisions)
        existing_packs = []
        for pack in self.all_packs():
            revision_count = pack.get_revision_count()
            if revision_count == 0:
                # revision less packs are not generated by normal operation,
                # only by operations like sign-my-commits, and thus will not
                # tend to grow rapidly or without bound like commit containing
                # packs do - leave them alone as packing them really should
                # group their data with the relevant commit, and that may
                # involve rewriting ancient history - which autopack tries to
                # avoid. Alternatively we could not group the data but treat
                # each of these as having a single revision, and thus add
                # one revision for each to the total revision count, to get
                # a matching distribution.
                continue
            existing_packs.append((revision_count, pack))
        pack_operations = self.plan_autopack_combinations(
            existing_packs, pack_distribution)
        num_new_packs = len(pack_operations)
        num_old_packs = sum([len(po[1]) for po in pack_operations])
        num_revs_affected = sum([po[0] for po in pack_operations])
        mutter('Auto-packing repository %s, which has %d pack files, '
            'containing %d revisions. Packing %d files into %d affecting %d'
            ' revisions', self, total_packs, total_revisions, num_old_packs,
            num_new_packs, num_revs_affected)
        self._execute_pack_operations(pack_operations)
        return True

    def _execute_pack_operations(self, pack_operations, _packer_class=Packer):
        """Execute a series of pack operations.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: The class of packer to use (default: Packer).
        """
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if len(packs) == 0:
                continue
            _packer_class(self, packs, '.autopack').pack()
            for pack in packs:
                self._remove_pack_from_memory(pack)
        # record the newly available packs and stop advertising the old
        # packs.
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)

    def lock_names(self):
        """Acquire the mutex around the pack-names index.

        This cannot be used in the middle of a read-only transaction on the
        repository.
        """
        self.repo.control_files.lock_write()

    def pack(self):
        """Pack the pack collection totally."""
        self.ensure_loaded()
        total_packs = len(self._names)
        if total_packs < 2:
            # This is arguably wrong because we might not be optimal, but for
            # now let's leave it in. (e.g. reconcile -> one pack. But not
            # packing at all.)
            return
        total_revisions = self.revision_index.combined_index.key_count()
        # XXX: the following may want to be a class, to pack with a given
        # policy.
        mutter('Packing repository %s, which has %d pack files, '
            'containing %d revisions into 1 pack.', self, total_packs,
            total_revisions)
        # determine which packs need changing
        pack_distribution = [1]
        pack_operations = [[0, []]]
        for pack in self.all_packs():
            pack_operations[-1][0] += pack.get_revision_count()
            pack_operations[-1][1].append(pack)
        self._execute_pack_operations(pack_operations, OptimisingPacker)

    def plan_autopack_combinations(self, existing_packs, pack_distribution):
        """Plan a pack operation.

        :param existing_packs: The packs to pack. (A list of (revcount, Pack)
            tuples).
        :param pack_distribution: A list with the number of revisions desired
            in each pack.
        """
        if len(existing_packs) <= len(pack_distribution):
            return []
        existing_packs.sort(reverse=True)
        pack_operations = [[0, []]]
        # plan out what packs to keep, and what to reorganise
        while len(existing_packs):
            # take the largest pack, and if it's less than the head of the
            # distribution chart we will include its contents in the new pack
            # for that position. If it's larger, we remove its size from the
            # distribution chart
            next_pack_rev_count, next_pack = existing_packs.pop(0)
            if next_pack_rev_count >= pack_distribution[0]:
                # this is already packed 'better' than this, so we can
                # not waste time packing it.
                while next_pack_rev_count > 0:
                    next_pack_rev_count -= pack_distribution[0]
                    if next_pack_rev_count >= 0:
                        del pack_distribution[0]
                    else:
                        # didn't use that entire bucket up
                        pack_distribution[0] = -next_pack_rev_count
            else:
                # add the revisions we're going to add to the next output pack
                pack_operations[-1][0] += next_pack_rev_count
                # allocate this pack to the next pack sub operation
                pack_operations[-1][1].append(next_pack)
                if pack_operations[-1][0] >= pack_distribution[0]:
                    # this pack is used up, shift left.
                    del pack_distribution[0]
                    pack_operations.append([0, []])
        # Now that we know which pack files we want to move, shove them all
        # into a single pack file.
        final_rev_count = 0
        final_pack_list = []
        for num_revs, pack_files in pack_operations:
            final_rev_count += num_revs
            final_pack_list.extend(pack_files)
        if len(final_pack_list) == 1:
            raise AssertionError('We somehow generated an autopack with a'
                ' single pack file being moved.')
        return [[final_rev_count, final_pack_list]]
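
    # Illustrative walk-through (not part of the original module): for 14
    # revisions pack_distribution() yields [10, 1, 1, 1, 1]. Given six packs
    # holding (5, 4, 2, 1, 1, 1) revisions, the three single-revision packs
    # each already satisfy a bucket and are left alone, while the 5-, 4- and
    # 2-revision packs are planned into one combined pack:
    #
    #   collection.plan_autopack_combinations(
    #       [(5, pA), (4, pB), (2, pC), (1, pD), (1, pE), (1, pF)],
    #       [10, 1, 1, 1, 1])
    #   # -> [[11, [pA, pB, pC]]]
    #
    # pA..pF are hypothetical Pack objects used only for the example.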

    def ensure_loaded(self):
        # NB: if you see an assertion error here, it's probably access against
        # an unlocked repo. Naughty.
        if not self.repo.is_locked():
            raise errors.ObjectNotLocked(self.repo)
        if self._names is None:
            self._names = {}
            self._packs_at_load = set()
            for index, key, value in self._iter_disk_pack_index():
                name = key[0]
                self._names[name] = self._parse_index_sizes(value)
                self._packs_at_load.add((key, value))
        # populate all the metadata.
        self.all_packs()

    def _parse_index_sizes(self, value):
        """Parse a string of index sizes."""
        return tuple([int(digits) for digits in value.split(' ')])

    def get_pack_by_name(self, name):
        """Get a Pack object by name.

        :param name: The name of the pack - e.g. '123456'
        :return: A Pack object.
        """
        try:
            return self._packs_by_name[name]
        except KeyError:
            rev_index = self._make_index(name, '.rix')
            inv_index = self._make_index(name, '.iix')
            txt_index = self._make_index(name, '.tix')
            sig_index = self._make_index(name, '.six')
            result = ExistingPack(self._pack_transport, name, rev_index,
                inv_index, txt_index, sig_index)
            self.add_pack_to_memory(result)
            return result

    def allocate(self, a_new_pack):
        """Allocate name in the list of packs.

        :param a_new_pack: A NewPack instance to be added to the collection of
            packs for this repository.
        """
        self.ensure_loaded()
        if a_new_pack.name in self._names:
            raise errors.BzrError(
                'Pack %r already exists in %s' % (a_new_pack.name, self))
        self._names[a_new_pack.name] = tuple(a_new_pack.index_sizes)
        self.add_pack_to_memory(a_new_pack)

    def _iter_disk_pack_index(self):
        """Iterate over the contents of the pack-names index.

        This is used when loading the list from disk, and before writing to
        detect updates from others during our write operation.
        :return: An iterator of the index contents.
        """
        return self._index_class(self.transport, 'pack-names', None
                ).iter_all_entries()

    def _make_index(self, name, suffix):
        size_offset = self._suffix_offsets[suffix]
        index_name = name + suffix
        index_size = self._names[name][size_offset]
        return self._index_class(
            self._index_transport, index_name, index_size)

    def _max_pack_count(self, total_revisions):
        """Return the maximum number of packs to use for total revisions.

        :param total_revisions: The total number of revisions in the
            repository.
        """
        if not total_revisions:
            return 1
        digits = str(total_revisions)
        result = 0
        for digit in digits:
            result += int(digit)
        return result

    def names(self):
        """Provide an order to the underlying names."""
        return sorted(self._names.keys())

    def _obsolete_packs(self, packs):
        """Move a number of packs which have been obsoleted out of the way.

        Each pack and its associated indices are moved out of the way.

        Note: for correctness this function should only be called after a new
        pack names index has been written without these pack names, and with
        the names of packs that contain the data previously available via these
        packs.

        :param packs: The packs to obsolete.
        :return: None.
        """
        for pack in packs:
            pack.pack_transport.rename(pack.file_name(),
                '../obsolete_packs/' + pack.file_name())
            # TODO: Probably needs to know all possible indices for this pack
            # - or maybe list the directory and move all indices matching this
            # name whether we recognize it or not?
            for suffix in ('.iix', '.six', '.tix', '.rix'):
                self._index_transport.rename(pack.name + suffix,
                    '../obsolete_packs/' + pack.name + suffix)

    def pack_distribution(self, total_revisions):
        """Generate a list of the number of revisions to put in each pack.

        :param total_revisions: The total number of revisions in the
            repository.
        """
        if total_revisions == 0:
            return [0]
        digits = reversed(str(total_revisions))
        result = []
        for exponent, count in enumerate(digits):
            size = 10 ** exponent
            for pos in range(int(count)):
                result.append(size)
        return list(reversed(result))
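
    # Illustrative note (not part of the original module): the distribution
    # follows the decimal digits of the revision count, and _max_pack_count
    # is simply the digit sum, e.g.:
    #
    #   collection.pack_distribution(2046)
    #   # -> [1000, 1000, 10, 10, 10, 10, 1, 1, 1, 1, 1, 1]
    #   collection._max_pack_count(2046)
    #   # -> 12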

    def _pack_tuple(self, name):
        """Return a tuple with the transport and file name for a pack name."""
        return self._pack_transport, name + '.pack'

    def _remove_pack_from_memory(self, pack):
        """Remove pack from the packs accessed by this repository.

        Only affects memory state, until self._save_pack_names() is invoked.
        """
        self._names.pop(pack.name)
        self._packs_by_name.pop(pack.name)
        self._remove_pack_indices(pack)
        self.packs.remove(pack)

    def _remove_pack_indices(self, pack):
        """Remove the indices for pack from the aggregated indices."""
        self.revision_index.remove_index(pack.revision_index, pack)
        self.inventory_index.remove_index(pack.inventory_index, pack)
        self.text_index.remove_index(pack.text_index, pack)
        self.signature_index.remove_index(pack.signature_index, pack)

    def reset(self):
        """Clear all cached data."""
        # cached revision data
        self.repo._revision_knit = None
        self.revision_index.clear()
        # cached signature data
        self.repo._signature_knit = None
        self.signature_index.clear()
        # cached file text data
        self.text_index.clear()
        self.repo._text_knit = None
        # cached inventory data
        self.inventory_index.clear()
        # remove the open pack
        self._new_pack = None
        # information about packs.
        self._names = None
        self.packs = []
        self._packs_by_name = {}
        self._packs_at_load = None

    def _unlock_names(self):
        """Release the mutex around the pack-names index."""
        self.repo.control_files.unlock()

    def _diff_pack_names(self):
        """Read the pack names from disk, and compare it to the one in memory.

        :return: (disk_nodes, deleted_nodes, new_nodes)
            disk_nodes    The final set of nodes that should be referenced
            deleted_nodes Nodes which have been removed from when we started
            new_nodes     Nodes that are newly introduced
        """
        # load the disk nodes across
        disk_nodes = set()
        for index, key, value in self._iter_disk_pack_index():
            disk_nodes.add((key, value))

        # do a two-way diff against our original content
        current_nodes = set()
        for name, sizes in self._names.iteritems():
            current_nodes.add(
                ((name, ), ' '.join(str(size) for size in sizes)))

        # Packs no longer present in the repository, which were present when we
        # locked the repository
        deleted_nodes = self._packs_at_load - current_nodes
        # Packs which this process is adding
        new_nodes = current_nodes - self._packs_at_load

        # Update the disk_nodes set to include the ones we are adding, and
        # remove the ones which were removed by someone else
        disk_nodes.difference_update(deleted_nodes)
        disk_nodes.update(new_nodes)

        return disk_nodes, deleted_nodes, new_nodes
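
    # Sketch of the merge above, in set terms: the result is
    # (names on disk - names this process dropped) | names this process added,
    # where each node is a ((name,), 'size size ...') tuple, so additions and
    # removals made concurrently by other processes are both preserved.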

    def _syncronize_pack_names_from_disk_nodes(self, disk_nodes):
        """Given the correct set of pack files, update our saved info.

        :return: (removed, added, modified)
            removed     pack names removed from self._names
            added       pack names added to self._names
            modified    pack names that had changed value
        """
        removed = []
        added = []
        modified = []
        ## self._packs_at_load = disk_nodes
        new_names = dict(disk_nodes)
        # drop no longer present nodes
        for pack in self.all_packs():
            if (pack.name,) not in new_names:
                removed.append(pack.name)
                self._remove_pack_from_memory(pack)
        # add new nodes/refresh existing ones
        for key, value in disk_nodes:
            name = key[0]
            sizes = self._parse_index_sizes(value)
            if name in self._names:
                # existing
                if sizes != self._names[name]:
                    # the pack for name has had its indices replaced - rare but
                    # important to handle. XXX: probably can never happen today
                    # because the three-way merge code above does not handle it
                    # - you may end up adding the same key twice to the new
                    # disk index because the set values are the same, unless
                    # the only index shows up as deleted by the set difference
                    # - which it may. Until there is a specific test for this,
                    # assume it's broken. RBC 20071017.
                    self._remove_pack_from_memory(self.get_pack_by_name(name))
                    self._names[name] = sizes
                    self.get_pack_by_name(name)
                    modified.append(name)
            else:
                # new
                self._names[name] = sizes
                self.get_pack_by_name(name)
                added.append(name)
        return removed, added, modified

    def _save_pack_names(self, clear_obsolete_packs=False):
        """Save the list of packs.

        This will take out the mutex around the pack names list for the
        duration of the method call. If concurrent updates have been made, a
        three-way merge between the current list and the current in memory list
        is performed.

        :param clear_obsolete_packs: If True, clear out the contents of the
            obsolete_packs directory.
        """
        self.lock_names()
        try:
            builder = self._index_builder_class()
            disk_nodes, deleted_nodes, new_nodes = self._diff_pack_names()
            # TODO: handle same-name, index-size-changes here -
            # e.g. use the value from disk, not ours, *unless* we're the one
            # changing it.
            for key, value in disk_nodes:
                builder.add_node(key, value)
            self.transport.put_file('pack-names', builder.finish(),
                mode=self.repo.bzrdir._get_file_mode())
            # move the baseline forward
            self._packs_at_load = disk_nodes
            if clear_obsolete_packs:
                self._clear_obsolete_packs()
        finally:
            self._unlock_names()
        # synchronise the memory packs list with what we just wrote:
        self._syncronize_pack_names_from_disk_nodes(disk_nodes)
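
    # Rough concurrency picture for the method above: the physical pack-names
    # mutex is held only while the merged list is rebuilt and rewritten, so a
    # racing writer re-reads the on-disk index, layers its own additions and
    # removals on top via _diff_pack_names, and writes the combined result.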

    def reload_pack_names(self):
        """Sync our pack listing with what is present in the repository.

        This should be called when we find out that something we thought was
        present is now missing. This happens when another process re-packs the
        repository, etc.
        """
        # This is functionally similar to _save_pack_names, but we don't write
        # out the new value.
        disk_nodes, _, _ = self._diff_pack_names()
        self._packs_at_load = disk_nodes
        (removed, added,
         modified) = self._syncronize_pack_names_from_disk_nodes(disk_nodes)
        if removed or added or modified:
            return True
        return False

    def _clear_obsolete_packs(self):
        """Delete everything from the obsolete-packs directory.
        """
        obsolete_pack_transport = self.transport.clone('obsolete_packs')
        for filename in obsolete_pack_transport.list_dir('.'):
            try:
                obsolete_pack_transport.delete(filename)
            except (errors.PathError, errors.TransportError), e:
                warning("couldn't delete obsolete pack, skipping it:\n%s" % (e,))

    def _start_write_group(self):
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = NewPack(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(self._new_pack.revision_index,
            self._new_pack)
        self.inventory_index.add_writable_index(self._new_pack.inventory_index,
            self._new_pack)
        self.text_index.add_writable_index(self._new_pack.text_index,
            self._new_pack)
        self.signature_index.add_writable_index(self._new_pack.signature_index,
            self._new_pack)

        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
        self.repo.signatures._index._add_callback = self.signature_index.add_callback
        self.repo.texts._index._add_callback = self.text_index.add_callback

    def _abort_write_group(self):
        # FIXME: just drop the transient index.
        # forget what names there are
        if self._new_pack is not None:
            try:
                self._new_pack.abort()
            finally:
                # XXX: If we aborted while in the middle of finishing the write
                # group, _remove_pack_indices can fail because the indexes are
                # already gone.  If they're not there we shouldn't fail in this
                # case. -- mbp 20081113
                self._remove_pack_indices(self._new_pack)
                self._new_pack = None
        self.repo._text_knit = None

    def _commit_write_group(self):
        self._remove_pack_indices(self._new_pack)
        if self._new_pack.data_inserted():
            # get all the data to disk and read to use
            self._new_pack.finish()
            self.allocate(self._new_pack)
            self._new_pack = None
            if not self.autopack():
                # when autopack takes no steps, the names list is still
                # unsaved.
                self._save_pack_names()
        else:
            self._new_pack.abort()
            self._new_pack = None
        self.repo._text_knit = None
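
    # Write-group lifecycle as sketched by the three methods above:
    # _start_write_group opens a NewPack and points the repository's knit
    # index callbacks at it; _commit_write_group either finishes and allocates
    # the pack (possibly triggering an autopack) or aborts it when nothing was
    # inserted; _abort_write_group throws the partially written pack away.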


class KnitPackRepository(KnitRepository):
    """Repository with knit objects stored inside pack containers.

    The layering for a KnitPackRepository is:

    Graph | HPSS | Repository public layer |
    ===================================================
    Tuple based apis below, string based, and key based apis above
    ---------------------------------------------------
    KnitVersionedFiles
      Provides .texts, .revisions etc
      This adapts the N-tuple keys to physical knit records which only have a
      single string identifier (for historical reasons), which in older formats
      was always the revision_id, and in the mapped code for packs is always
      the last element of key tuples.
    ---------------------------------------------------
    GraphIndex
      A separate GraphIndex is used for each of the
      texts/inventories/revisions/signatures contained within each individual
      pack file. The GraphIndex layer works in N-tuples and is unaware of any
      semantic meaning of the data it indexes.
    ===================================================
    """

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        KnitRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        index_transport = self._transport.clone('indices')
        self._pack_collection = RepositoryPackCollection(self, self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class)
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        self._fetch_order = 'unordered'
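        # Note on key shapes (descriptive only): revisions, signatures and
        # inventories are addressed here by (revision_id,) 1-tuples, while
        # texts are addressed by (file_id, revision_id) 2-tuples; the knit
        # layer flattens these to its historical single-string identifiers.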

    def _warn_if_deprecated(self):
        # This class isn't deprecated, but one sub-format is
        if isinstance(self._format, RepositoryFormatKnitPack5RichRootBroken):
            from bzrlib import repository
            if repository._deprecation_warning_done:
                return
            repository._deprecation_warning_done = True
            warning("Format %s for %s is deprecated - please use"
                    " 'bzr upgrade --1.6.1-rich-root'"
                    % (self._format, self.bzrdir.transport.base))

    def _abort_write_group(self):
        self._pack_collection._abort_write_group()

    def _find_inconsistent_revision_parents(self):
        """Find revisions with incorrectly cached parents.

        :returns: an iterator yielding tuples of (revision-id, parents-in-index,
            parents-in-revision).
        """
        if not self.is_locked():
            raise errors.ObjectNotLocked(self)
        pb = ui.ui_factory.nested_progress_bar()
        result = []
        try:
            revision_nodes = self._pack_collection.revision_index \
                .combined_index.iter_all_entries()
            index_positions = []
            # Get the cached index values for all revisions, and also the location
            # in each index of the revision text so we can perform linear IO.
            for index, key, value, refs in revision_nodes:
                pos, length = value[1:].split(' ')
                index_positions.append((index, int(pos), key[0],
                    tuple(parent[0] for parent in refs[0])))
                pb.update("Reading revision index.", 0, 0)
            index_positions.sort()
            batch_count = len(index_positions) / 1000 + 1
            pb.update("Checking cached revision graph.", 0, batch_count)
            for offset in xrange(batch_count):
                pb.update("Checking cached revision graph.", offset)
                to_query = index_positions[offset * 1000:(offset + 1) * 1000]
                if not to_query:
                    break
                rev_ids = [item[2] for item in to_query]
                revs = self.get_revisions(rev_ids)
                for revision, item in zip(revs, to_query):
                    index_parents = item[3]
                    rev_parents = tuple(revision.parent_ids)
                    if index_parents != rev_parents:
                        result.append((revision.revision_id, index_parents, rev_parents))
        finally:
            pb.finished()
        return result

    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revision_ids):
        """See graph._StackedParentsProvider.get_parents."""
        parent_map = self.get_parent_map(revision_ids)
        return [parent_map.get(r, None) for r in revision_ids]

    def _make_parents_provider(self):
        return graph.CachingParentsProvider(self)

    def _refresh_data(self):
        if self._write_lock_count == 1 or (
            self.control_files._lock_count == 1 and
            self.control_files._lock_mode == 'r'):
            # forget what names there are
            self._pack_collection.reset()
            # XXX: Better to do an in-memory merge when acquiring a new lock -
            # factor out code from _save_pack_names.
            self._pack_collection.ensure_loaded()

    def _start_write_group(self):
        self._pack_collection._start_write_group()

    def _commit_write_group(self):
        return self._pack_collection._commit_write_group()

    def get_transaction(self):
        if self._write_lock_count:
            return self._transaction
        else:
            return self.control_files.get_transaction()

    def is_locked(self):
        return self._write_lock_count or self.control_files.is_locked()

    def is_write_locked(self):
        return self._write_lock_count

    def lock_write(self, token=None):
        if not self._write_lock_count and self.is_locked():
            raise errors.ReadOnlyError(self)
        self._write_lock_count += 1
        if self._write_lock_count == 1:
            self._transaction = transactions.WriteTransaction()
            for repo in self._fallback_repositories:
                # Writes don't affect fallback repos
                repo.lock_read()
        self._refresh_data()

    def lock_read(self):
        if self._write_lock_count:
            self._write_lock_count += 1
        else:
            self.control_files.lock_read()
            for repo in self._fallback_repositories:
                # Writes don't affect fallback repos
                repo.lock_read()
        self._refresh_data()

    def leave_lock_in_place(self):
        # not supported - raise an error
        raise NotImplementedError(self.leave_lock_in_place)

    def dont_leave_lock_in_place(self):
        # not supported - raise an error
        raise NotImplementedError(self.dont_leave_lock_in_place)

    @needs_write_lock
    def pack(self):
        """Compress the data within the repository.

        This will pack all the data to a single pack. In future it may
        recompress deltas or do other such expensive operations.
        """
        self._pack_collection.pack()
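
    # Usage note (informal): this is the method reached by the 'bzr pack'
    # command; it rewrites all existing pack files into one new pack and
    # moves the old ones aside to obsolete_packs, so it can be slow on large
    # histories but does not change the recorded revisions.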

    @needs_write_lock
    def reconcile(self, other=None, thorough=False):
        """Reconcile this repository."""
        from bzrlib.reconcile import PackReconciler
        reconciler = PackReconciler(self, thorough=thorough)
        reconciler.reconcile()
        return reconciler

    def unlock(self):
        if self._write_lock_count == 1 and self._write_group is not None:
            self.abort_write_group()
            self._transaction = None
            self._write_lock_count = 0
            raise errors.BzrError(
                'Must end write group before releasing write lock on %s'
                % self)
        if self._write_lock_count:
            self._write_lock_count -= 1
            if not self._write_lock_count:
                transaction = self._transaction
                self._transaction = None
                transaction.finish()
                for repo in self._fallback_repositories:
                    repo.unlock()
        else:
            self.control_files.unlock()
            for repo in self._fallback_repositories:
                repo.unlock()


class RepositoryFormatPack(MetaDirRepositoryFormat):
    """Format logic for pack structured repositories.

    This repository format has:
     - a list of packs in pack-names
     - packs in packs/NAME.pack
     - indices in indices/NAME.{iix,six,tix,rix}
     - knit deltas in the packs, knit indices mapped to the indices.
     - thunk objects to support the knits programming API.
     - a format marker of its own
     - an optional 'shared-storage' flag
     - an optional 'no-working-trees' flag
     - a LockDir lock
    """

    # Set this attribute in derived classes to control the repository class
    # created by open and initialize.
    repository_class = None
    # Set this attribute in derived classes to control the
    # _commit_builder_class that the repository objects will have passed to
    # their constructor.
    _commit_builder_class = None
    # Set this attribute in derived classes to control the _serializer that the
    # repository objects will have passed to their constructor.
    _serializer = None
    # External references are not supported in pack repositories yet.
    supports_external_lookups = False
    # What index classes to use
    index_builder_class = None
    index_class = None

    def initialize(self, a_bzrdir, shared=False):
        """Create a pack based repository.

        :param a_bzrdir: bzrdir to contain the new repository; must already
            be initialized.
        :param shared: If true the repository will be initialized as a shared
                       repository.
        """
        mutter('creating repository in %s.', a_bzrdir.transport.base)
        dirs = ['indices', 'obsolete_packs', 'packs', 'upload']
        builder = self.index_builder_class()
        files = [('pack-names', builder.finish())]
        utf8_files = [('format', self.get_format_string())]

        self._upload_blank_content(a_bzrdir, dirs, files, utf8_files, shared)
        return self.open(a_bzrdir=a_bzrdir, _found=True)
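
    # On-disk result of initialize(), for orientation (described from the
    # code above, not exhaustive): a 'pack-names' index file, empty
    # 'indices/', 'obsolete_packs/', 'packs/' and 'upload/' directories, and
    # a 'format' file holding get_format_string() so later opens can identify
    # the repository format.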

    def open(self, a_bzrdir, _found=False, _override_transport=None):
        """See RepositoryFormat.open().

        :param _override_transport: INTERNAL USE ONLY. Allows opening the
                                     repository at a slightly different url
                                     than normal. I.e. during 'upgrade'.
        """
        if not _found:
            format = RepositoryFormat.find_format(a_bzrdir)
        if _override_transport is not None:
            repo_transport = _override_transport
        else:
            repo_transport = a_bzrdir.get_repository_transport(None)
        control_files = lockable_files.LockableFiles(repo_transport,
                                'lock', lockdir.LockDir)
        return self.repository_class(_format=self,
                              a_bzrdir=a_bzrdir,
                              control_files=control_files,
                              _commit_builder_class=self._commit_builder_class,
                              _serializer=self._serializer)


class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"

    def check_conversion_target(self, target_format):
        pass
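
    # Note (descriptive): the byte string returned by get_format_string() is
    # what initialize() writes to the repository's 'format' file and what
    # find_format() later matches against, so it must not change once a
    # format has been released; the same applies to the format classes below.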


class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = True
    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"


class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False
    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"


class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"

    def check_conversion_target(self, target_format):
        pass
2244
class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
2245
"""A repository with rich roots and stacking.
2247
New in release 1.6.1.
2249
Supports stacking on other repositories, allowing data to be accessed
2250
without being stored locally.
2253
repository_class = KnitPackRepository
2254
_commit_builder_class = PackRootCommitBuilder
2255
rich_root_data = True
2256
supports_tree_reference = False # no subtrees
2257
supports_external_lookups = True
2258
# What index classes to use
2259
index_builder_class = InMemoryGraphIndex
2260
index_class = GraphIndex
2263
def _serializer(self):
2264
return xml6.serializer_v6
2266
def _get_matching_bzrdir(self):
2267
return bzrdir.format_registry.make_bzrdir(
2270
def _ignore_setting_bzrdir(self, format):
2273
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2275
def check_conversion_target(self, target_format):
2276
if not target_format.rich_root_data:
2277
raise errors.BadConversionTarget(
2278
'Does not support rich root data.', target_format)
2280
def get_format_string(self):
2281
"""See RepositoryFormat.get_format_string()."""
2282
return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"
2284
def get_format_description(self):
2285
return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"
2288
class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
2289
"""A repository with rich roots and external references.
2293
Supports external lookups, which results in non-truncated ghosts after
2294
reconcile compared to pack-0.92 formats.
2296
This format was deprecated because the serializer it uses accidentally
2297
supported subtrees, when the format was not intended to. This meant that
2298
someone could accidentally fetch from an incorrect repository.
2301
repository_class = KnitPackRepository
2302
_commit_builder_class = PackRootCommitBuilder
2303
rich_root_data = True
2304
supports_tree_reference = False # no subtrees
2306
supports_external_lookups = True
2307
# What index classes to use
2308
index_builder_class = InMemoryGraphIndex
2309
index_class = GraphIndex
2312
def _serializer(self):
2313
return xml7.serializer_v7
2315
def _get_matching_bzrdir(self):
2316
matching = bzrdir.format_registry.make_bzrdir(
2318
matching.repository_format = self
2321
def _ignore_setting_bzrdir(self, format):
2324
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2326
def check_conversion_target(self, target_format):
2327
if not target_format.rich_root_data:
2328
raise errors.BadConversionTarget(
2329
'Does not support rich root data.', target_format)
2331
def get_format_string(self):
2332
"""See RepositoryFormat.get_format_string()."""
2333
return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"
2335
def get_format_description(self):
2336
return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
2340
class RepositoryFormatKnitPack6(RepositoryFormatPack):
2341
"""A repository with stacking and btree indexes,
2342
without rich roots or subtrees.
2344
This is equivalent to pack-1.6 with B+Tree indices.
2347
repository_class = KnitPackRepository
2348
_commit_builder_class = PackCommitBuilder
2349
supports_external_lookups = True
2350
# What index classes to use
2351
index_builder_class = BTreeBuilder
2352
index_class = BTreeGraphIndex
2355
def _serializer(self):
2356
return xml5.serializer_v5
2358
def _get_matching_bzrdir(self):
2359
return bzrdir.format_registry.make_bzrdir('1.9')
2361
def _ignore_setting_bzrdir(self, format):
2364
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2366
def get_format_string(self):
2367
"""See RepositoryFormat.get_format_string()."""
2368
return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"
2370
def get_format_description(self):
2371
"""See RepositoryFormat.get_format_description()."""
2372
return "Packs 6 (uses btree indexes, requires bzr 1.9)"
2374
def check_conversion_target(self, target_format):
2378
class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
2379
"""A repository with rich roots, no subtrees, stacking and btree indexes.
2381
1.6-rich-root with B+Tree indices.
2384
repository_class = KnitPackRepository
2385
_commit_builder_class = PackRootCommitBuilder
2386
rich_root_data = True
2387
supports_tree_reference = False # no subtrees
2388
supports_external_lookups = True
2389
# What index classes to use
2390
index_builder_class = BTreeBuilder
2391
index_class = BTreeGraphIndex
2394
def _serializer(self):
2395
return xml6.serializer_v6
2397
def _get_matching_bzrdir(self):
2398
return bzrdir.format_registry.make_bzrdir(
2401
def _ignore_setting_bzrdir(self, format):
2404
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2406
def check_conversion_target(self, target_format):
2407
if not target_format.rich_root_data:
2408
raise errors.BadConversionTarget(
2409
'Does not support rich root data.', target_format)
2411
def get_format_string(self):
2412
"""See RepositoryFormat.get_format_string()."""
2413
return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"
2415
def get_format_description(self):
2416
return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"
2419
class RepositoryFormatPackDevelopment2(RepositoryFormatPack):
2420
"""A no-subtrees development repository.
2422
This format should be retained until the second release after bzr 1.7.
2424
This is pack-1.6.1 with B+Tree indices.
2427
repository_class = KnitPackRepository
2428
_commit_builder_class = PackCommitBuilder
2429
supports_external_lookups = True
2430
# What index classes to use
2431
index_builder_class = BTreeBuilder
2432
index_class = BTreeGraphIndex
2435
def _serializer(self):
2436
return xml5.serializer_v5
2438
def _get_matching_bzrdir(self):
2439
return bzrdir.format_registry.make_bzrdir('development2')
2441
def _ignore_setting_bzrdir(self, format):
2444
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2446
def get_format_string(self):
2447
"""See RepositoryFormat.get_format_string()."""
2448
return "Bazaar development format 2 (needs bzr.dev from before 1.8)\n"
2450
def get_format_description(self):
2451
"""See RepositoryFormat.get_format_description()."""
2452
return ("Development repository format, currently the same as "
2453
"1.6.1 with B+Trees.\n")
2455
def check_conversion_target(self, target_format):
2459
class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
2460
"""A subtrees development repository.
2462
This format should be retained until the second release after bzr 1.7.
2464
1.6.1-subtree[as it might have been] with B+Tree indices.
2467
repository_class = KnitPackRepository
2468
_commit_builder_class = PackRootCommitBuilder
2469
rich_root_data = True
2470
supports_tree_reference = True
2471
supports_external_lookups = True
2472
# What index classes to use
2473
index_builder_class = BTreeBuilder
2474
index_class = BTreeGraphIndex
2477
def _serializer(self):
2478
return xml7.serializer_v7
2480
def _get_matching_bzrdir(self):
2481
return bzrdir.format_registry.make_bzrdir(
2482
'development2-subtree')
2484
def _ignore_setting_bzrdir(self, format):
2487
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2489
def check_conversion_target(self, target_format):
2490
if not target_format.rich_root_data:
2491
raise errors.BadConversionTarget(
2492
'Does not support rich root data.', target_format)
2493
if not getattr(target_format, 'supports_tree_reference', False):
2494
raise errors.BadConversionTarget(
2495
'Does not support nested trees', target_format)
2497
def get_format_string(self):
2498
"""See RepositoryFormat.get_format_string()."""
2499
return ("Bazaar development format 2 with subtree support "
2500
"(needs bzr.dev from before 1.8)\n")
2502
def get_format_description(self):
2503
"""See RepositoryFormat.get_format_description()."""
2504
return ("Development repository format, currently the same as "
2505
"1.6.1-subtree with B+Tree indices.\n")