140
143
def __init__(self, revision_index, inventory_index, text_index,
144
signature_index, chk_index=None):
142
145
"""Create a pack instance.
144
147
:param revision_index: A GraphIndex for determining what revisions are
151
154
texts/deltas (via (fileid, revisionid) tuples).
152
155
:param signature_index: A GraphIndex for determining what signatures are
153
156
present in the Pack and accessing the locations of their texts.
157
:param chk_index: A GraphIndex for accessing content by CHK, if the
155
160
self.revision_index = revision_index
156
161
self.inventory_index = inventory_index
157
162
self.text_index = text_index
158
163
self.signature_index = signature_index
164
self.chk_index = chk_index
160
166
def access_tuple(self):
161
167
"""Return a tuple (transport, name) for the pack content."""
232
238
"""An in memory proxy for an existing .pack and its disk indices."""
234
240
def __init__(self, pack_transport, name, revision_index, inventory_index,
235
text_index, signature_index):
241
text_index, signature_index, chk_index=None):
236
242
"""Create an ExistingPack object.
238
244
:param pack_transport: The transport where the pack file resides.
239
245
:param name: The name of the pack on disk in the pack_transport.
241
247
Pack.__init__(self, revision_index, inventory_index, text_index,
248
signature_index, chk_index)
244
250
self.pack_transport = pack_transport
245
251
if None in (revision_index, inventory_index, text_index,
327
333
# The relative locations of the packs are constrained, but all are
328
334
# passed in because the caller has them, so as to avoid object churn.
329
335
index_builder_class = pack_collection._index_builder_class
336
if pack_collection.chk_index is not None:
337
chk_index = index_builder_class(reference_lists=0)
330
340
Pack.__init__(self,
331
341
# Revisions: parents list, no text compression.
332
342
index_builder_class(reference_lists=1),
341
351
# Signatures: Just blobs to store, no compression, no parents
343
353
index_builder_class(reference_lists=0),
354
# CHK based storage - just blobs, no compression or parents.
345
357
self._pack_collection = pack_collection
346
358
# When we make readonly indices, we need this.
355
367
self._file_mode = file_mode
356
368
# tracks the content written to the .pack file.
357
369
self._hash = osutils.md5()
358
# a four-tuple with the length in bytes of the indices, once the pack
359
# is finalised. (rev, inv, text, sigs)
370
# a tuple with the length in bytes of the indices, once the pack
371
# is finalised. (rev, inv, text, sigs, chk_if_in_use)
360
372
self.index_sizes = None
361
373
# How much data to cache when writing packs. Note that this is not
362
374
# synchronised with reads, because it's not in the transport layer, so
423
435
return bool(self.get_revision_count() or
424
436
self.inventory_index.key_count() or
425
437
self.text_index.key_count() or
426
self.signature_index.key_count())
438
self.signature_index.key_count() or
439
(self.chk_index is not None and self.chk_index.key_count()))
428
441
def finish(self, suspend=False):
429
442
"""Finish the new pack.
454
467
self._write_index('text', self.text_index, 'file texts', suspend)
455
468
self._write_index('signature', self.signature_index,
456
469
'revision signatures', suspend)
470
if self.chk_index is not None:
471
self.index_sizes.append(None)
472
self._write_index('chk', self.chk_index,
473
'content hash bytes', suspend)
457
474
self.write_stream.close()
458
475
# Note that this will clobber an existing pack with the same name,
459
476
# without checking for hash collisions. While this is undesirable this
727
744
def open_pack(self):
728
745
"""Open a pack for the pack we are creating."""
729
new_pack = NewPack(self._pack_collection, upload_suffix=self.suffix,
746
new_pack = self._pack_collection.pack_factory(self._pack_collection,
747
upload_suffix=self.suffix,
730
748
file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
731
749
# We know that we will process all nodes in order, and don't need to
732
750
# query, so don't combine any indices spilled to disk until we are done
897
915
time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
898
916
new_pack.signature_index.key_count(),
899
917
time.time() - new_pack.start_time)
919
# NB XXX: how to check CHK references are present? perhaps by yielding
920
# the items? How should that interact with stacked repos?
921
if new_pack.chk_index is not None:
923
if 'pack' in debug.debug_flags:
924
mutter('%s: create_pack: chk content copied: %s%s %d items t+%6.3fs',
925
time.ctime(), self._pack_collection._upload_transport.base,
926
new_pack.random_name,
927
new_pack.chk_index.key_count(),
928
time.time() - new_pack.start_time)
900
929
new_pack._check_references()
901
930
if not self._use_pack(new_pack):
906
935
self._pack_collection.allocate(new_pack)
909
def _copy_nodes(self, nodes, index_map, writer, write_index):
910
"""Copy knit nodes between packs with no graph references."""
938
def _copy_chks(self, refs=None):
939
# XXX: Todo, recursive follow-pointers facility when fetching some
941
chk_index_map, chk_indices = self._pack_map_and_index_list(
943
chk_nodes = self._index_contents(chk_indices, refs)
945
# TODO: This isn't strictly tasteful as we are accessing some private
946
# variables (_serializer). Perhaps a better way would be to have
947
# Repository._deserialise_chk_node()
948
search_key_func = chk_map.search_key_registry.get(
949
self._pack_collection.repo._serializer.search_key_name)
950
def accumlate_refs(lines):
951
# XXX: move to a generic location
953
bytes = ''.join(lines)
954
node = chk_map._deserialise(bytes, ("unknown",), search_key_func)
955
new_refs.update(node.refs())
956
self._copy_nodes(chk_nodes, chk_index_map, self.new_pack._writer,
957
self.new_pack.chk_index, output_lines=accumlate_refs)
960
def _copy_nodes(self, nodes, index_map, writer, write_index,
962
"""Copy knit nodes between packs with no graph references.
964
:param output_lines: Output full texts of copied items.
911
966
pb = ui.ui_factory.nested_progress_bar()
913
968
return self._do_copy_nodes(nodes, index_map, writer,
969
write_index, pb, output_lines=output_lines)
918
def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):
973
def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
919
975
# for record verification
920
976
knit = KnitVersionedFiles(None, None)
921
977
# plan a readv on each source pack:
955
1011
izip(reader.iter_records(), pack_readv_requests):
956
1012
raw_data = read_func(None)
957
1013
# check the header only
958
df, _ = knit._parse_record_header(key, raw_data)
1014
if output_lines is not None:
1015
output_lines(knit._parse_record(key[-1], raw_data)[0])
1017
df, _ = knit._parse_record_header(key, raw_data)
960
1019
pos, size = writer.add_bytes_record(raw_data, names)
961
1020
write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
962
1021
pb.update("Copied record", record_index)
1292
1351
:ivar _names: map of {pack_name: (index_size,)}
1354
pack_factory = NewPack
1295
1356
def __init__(self, repo, transport, index_transport, upload_transport,
1296
pack_transport, index_builder_class, index_class):
1357
pack_transport, index_builder_class, index_class,
1297
1359
"""Create a new RepositoryPackCollection.
1299
1361
:param transport: Addresses the repository base directory
1313
1376
self._pack_transport = pack_transport
1314
1377
self._index_builder_class = index_builder_class
1315
1378
self._index_class = index_class
1316
self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3}
1379
self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3,
1317
1381
self.packs = []
1318
1382
# name:Pack mapping
1319
1383
self._names = None
1328
1392
self.inventory_index = AggregateIndex(self.reload_pack_names, flush)
1329
1393
self.text_index = AggregateIndex(self.reload_pack_names, flush)
1330
1394
self.signature_index = AggregateIndex(self.reload_pack_names, flush)
1396
self.chk_index = AggregateIndex(self.reload_pack_names, flush)
1398
# used to determine if we're using a chk_index elsewhere.
1399
self.chk_index = None
1331
1400
# resumed packs
1332
1401
self._resumed_packs = []
1345
1414
self.inventory_index.add_index(pack.inventory_index, pack)
1346
1415
self.text_index.add_index(pack.text_index, pack)
1347
1416
self.signature_index.add_index(pack.signature_index, pack)
1417
if self.chk_index is not None:
1418
self.chk_index.add_index(pack.chk_index, pack)
1349
1420
def all_packs(self):
1350
1421
"""Return a list of all the Pack objects this repository has.
1419
1488
num_new_packs, num_revs_affected)
1420
1489
self._execute_pack_operations(pack_operations,
1421
1490
reload_func=self._restart_autopack)
1491
mutter('Auto-packing repository %s completed', self)
1424
1494
def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
1467
1537
self.repo.control_files.lock_write()
1539
def _already_packed(self):
1540
"""Is the collection already packed?"""
# The collection counts as packed when fewer than two pack names are
# recorded in self._names.
# NOTE(review): _names is initialised to None in __init__, so len() here
# presumably relies on the caller having run ensure_loaded() first (as
# pack() does) -- confirm for any other callers.
1541
return len(self._names) < 2
1469
1543
def pack(self):
1470
1544
"""Pack the pack collection totally."""
1471
1545
self.ensure_loaded()
1472
1546
total_packs = len(self._names)
1547
if self._already_packed():
1474
1548
# This is arguably wrong because we might not be optimal, but for
1475
1549
# now let's leave it in. (e.g. reconcile -> one pack. But not
1581
1655
inv_index = self._make_index(name, '.iix')
1582
1656
txt_index = self._make_index(name, '.tix')
1583
1657
sig_index = self._make_index(name, '.six')
1658
if self.chk_index is not None:
1659
chk_index = self._make_index(name, '.cix')
1584
1662
result = ExistingPack(self._pack_transport, name, rev_index,
1585
inv_index, txt_index, sig_index)
1663
inv_index, txt_index, sig_index, chk_index)
1586
1664
self.add_pack_to_memory(result)
1682
1760
# TODO: Probably needs to know all possible indices for this pack
1683
1761
# - or maybe list the directory and move all indices matching this
1684
1762
# name whether we recognize it or not?
1685
for suffix in ('.iix', '.six', '.tix', '.rix'):
1763
suffixes = ['.iix', '.six', '.tix', '.rix']
1764
if self.chk_index is not None:
1765
suffixes.append('.cix')
1766
for suffix in suffixes:
1686
1767
self._index_transport.rename(pack.name + suffix,
1687
1768
'../obsolete_packs/' + pack.name + suffix)
1722
1803
self.inventory_index.remove_index(pack.inventory_index, pack)
1723
1804
self.text_index.remove_index(pack.text_index, pack)
1724
1805
self.signature_index.remove_index(pack.signature_index, pack)
1806
if self.chk_index is not None:
1807
self.chk_index.remove_index(pack.chk_index, pack)
1726
1809
def reset(self):
1727
1810
"""Clear all cached data."""
1903
1989
# Do not permit preparation for writing if we're not in a 'write lock'.
1904
1990
if not self.repo.is_write_locked():
1905
1991
raise errors.NotWriteLocked(self)
1906
self._new_pack = NewPack(self, upload_suffix='.pack',
1992
self._new_pack = self.pack_factory(self, upload_suffix='.pack',
1907
1993
file_mode=self.repo.bzrdir._get_file_mode())
1908
1994
# allow writing: queue writes to a new index
1909
1995
self.revision_index.add_writable_index(self._new_pack.revision_index,
1912
1998
self._new_pack)
1913
1999
self.text_index.add_writable_index(self._new_pack.text_index,
1914
2000
self._new_pack)
2001
self._new_pack.text_index.set_optimize(combine_backing_indices=False)
1915
2002
self.signature_index.add_writable_index(self._new_pack.signature_index,
1916
2003
self._new_pack)
2004
if self.chk_index is not None:
2005
self.chk_index.add_writable_index(self._new_pack.chk_index,
2007
self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback
2008
self._new_pack.chk_index.set_optimize(combine_backing_indices=False)
1918
2010
self.repo.inventories._index._add_callback = self.inventory_index.add_callback
1919
2011
self.repo.revisions._index._add_callback = self.revision_index.add_callback
2046
2138
self._transport.clone('upload'),
2047
2139
self._transport.clone('packs'),
2048
2140
_format.index_builder_class,
2049
_format.index_class)
2141
_format.index_class,
2142
use_chk_index=self._format.supports_chks,
2050
2144
self.inventories = KnitVersionedFiles(
2051
2145
_KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
2052
2146
add_callback=self._pack_collection.inventory_index.add_callback,
2071
2165
deltas=True, parents=True, is_locked=self.is_locked),
2072
2166
data_access=self._pack_collection.text_index.data_access,
2073
2167
max_delta_chain=200)
2074
self.chk_bytes = None
2168
if _format.supports_chks:
2169
# No graph, no compression: references from chks are between
2170
# different objects not temporal versions of the same; and without
2171
# some sort of temporal structure knit compression will just fail.
2172
self.chk_bytes = KnitVersionedFiles(
2173
_KnitGraphIndex(self._pack_collection.chk_index.combined_index,
2174
add_callback=self._pack_collection.chk_index.add_callback,
2175
deltas=False, parents=False, is_locked=self.is_locked),
2176
data_access=self._pack_collection.chk_index.data_access,
2179
self.chk_bytes = None
2075
2180
# True when the repository object is 'write locked' (as opposed to the
2076
2181
# physical lock only taken out around changes to the pack-names list.)
2077
2182
# Another way to represent this would be a decorator around the control
2112
2217
revision_nodes = self._pack_collection.revision_index \
2113
2218
.combined_index.iter_all_entries()
2114
2219
index_positions = []
2115
# Get the cached index values for all revisions, and also the location
2116
# in each index of the revision text so we can perform linear IO.
2220
# Get the cached index values for all revisions, and also the
2221
# location in each index of the revision text so we can perform
2117
2223
for index, key, value, refs in revision_nodes:
2118
pos, length = value[1:].split(' ')
2119
index_positions.append((index, int(pos), key[0],
2120
tuple(parent[0] for parent in refs[0])))
2224
node = (index, key, value, refs)
2225
index_memo = self.revisions._index._node_to_position(node)
2226
if index_memo[0] != index:
2227
raise AssertionError('%r != %r' % (index_memo[0], index))
2228
index_positions.append((index_memo, key[0],
2229
tuple(parent[0] for parent in refs[0])))
2121
2230
pb.update("Reading revision index", 0, 0)
2122
2231
index_positions.sort()
2123
batch_count = len(index_positions) / 1000 + 1
2124
pb.update("Checking cached revision graph", 0, batch_count)
2125
for offset in xrange(batch_count):
2233
pb.update("Checking cached revision graph", 0,
2234
len(index_positions))
2235
for offset in xrange(0, len(index_positions), 1000):
2126
2236
pb.update("Checking cached revision graph", offset)
2127
to_query = index_positions[offset * 1000:(offset + 1) * 1000]
2237
to_query = index_positions[offset:offset + batch_size]
2128
2238
if not to_query:
2130
rev_ids = [item[2] for item in to_query]
2240
rev_ids = [item[1] for item in to_query]
2131
2241
revs = self.get_revisions(rev_ids)
2132
2242
for revision, item in zip(revs, to_query):
2133
index_parents = item[3]
2243
index_parents = item[2]
2134
2244
rev_parents = tuple(revision.parent_ids)
2135
2245
if index_parents != rev_parents:
2136
result.append((revision.revision_id, index_parents, rev_parents))
2246
result.append((revision.revision_id, index_parents,
2667
2778
return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"
2670
class RepositoryFormatPackDevelopment2(RepositoryFormatPack):
2671
"""A no-subtrees development repository.
2673
This format should be retained until the second release after bzr 1.7.
2675
This is pack-1.6.1 with B+Tree indices.
2678
repository_class = KnitPackRepository
2679
_commit_builder_class = PackCommitBuilder
2680
supports_external_lookups = True
2681
# What index classes to use
2682
index_builder_class = BTreeBuilder
2683
index_class = BTreeGraphIndex
2684
# Set to true to get the fast-commit code path tested until a really fast
2685
# format lands in trunk. Not actually fast in this format.
2689
def _serializer(self):
# Hands back the serializer object this format uses: the module-level
# xml5.serializer_v5 instance (no per-call construction happens here).
2690
return xml5.serializer_v5
2692
def _get_matching_bzrdir(self):
# Getter half of the _matchingbzrdir property: asks the bzrdir format
# registry to build the bzrdir registered under the 'development2' key.
2693
return bzrdir.format_registry.make_bzrdir('development2')
2695
def _ignore_setting_bzrdir(self, format):
2698
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2700
def get_format_string(self):
2701
"""See RepositoryFormat.get_format_string()."""
# The returned literal includes its trailing newline.
# NOTE(review): presumably this string is matched against on-disk format
# markers, so it should not be reworded -- confirm against
# RepositoryFormat.get_format_string().
2702
return "Bazaar development format 2 (needs bzr.dev from before 1.8)\n"
2704
def get_format_description(self):
2705
"""See RepositoryFormat.get_format_description()."""
# Human-readable description; the two adjacent string literals below are
# joined into a single sentence ending in a newline.
2706
return ("Development repository format, currently the same as "
2707
"1.6.1 with B+Trees.\n")
2709
def check_conversion_target(self, target_format):
2713
2781
class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
2714
2782
"""A subtrees development repository.
2716
2784
This format should be retained until the second release after bzr 1.7.
2718
2786
1.6.1-subtree[as it might have been] with B+Tree indices.
2788
This is [now] retained until we have a CHK based subtree format in
2721
2792
repository_class = KnitPackRepository