        Sets self._text_filter appropriately.
        """
        # select inventory keys
        inv_keys = self._revision_keys # currently the same keyspace, and note that
        # querying for keys here could introduce a bug where an inventory item
        # is missed, so do not change it to query separately without cross
        # checking like the text key check below.
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        output_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=output_lines)
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)

    def _copy_text_texts(self):
        # select text keys
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing records.
            text_nodes = set(text_nodes)
            present_text_keys = set(_node[1] for _node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                mutter("missing keys during fetch: %r", missing_text_keys)
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                    a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()
    def _create_pack_from_packs(self):
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        # copy chk contents
        # NB XXX: how to check CHK references are present? perhaps by yielding
        # the items? How should that interact with stacked repos?
        if new_pack.chk_index is not None:
            self._copy_chks()
            if 'pack' in debug.debug_flags:
                mutter('%s: create_pack: chk content copied: %s%s %d items t+%6.3fs',
                    time.ctime(), self._pack_collection._upload_transport.base,
                    new_pack.random_name,
                    new_pack.chk_index.key_count(),
                    time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack

    def _copy_chks(self, refs=None):
        # XXX: Todo, recursive follow-pointers facility when fetching some
        # revisions only.
        chk_index_map, chk_indices = self._pack_map_and_index_list(
            'chk_index')
        chk_nodes = self._index_contents(chk_indices, refs)
        new_refs = set()
        # TODO: This isn't strictly tasteful as we are accessing some private
        #       variables (_serializer). Perhaps a better way would be to have
        #       Repository._deserialise_chk_node()
        search_key_func = chk_map.search_key_registry.get(
            self._pack_collection.repo._serializer.search_key_name)
        def accumlate_refs(lines):
            # XXX: move to a generic location
            bytes = ''.join(lines)
            node = chk_map._deserialise(bytes, ("unknown",), search_key_func)
            new_refs.update(node.refs())
        self._copy_nodes(chk_nodes, chk_index_map, self.new_pack._writer,
            self.new_pack.chk_index, output_lines=accumlate_refs)
        return new_refs

    def _copy_nodes(self, nodes, index_map, writer, write_index,
        output_lines=None):
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_copy_nodes(nodes, index_map, writer,
                write_index, pb, output_lines=output_lines)
        finally:
            pb.finished()

    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
        output_lines=None):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0])))
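                # Illustrative: a value of 'N1234 56' means flag byte 'N'
                # (no final newline on the text), with the record at byte
                # offset 1234 and length 56 in the source pack.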
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                    [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (_1, _2, (key, eol_flag)) in \
                izip(reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1

    def _copy_nodes_graph(self, index_map, writer, write_index,
        readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for result in self._do_copy_nodes_graph(index_map, writer,
                write_index, output_lines, pb, readv_group_iter, total_items):
                yield result
        except Exception:
            # Python 2.4 does not permit try:finally: in a generator.
            pb.finished()
            raise
        else:
            pb.finished()

    def _do_copy_nodes_graph(self, index_map, writer, write_index,
        output_lines, pb, readv_group_iter, total_items):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (key, eol_flag, references) in \
                izip(reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        # no compression parent: this record is a fulltext
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        # delta compressed against its compression parent
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1

    def _get_text_nodes(self):
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        return text_index_map, self._index_contents(text_indices,
            self._text_filter)

    def _least_readv_node_readv(self, nodes):
        """Generate request groups for nodes using the least readv's.

        :param nodes: An iterable of graph index nodes.
        :return: Total node count and an iterator of the data needed to perform
            readvs to obtain the data for nodes. Each item yielded by the
            iterator is a tuple with:
            index, readv_vector, node_vector. readv_vector is a list ready to
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
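        # Illustrative, assuming two source packs A and B: nodes at offsets
        # (0, 10) and (10, 5) in A plus (0, 7) in B yield total=3 and
        # [(A, [(0, 10), (10, 5)], [nodeA1, nodeA2]), (B, [(0, 7)], [nodeB1])].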
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        result = []
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0], references)))
            # linear scan up the pack to maximum range combining.
            pack_readv_requests.sort()
            # split out the readv and the node data.
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result

    def _log_copied_texts(self):
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.text_index.key_count(),
                time.time() - self.new_pack.start_time)

    def _use_pack(self, new_pack):
        """Return True if new_pack should be used.

        :param new_pack: The pack that has just been created.
        :return: True if the pack should be used.
        """
        return new_pack.data_inserted()


class OptimisingPacker(Packer):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
            ancestors[key] = references[0]
            by_key[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)
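        # Illustrative: for ancestors {B: (A,), C: (B,)} topo_sort gives
        # [A, B, C]; iterating reversed(order) below writes C first, so
        # children land before their ancestors in the new pack.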
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
            index, value, references = by_key[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(' ')
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0], references)]))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        new_pack.revision_index.set_optimize(for_size=True)
        new_pack.inventory_index.set_optimize(for_size=True)
        new_pack.text_index.set_optimize(for_size=True)
        new_pack.signature_index.set_optimize(for_size=True)
        return new_pack


class ReconcilePacker(Packer):
    """A packer which regenerates indices etc as it copies.

    This is used by ``bzr reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def _extra_init(self):
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with."""
        repo = self._pack_collection.repo
        refs = repo._find_text_key_references_from_xml_inventory_lines(
            inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a full
        #    text.
        self._text_filter = None

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
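        # ideal_index maps each text key to the parent keys it should have
        # according to the reconciled revision graph; the classification
        # below compares it against the parents recorded in the text index.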
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # node is (index, key, value, refs)
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                    self._data_changed = True
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key:rev_order.get(key[0][1], 0))
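        # bad_texts holds ((file_id, revision_id), ideal_parents) entries;
        # ordering them by the revision's topological position means a text
        # is always inserted after any basis it may be deltified against.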
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index:self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack to delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(repo.texts.get_record_stream(
                [key], 'unordered', True).next().get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed


class RepositoryPackCollection(object):
    """Management of packs within a repository.

    :ivar _names: map of {pack_name: (index_size,)}
    """

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack

    def __init__(self, repo, transport, index_transport, upload_transport,
                 pack_transport, index_builder_class, index_class,
                 use_chk_index=False):
        """Create a new RepositoryPackCollection.

        :param transport: Addresses the repository base directory
            (typically .bzr/repository/).
        :param index_transport: Addresses the directory containing indices.
        :param upload_transport: Addresses the directory into which packs are
            written while they're being created.
        :param pack_transport: Addresses the directory of existing complete packs.
        :param index_builder_class: The index builder class to use.
        :param index_class: The index class to use.
        :param use_chk_index: Whether to setup and manage a CHK index.
        """


class KnitPackRepository(KnitRepository):
    """Repository with knit objects stored inside pack containers."""

    # These attributes are inherited from the Repository base class. Setting
    # them to None ensures that if the constructor is changed to not
    # initialize them, or a subclass fails to call the constructor, an error
    # will occur rather than the system working but generating incorrect
    # data.
    _commit_builder_class = None
    _serializer = None

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        KnitRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        index_transport = self._transport.clone('indices')
        self._pack_collection = RepositoryPackCollection(self, self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=self._format.supports_chks,
            )
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
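        # Note: inventories and file texts use delta chains (max_delta_chain
        # of 200); revisions and signatures below are stored as fulltexts
        # (max_delta_chain of 0).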
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        if _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            self.chk_bytes = KnitVersionedFiles(
                _KnitGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    deltas=False, parents=False, is_locked=self.is_locked),
                data_access=self._pack_collection.chk_index.data_access,
                max_delta_chain=0)
        else:
            self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        if self._format.supports_external_lookups:
            self._unstacked_provider = graph.CachingParentsProvider(
                self._make_parents_provider_unstacked())
        else:
            self._unstacked_provider = graph.CachingParentsProvider(self)
        self._unstacked_provider.disable_cache()

    def _all_revision_ids(self):
        """See Repository.all_revision_ids()."""
        return [key[0] for key in self.revisions.keys()]

    def _warn_if_deprecated(self):
        # This class isn't deprecated, but one sub-format is
        if isinstance(self._format, RepositoryFormatKnitPack5RichRootBroken):
            from bzrlib import repository
            if repository._deprecation_warning_done:
                return
            repository._deprecation_warning_done = True
            warning("Format %s for %s is deprecated - please use"
                    " 'bzr upgrade --1.6.1-rich-root'"
                    % (self._format, self.bzrdir.transport.base))

    def _abort_write_group(self):
        self.revisions._index._key_dependencies.clear()
        self._pack_collection._abort_write_group()

    def _find_inconsistent_revision_parents(self):
        """Find revisions with incorrectly cached parents.

        :returns: an iterator yielding tuples of (revision-id, parents-in-index,
            parents-in-revision).
        """
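        # The check below reads revisions in batches of 1000, sorted by
        # their physical position in the pack files, so the IO is linear.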
        if not self.is_locked():
            raise errors.ObjectNotLocked(self)
        pb = ui.ui_factory.nested_progress_bar()
        result = []
        try:
            revision_nodes = self._pack_collection.revision_index \
                .combined_index.iter_all_entries()
            index_positions = []
            # Get the cached index values for all revisions, and also the
            # location in each index of the revision text so we can perform
            # linear IO.
            for index, key, value, refs in revision_nodes:
                node = (index, key, value, refs)
                index_memo = self.revisions._index._node_to_position(node)
                if index_memo[0] != index:
                    raise AssertionError('%r != %r' % (index_memo[0], index))
                index_positions.append((index_memo, key[0],
                                       tuple(parent[0] for parent in refs[0])))
                pb.update("Reading revision index", 0, 0)
            index_positions.sort()
            batch_size = 1000
            pb.update("Checking cached revision graph", 0,
                      len(index_positions))
            for offset in xrange(0, len(index_positions), batch_size):
                pb.update("Checking cached revision graph", offset)
                to_query = index_positions[offset:offset + batch_size]
                if not to_query:
                    break
                rev_ids = [item[1] for item in to_query]
                revs = self.get_revisions(rev_ids)
                for revision, item in zip(revs, to_query):
                    index_parents = item[2]
                    rev_parents = tuple(revision.parent_ids)
                    if index_parents != rev_parents:
                        result.append((revision.revision_id, index_parents,
                                       rev_parents))
        finally:
            pb.finished()
        return result

    def _make_parents_provider(self):
        if not self._format.supports_external_lookups:
            return self._unstacked_provider
        return graph.StackedParentsProvider(_LazyListJoin(
            [self._unstacked_provider], self._fallback_repositories))

    def _refresh_data(self):
        if not self.is_locked():
            return
        self._pack_collection.reload_pack_names()
        self._unstacked_provider.disable_cache()
        self._unstacked_provider.enable_cache()

    def _start_write_group(self):
        self._pack_collection._start_write_group()

    def _commit_write_group(self):
        hint = self._pack_collection._commit_write_group()
        self.revisions._index._key_dependencies.clear()
        # The commit may have added keys that were previously cached as
        # missing, so reset the cache.
        self._unstacked_provider.disable_cache()
        self._unstacked_provider.enable_cache()
        return hint

    def suspend_write_group(self):
        # XXX check self._write_group is self.get_transaction()?
        tokens = self._pack_collection._suspend_write_group()
        self.revisions._index._key_dependencies.clear()
        self._write_group = None
        return tokens


class RetryPackOperations(errors.RetryWithNewPacks):
    """Raised when we are packing and we find a missing file.

    Meant as a signaling exception, to tell the RepositoryPackCollection.pack
    code it should try again.
    """

    internal_error = True

    _fmt = ("Pack files have changed, reload and try pack again."
            " context: %(context)s %(orig_error)s")


class _DirectPackAccess(object):
    """Access to data in one or more packs with less translation."""

    def __init__(self, index_to_packs, reload_func=None, flush_func=None):
        """Create a _DirectPackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param reload_func: A function to call if we determine that the pack
            files have moved and we need to reload our caches. See
            bzrlib.repo_fmt.pack_repo.AggregateIndex for more details.
        """
        self._container_writer = None
        self._write_index = None
        self._indices = index_to_packs
        self._reload_func = reload_func
        self._flush_func = flush_func

    def add_raw_records(self, key_sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param sizes: An iterable of tuples containing the key and size of each
            raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is an
            opaque index memo. For _DirectPackAccess the memo is (index, pos,
            length), where the index field is the write_index object supplied
            to the PackAccess object.
        """
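        # Illustrative: key_sizes of [(key_a, 10), (key_b, 7)] slice raw_data
        # at [0:10] and [10:17], spool one bytes-record each, and return
        # [(write_index, pos_a, len_a), (write_index, pos_b, len_b)].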
        if type(raw_data) is not str:
            raise AssertionError(
                'data must be plain bytes was %s' % type(raw_data))
        result = []
        offset = 0
        for key, size in key_sizes:
            p_offset, p_length = self._container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self._write_index, p_offset, p_length))
        return result

    def flush(self):
        """Flush pending writes on this access object.

        This will flush any buffered writes to a NewPack.
        """
        if self._flush_func is not None:
            self._flush_func()

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index requests
        request_lists = []
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            if current_index == index:
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
2013
transport, path = self._indices[index]
2015
# A KeyError here indicates that someone has triggered an index
2016
# reload, and this index has gone missing, we need to start
2018
if self._reload_func is None:
2019
# If we don't have a _reload_func there is nothing that can
2022
raise errors.RetryWithNewPacks(index,
2023
reload_occurred=True,
2024
exc_info=sys.exc_info())
2026
reader = pack.make_readv_reader(transport, path, offsets)
2027
for names, read_func in reader.iter_records():
2028
yield read_func(None)
2029
except errors.NoSuchFile:
2030
# A NoSuchFile error indicates that a pack file has gone
2031
# missing on disk, we need to trigger a reload, and start over.
2032
if self._reload_func is None:
2034
raise errors.RetryWithNewPacks(transport.abspath(path),
2035
reload_occurred=False,
2036
exc_info=sys.exc_info())

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data."""
        if index is not None:
            self._indices[index] = transport_packname
        self._container_writer = writer
        self._write_index = index

    def reload_or_raise(self, retry_exc):
        """Try calling the reload function, or re-raise the original exception.

        This should be called after _DirectPackAccess raises a
        RetryWithNewPacks exception. This function will handle the common logic
        of determining when the error is fatal versus being temporary.
        It will also make sure that the original exception is raised, rather
        than the RetryWithNewPacks exception.

        If this function returns, then the calling function should retry
        whatever operation was being performed. Otherwise an exception will
        be raised.

        :param retry_exc: A RetryWithNewPacks exception.
        """
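        # Decision logic: no reload_func means we cannot recover, so the
        # original error is fatal. If reload_func() reports no change and no
        # reload had already occurred, the failure is also fatal. Otherwise
        # we return and the caller retries with the reloaded pack list.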
        is_error = False
        if self._reload_func is None:
            is_error = True
        elif not self._reload_func():
            # The reload claimed that nothing changed
            if not retry_exc.reload_occurred:
                # If there wasn't an earlier reload, then we really were
                # expecting to find changes. We didn't find them, so this is a
                # hard error.
                is_error = True
        if is_error:
            exc_class, exc_value, exc_traceback = retry_exc.exc_info
            raise exc_class, exc_value, exc_traceback


class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder

    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"

    def check_conversion_target(self, target_format):
        pass


class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = True

    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"


class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False

    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"


class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"

    def check_conversion_target(self, target_format):
        pass


class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"


class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        matching = bzrdir.format_registry.make_bzrdir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")


class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"

    def check_conversion_target(self, target_format):
        pass


class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained until the second release after bzr 1.7.

    1.6.1-subtree[as it might have been] with B+Tree indices.

    This is [now] retained until we have a CHK based subtree format in
    development.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'development-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
                "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
                "1.6.1-subtree with B+Tree indices.\n")