743
836
Sets self._text_filter appropriately.
745
raise NotImplementedError(self._copy_inventory_texts)
838
# select inventory keys
839
inv_keys = self._revision_keys # currently the same keyspace, and note that
840
# querying for keys here could introduce a bug where an inventory item
841
# is missed, so do not change it to query separately without cross
842
# checking like the text key check below.
843
inventory_index_map, inventory_indices = self._pack_map_and_index_list(
845
inv_nodes = self._index_contents(inventory_indices, inv_keys)
846
# copy inventory keys and adjust values
847
# XXX: Should be a helper function to allow different inv representation
849
self.pb.update("Copying inventory texts", 2)
850
total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
851
# Only grab the output lines if we will be processing them
852
output_lines = bool(self.revision_ids)
853
inv_lines = self._copy_nodes_graph(inventory_index_map,
854
self.new_pack._writer, self.new_pack.inventory_index,
855
readv_group_iter, total_items, output_lines=output_lines)
856
if self.revision_ids:
857
self._process_inventory_lines(inv_lines)
859
# eat the iterator to cause it to execute.
861
self._text_filter = None
862
if 'pack' in debug.debug_flags:
863
mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
864
time.ctime(), self._pack_collection._upload_transport.base,
865
self.new_pack.random_name,
866
self.new_pack.inventory_index.key_count(),
867
time.time() - self.new_pack.start_time)
747
869
def _copy_text_texts(self):
748
raise NotImplementedError(self._copy_text_texts)
871
text_index_map, text_nodes = self._get_text_nodes()
872
if self._text_filter is not None:
873
# We could return the keys copied as part of the return value from
874
# _copy_nodes_graph but this doesn't work all that well with the
875
# need to get line output too, so we check separately, and as we're
876
# going to buffer everything anyway, we check beforehand, which
877
# saves reading knit data over the wire when we know there are
879
text_nodes = set(text_nodes)
880
present_text_keys = set(_node[1] for _node in text_nodes)
881
missing_text_keys = set(self._text_filter) - present_text_keys
882
if missing_text_keys:
883
# TODO: raise a specific error that can handle many missing
885
mutter("missing keys during fetch: %r", missing_text_keys)
886
a_missing_key = missing_text_keys.pop()
887
raise errors.RevisionNotPresent(a_missing_key[1],
889
# copy text keys and adjust values
890
self.pb.update("Copying content texts", 3)
891
total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
892
list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
893
self.new_pack.text_index, readv_group_iter, total_items))
894
self._log_copied_texts()
750
896
def _create_pack_from_packs(self):
751
raise NotImplementedError(self._create_pack_from_packs)
897
self.pb.update("Opening pack", 0, 5)
898
self.new_pack = self.open_pack()
899
new_pack = self.new_pack
900
# buffer data - we won't be reading-back during the pack creation and
901
# this makes a significant difference on sftp pushes.
902
new_pack.set_write_cache_size(1024*1024)
903
if 'pack' in debug.debug_flags:
904
plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
905
for a_pack in self.packs]
906
if self.revision_ids is not None:
907
rev_count = len(self.revision_ids)
910
mutter('%s: create_pack: creating pack from source packs: '
911
'%s%s %s revisions wanted %s t=0',
912
time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
913
plain_pack_list, rev_count)
914
self._copy_revision_texts()
915
self._copy_inventory_texts()
916
self._copy_text_texts()
917
# select signature keys
918
signature_filter = self._revision_keys # same keyspace
919
signature_index_map, signature_indices = self._pack_map_and_index_list(
921
signature_nodes = self._index_contents(signature_indices,
923
# copy signature keys and adjust values
924
self.pb.update("Copying signature texts", 4)
925
self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
926
new_pack.signature_index)
927
if 'pack' in debug.debug_flags:
928
mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
929
time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
930
new_pack.signature_index.key_count(),
931
time.time() - new_pack.start_time)
933
# NB XXX: how to check CHK references are present? perhaps by yielding
934
# the items? How should that interact with stacked repos?
935
if new_pack.chk_index is not None:
937
if 'pack' in debug.debug_flags:
938
mutter('%s: create_pack: chk content copied: %s%s %d items t+%6.3fs',
939
time.ctime(), self._pack_collection._upload_transport.base,
940
new_pack.random_name,
941
new_pack.chk_index.key_count(),
942
time.time() - new_pack.start_time)
943
new_pack._check_references()
944
if not self._use_pack(new_pack):
947
self.pb.update("Finishing pack", 5)
949
self._pack_collection.allocate(new_pack)
952
def _copy_chks(self, refs=None):
953
# XXX: Todo, recursive follow-pointers facility when fetching some
955
chk_index_map, chk_indices = self._pack_map_and_index_list(
957
chk_nodes = self._index_contents(chk_indices, refs)
959
# TODO: This isn't strictly tasteful as we are accessing some private
960
# variables (_serializer). Perhaps a better way would be to have
961
# Repository._deserialise_chk_node()
962
search_key_func = chk_map.search_key_registry.get(
963
self._pack_collection.repo._serializer.search_key_name)
964
def accumlate_refs(lines):
965
# XXX: move to a generic location
967
bytes = ''.join(lines)
968
node = chk_map._deserialise(bytes, ("unknown",), search_key_func)
969
new_refs.update(node.refs())
970
self._copy_nodes(chk_nodes, chk_index_map, self.new_pack._writer,
971
self.new_pack.chk_index, output_lines=accumlate_refs)
974
def _copy_nodes(self, nodes, index_map, writer, write_index,
976
"""Copy knit nodes between packs with no graph references.
978
:param output_lines: Output full texts of copied items.
980
pb = ui.ui_factory.nested_progress_bar()
982
return self._do_copy_nodes(nodes, index_map, writer,
983
write_index, pb, output_lines=output_lines)
987
def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
989
# for record verification
990
knit = KnitVersionedFiles(None, None)
991
# plan a readv on each source pack:
993
nodes = sorted(nodes)
994
# how to map this into knit.py - or knit.py into this?
995
# we don't want the typical knit logic, we want grouping by pack
996
# at this point - perhaps a helper library for the following code
997
# duplication points?
999
for index, key, value in nodes:
1000
if index not in request_groups:
1001
request_groups[index] = []
1002
request_groups[index].append((key, value))
1004
pb.update("Copied record", record_index, len(nodes))
1005
for index, items in request_groups.iteritems():
1006
pack_readv_requests = []
1007
for key, value in items:
1008
# ---- KnitGraphIndex.get_position
1009
bits = value[1:].split(' ')
1010
offset, length = int(bits[0]), int(bits[1])
1011
pack_readv_requests.append((offset, length, (key, value[0])))
1012
# linear scan up the pack
1013
pack_readv_requests.sort()
1015
pack_obj = index_map[index]
1016
transport, path = pack_obj.access_tuple()
1018
reader = pack.make_readv_reader(transport, path,
1019
[offset[0:2] for offset in pack_readv_requests])
1020
except errors.NoSuchFile:
1021
if self._reload_func is not None:
1024
for (names, read_func), (_1, _2, (key, eol_flag)) in \
1025
izip(reader.iter_records(), pack_readv_requests):
1026
raw_data = read_func(None)
1027
# check the header only
1028
if output_lines is not None:
1029
output_lines(knit._parse_record(key[-1], raw_data)[0])
1031
df, _ = knit._parse_record_header(key, raw_data)
1033
pos, size = writer.add_bytes_record(raw_data, names)
1034
write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
1035
pb.update("Copied record", record_index)
1038
def _copy_nodes_graph(self, index_map, writer, write_index,
1039
readv_group_iter, total_items, output_lines=False):
1040
"""Copy knit nodes between packs.
1042
:param output_lines: Return lines present in the copied data as
1043
an iterator of line,version_id.
1045
pb = ui.ui_factory.nested_progress_bar()
1047
for result in self._do_copy_nodes_graph(index_map, writer,
1048
write_index, output_lines, pb, readv_group_iter, total_items):
1051
# Python 2.4 does not permit try:finally: in a generator.
1057
def _do_copy_nodes_graph(self, index_map, writer, write_index,
1058
output_lines, pb, readv_group_iter, total_items):
1059
# for record verification
1060
knit = KnitVersionedFiles(None, None)
1061
# for line extraction when requested (inventories only)
1063
factory = KnitPlainFactory()
1065
pb.update("Copied record", record_index, total_items)
1066
for index, readv_vector, node_vector in readv_group_iter:
1068
pack_obj = index_map[index]
1069
transport, path = pack_obj.access_tuple()
1071
reader = pack.make_readv_reader(transport, path, readv_vector)
1072
except errors.NoSuchFile:
1073
if self._reload_func is not None:
1076
for (names, read_func), (key, eol_flag, references) in \
1077
izip(reader.iter_records(), node_vector):
1078
raw_data = read_func(None)
1080
# read the entire thing
1081
content, _ = knit._parse_record(key[-1], raw_data)
1082
if len(references[-1]) == 0:
1083
line_iterator = factory.get_fulltext_content(content)
1085
line_iterator = factory.get_linedelta_content(content)
1086
for line in line_iterator:
1089
# check the header only
1090
df, _ = knit._parse_record_header(key, raw_data)
1092
pos, size = writer.add_bytes_record(raw_data, names)
1093
write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
1094
pb.update("Copied record", record_index)
1097
def _get_text_nodes(self):
1098
text_index_map, text_indices = self._pack_map_and_index_list(
1100
return text_index_map, self._index_contents(text_indices,
1103
def _least_readv_node_readv(self, nodes):
1104
"""Generate request groups for nodes using the least readv's.
1106
:param nodes: An iterable of graph index nodes.
1107
:return: Total node count and an iterator of the data needed to perform
1108
readvs to obtain the data for nodes. Each item yielded by the
1109
iterator is a tuple with:
1110
index, readv_vector, node_vector. readv_vector is a list ready to
1111
hand to the transport readv method, and node_vector is a list of
1112
(key, eol_flag, references) for the the node retrieved by the
1113
matching readv_vector.
1115
# group by pack so we do one readv per pack
1116
nodes = sorted(nodes)
1119
for index, key, value, references in nodes:
1120
if index not in request_groups:
1121
request_groups[index] = []
1122
request_groups[index].append((key, value, references))
1124
for index, items in request_groups.iteritems():
1125
pack_readv_requests = []
1126
for key, value, references in items:
1127
# ---- KnitGraphIndex.get_position
1128
bits = value[1:].split(' ')
1129
offset, length = int(bits[0]), int(bits[1])
1130
pack_readv_requests.append(
1131
((offset, length), (key, value[0], references)))
1132
# linear scan up the pack to maximum range combining.
1133
pack_readv_requests.sort()
1134
# split out the readv and the node data.
1135
pack_readv = [readv for readv, node in pack_readv_requests]
1136
node_vector = [node for readv, node in pack_readv_requests]
1137
result.append((index, pack_readv, node_vector))
1138
return total, result
753
1140
def _log_copied_texts(self):
754
1141
if 'pack' in debug.debug_flags:
767
1173
return new_pack.data_inserted()
1176
class OptimisingPacker(Packer):
1177
"""A packer which spends more time to create better disk layouts."""
1179
def _revision_node_readv(self, revision_nodes):
1180
"""Return the total revisions and the readv's to issue.
1182
This sort places revisions in topological order with the ancestors
1185
:param revision_nodes: The revision index contents for the packs being
1186
incorporated into the new pack.
1187
:return: As per _least_readv_node_readv.
1189
# build an ancestors dict
1192
for index, key, value, references in revision_nodes:
1193
ancestors[key] = references[0]
1194
by_key[key] = (index, value, references)
1195
order = tsort.topo_sort(ancestors)
1197
# Single IO is pathological, but it will work as a starting point.
1199
for key in reversed(order):
1200
index, value, references = by_key[key]
1201
# ---- KnitGraphIndex.get_position
1202
bits = value[1:].split(' ')
1203
offset, length = int(bits[0]), int(bits[1])
1205
(index, [(offset, length)], [(key, value[0], references)]))
1206
# TODO: combine requests in the same index that are in ascending order.
1207
return total, requests
1209
def open_pack(self):
1210
"""Open a pack for the pack we are creating."""
1211
new_pack = super(OptimisingPacker, self).open_pack()
1212
# Turn on the optimization flags for all the index builders.
1213
new_pack.revision_index.set_optimize(for_size=True)
1214
new_pack.inventory_index.set_optimize(for_size=True)
1215
new_pack.text_index.set_optimize(for_size=True)
1216
new_pack.signature_index.set_optimize(for_size=True)
1220
class ReconcilePacker(Packer):
1221
"""A packer which regenerates indices etc as it copies.
1223
This is used by ``bzr reconcile`` to cause parent text pointers to be
1227
def _extra_init(self):
1228
self._data_changed = False
1230
def _process_inventory_lines(self, inv_lines):
1231
"""Generate a text key reference map rather for reconciling with."""
1232
repo = self._pack_collection.repo
1233
refs = repo._find_text_key_references_from_xml_inventory_lines(
1235
self._text_refs = refs
1236
# during reconcile we:
1237
# - convert unreferenced texts to full texts
1238
# - correct texts which reference a text not copied to be full texts
1239
# - copy all others as-is but with corrected parents.
1240
# - so at this point we don't know enough to decide what becomes a full
1242
self._text_filter = None
1244
def _copy_text_texts(self):
1245
"""generate what texts we should have and then copy."""
1246
self.pb.update("Copying content texts", 3)
1247
# we have three major tasks here:
1248
# 1) generate the ideal index
1249
repo = self._pack_collection.repo
1250
ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
1251
_1, key, _2, refs in
1252
self.new_pack.revision_index.iter_all_entries()])
1253
ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
1254
# 2) generate a text_nodes list that contains all the deltas that can
1255
# be used as-is, with corrected parents.
1258
discarded_nodes = []
1259
NULL_REVISION = _mod_revision.NULL_REVISION
1260
text_index_map, text_nodes = self._get_text_nodes()
1261
for node in text_nodes:
1267
ideal_parents = tuple(ideal_index[node[1]])
1269
discarded_nodes.append(node)
1270
self._data_changed = True
1272
if ideal_parents == (NULL_REVISION,):
1274
if ideal_parents == node[3][0]:
1276
ok_nodes.append(node)
1277
elif ideal_parents[0:1] == node[3][0][0:1]:
1278
# the left most parent is the same, or there are no parents
1279
# today. Either way, we can preserve the representation as
1280
# long as we change the refs to be inserted.
1281
self._data_changed = True
1282
ok_nodes.append((node[0], node[1], node[2],
1283
(ideal_parents, node[3][1])))
1284
self._data_changed = True
1286
# Reinsert this text completely
1287
bad_texts.append((node[1], ideal_parents))
1288
self._data_changed = True
1289
# we're finished with some data.
1292
# 3) bulk copy the ok data
1293
total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
1294
list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
1295
self.new_pack.text_index, readv_group_iter, total_items))
1296
# 4) adhoc copy all the other texts.
1297
# We have to topologically insert all texts otherwise we can fail to
1298
# reconcile when parts of a single delta chain are preserved intact,
1299
# and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
1300
# reinserted, and if d3 has incorrect parents it will also be
1301
# reinserted. If we insert d3 first, d2 is present (as it was bulk
1302
# copied), so we will try to delta, but d2 is not currently able to be
1303
# extracted because it's basis d1 is not present. Topologically sorting
1304
# addresses this. The following generates a sort for all the texts that
1305
# are being inserted without having to reference the entire text key
1306
# space (we only topo sort the revisions, which is smaller).
1307
topo_order = tsort.topo_sort(ancestors)
1308
rev_order = dict(zip(topo_order, range(len(topo_order))))
1309
bad_texts.sort(key=lambda key:rev_order[key[0][1]])
1310
transaction = repo.get_transaction()
1311
file_id_index = GraphIndexPrefixAdapter(
1312
self.new_pack.text_index,
1314
add_nodes_callback=self.new_pack.text_index.add_nodes)
1315
data_access = _DirectPackAccess(
1316
{self.new_pack.text_index:self.new_pack.access_tuple()})
1317
data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
1318
self.new_pack.access_tuple())
1319
output_texts = KnitVersionedFiles(
1320
_KnitGraphIndex(self.new_pack.text_index,
1321
add_callback=self.new_pack.text_index.add_nodes,
1322
deltas=True, parents=True, is_locked=repo.is_locked),
1323
data_access=data_access, max_delta_chain=200)
1324
for key, parent_keys in bad_texts:
1325
# We refer to the new pack to delta data being output.
1326
# A possible improvement would be to catch errors on short reads
1327
# and only flush then.
1328
self.new_pack.flush()
1330
for parent_key in parent_keys:
1331
if parent_key[0] != key[0]:
1332
# Graph parents must match the fileid
1333
raise errors.BzrError('Mismatched key parent %r:%r' %
1335
parents.append(parent_key[1])
1336
text_lines = osutils.split_lines(repo.texts.get_record_stream(
1337
[key], 'unordered', True).next().get_bytes_as('fulltext'))
1338
output_texts.add_lines(key, parent_keys, text_lines,
1339
random_id=True, check_content=False)
1340
# 5) check that nothing inserted has a reference outside the keyspace.
1341
missing_text_keys = self.new_pack.text_index._external_references()
1342
if missing_text_keys:
1343
raise errors.BzrCheckError('Reference to missing compression parents %r'
1344
% (missing_text_keys,))
1345
self._log_copied_texts()
1347
def _use_pack(self, new_pack):
1348
"""Override _use_pack to check for reconcile having changed content."""
1349
# XXX: we might be better checking this at the copy time.
1350
original_inventory_keys = set()
1351
inv_index = self._pack_collection.inventory_index.combined_index
1352
for entry in inv_index.iter_all_entries():
1353
original_inventory_keys.add(entry[1])
1354
new_inventory_keys = set()
1355
for entry in new_pack.inventory_index.iter_all_entries():
1356
new_inventory_keys.add(entry[1])
1357
if new_inventory_keys != original_inventory_keys:
1358
self._data_changed = True
1359
return new_pack.data_inserted() and self._data_changed
770
1362
class RepositoryPackCollection(object):
771
1363
"""Management of packs within a repository.
773
1365
:ivar _names: map of {pack_name: (index_size,)}
777
resumed_pack_factory = None
778
normal_packer_class = None
779
optimising_packer_class = None
1368
pack_factory = NewPack
1369
resumed_pack_factory = ResumedPack
781
1371
def __init__(self, repo, transport, index_transport, upload_transport,
782
1372
pack_transport, index_builder_class, index_class,
1681
# These attributes are inherited from the Repository base class. Setting
1682
# them to None ensures that if the constructor is changed to not initialize
1683
# them, or a subclass fails to call the constructor, that an error will
1684
# occur rather than the system working but generating incorrect data.
1685
_commit_builder_class = None
1688
2145
def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
1690
MetaDirRepository.__init__(self, _format, a_bzrdir, control_files)
1691
self._commit_builder_class = _commit_builder_class
1692
self._serializer = _serializer
2147
KnitRepository.__init__(self, _format, a_bzrdir, control_files,
2148
_commit_builder_class, _serializer)
2149
index_transport = self._transport.clone('indices')
2150
self._pack_collection = RepositoryPackCollection(self, self._transport,
2152
self._transport.clone('upload'),
2153
self._transport.clone('packs'),
2154
_format.index_builder_class,
2155
_format.index_class,
2156
use_chk_index=self._format.supports_chks,
2158
self.inventories = KnitVersionedFiles(
2159
_KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
2160
add_callback=self._pack_collection.inventory_index.add_callback,
2161
deltas=True, parents=True, is_locked=self.is_locked),
2162
data_access=self._pack_collection.inventory_index.data_access,
2163
max_delta_chain=200)
2164
self.revisions = KnitVersionedFiles(
2165
_KnitGraphIndex(self._pack_collection.revision_index.combined_index,
2166
add_callback=self._pack_collection.revision_index.add_callback,
2167
deltas=False, parents=True, is_locked=self.is_locked,
2168
track_external_parent_refs=True),
2169
data_access=self._pack_collection.revision_index.data_access,
2171
self.signatures = KnitVersionedFiles(
2172
_KnitGraphIndex(self._pack_collection.signature_index.combined_index,
2173
add_callback=self._pack_collection.signature_index.add_callback,
2174
deltas=False, parents=False, is_locked=self.is_locked),
2175
data_access=self._pack_collection.signature_index.data_access,
2177
self.texts = KnitVersionedFiles(
2178
_KnitGraphIndex(self._pack_collection.text_index.combined_index,
2179
add_callback=self._pack_collection.text_index.add_callback,
2180
deltas=True, parents=True, is_locked=self.is_locked),
2181
data_access=self._pack_collection.text_index.data_access,
2182
max_delta_chain=200)
2183
if _format.supports_chks:
2184
# No graph, no compression:- references from chks are between
2185
# different objects not temporal versions of the same; and without
2186
# some sort of temporal structure knit compression will just fail.
2187
self.chk_bytes = KnitVersionedFiles(
2188
_KnitGraphIndex(self._pack_collection.chk_index.combined_index,
2189
add_callback=self._pack_collection.chk_index.add_callback,
2190
deltas=False, parents=False, is_locked=self.is_locked),
2191
data_access=self._pack_collection.chk_index.data_access,
2194
self.chk_bytes = None
2195
# True when the repository object is 'write locked' (as opposed to the
2196
# physical lock only taken out around changes to the pack-names list.)
2197
# Another way to represent this would be a decorator around the control
2198
# files object that presents logical locks as physical ones - if this
2199
# gets ugly consider that alternative design. RBC 20071011
2200
self._write_lock_count = 0
2201
self._transaction = None
2203
self._reconcile_does_inventory_gc = True
1693
2204
self._reconcile_fixes_text_parents = True
1694
if self._format.supports_external_lookups:
1695
self._unstacked_provider = graph.CachingParentsProvider(
1696
self._make_parents_provider_unstacked())
1698
self._unstacked_provider = graph.CachingParentsProvider(self)
1699
self._unstacked_provider.disable_cache()
2205
self._reconcile_backsup_inventory = False
1702
def _all_revision_ids(self):
1703
"""See Repository.all_revision_ids()."""
1704
return [key[0] for key in self.revisions.keys()]
2207
def _warn_if_deprecated(self):
2208
# This class isn't deprecated, but one sub-format is
2209
if isinstance(self._format, RepositoryFormatKnitPack5RichRootBroken):
2210
from bzrlib import repository
2211
if repository._deprecation_warning_done:
2213
repository._deprecation_warning_done = True
2214
warning("Format %s for %s is deprecated - please use"
2215
" 'bzr upgrade --1.6.1-rich-root'"
2216
% (self._format, self.bzrdir.transport.base))
1706
2218
def _abort_write_group(self):
1707
self.revisions._index._key_dependencies.clear()
2219
self.revisions._index._key_dependencies.refs.clear()
1708
2220
self._pack_collection._abort_write_group()
2222
def _find_inconsistent_revision_parents(self):
2223
"""Find revisions with incorrectly cached parents.
2225
:returns: an iterator yielding tuples of (revison-id, parents-in-index,
2226
parents-in-revision).
2228
if not self.is_locked():
2229
raise errors.ObjectNotLocked(self)
2230
pb = ui.ui_factory.nested_progress_bar()
2233
revision_nodes = self._pack_collection.revision_index \
2234
.combined_index.iter_all_entries()
2235
index_positions = []
2236
# Get the cached index values for all revisions, and also the
2237
# location in each index of the revision text so we can perform
2239
for index, key, value, refs in revision_nodes:
2240
node = (index, key, value, refs)
2241
index_memo = self.revisions._index._node_to_position(node)
2242
if index_memo[0] != index:
2243
raise AssertionError('%r != %r' % (index_memo[0], index))
2244
index_positions.append((index_memo, key[0],
2245
tuple(parent[0] for parent in refs[0])))
2246
pb.update("Reading revision index", 0, 0)
2247
index_positions.sort()
2249
pb.update("Checking cached revision graph", 0,
2250
len(index_positions))
2251
for offset in xrange(0, len(index_positions), 1000):
2252
pb.update("Checking cached revision graph", offset)
2253
to_query = index_positions[offset:offset + batch_size]
2256
rev_ids = [item[1] for item in to_query]
2257
revs = self.get_revisions(rev_ids)
2258
for revision, item in zip(revs, to_query):
2259
index_parents = item[2]
2260
rev_parents = tuple(revision.parent_ids)
2261
if index_parents != rev_parents:
2262
result.append((revision.revision_id, index_parents,
1710
2268
def _make_parents_provider(self):
1711
if not self._format.supports_external_lookups:
1712
return self._unstacked_provider
1713
return graph.StackedParentsProvider(_LazyListJoin(
1714
[self._unstacked_provider], self._fallback_repositories))
2269
return graph.CachingParentsProvider(self)
1716
2271
def _refresh_data(self):
1717
2272
if not self.is_locked():
1719
2274
self._pack_collection.reload_pack_names()
1720
self._unstacked_provider.disable_cache()
1721
self._unstacked_provider.enable_cache()
1723
2276
def _start_write_group(self):
1724
2277
self._pack_collection._start_write_group()
1726
2279
def _commit_write_group(self):
1727
hint = self._pack_collection._commit_write_group()
1728
self.revisions._index._key_dependencies.clear()
1729
# The commit may have added keys that were previously cached as
1730
# missing, so reset the cache.
1731
self._unstacked_provider.disable_cache()
1732
self._unstacked_provider.enable_cache()
2280
self.revisions._index._key_dependencies.refs.clear()
2281
return self._pack_collection._commit_write_group()
1735
2283
def suspend_write_group(self):
1736
2284
# XXX check self._write_group is self.get_transaction()?
1737
2285
tokens = self._pack_collection._suspend_write_group()
1738
self.revisions._index._key_dependencies.clear()
2286
self.revisions._index._key_dependencies.refs.clear()
1739
2287
self._write_group = None
1742
2290
def _resume_write_group(self, tokens):
1743
2291
self._start_write_group()
1745
self._pack_collection._resume_write_group(tokens)
1746
except errors.UnresumableWriteGroup:
1747
self._abort_write_group()
2292
self._pack_collection._resume_write_group(tokens)
1749
2293
for pack in self._pack_collection._resumed_packs:
1750
2294
self.revisions._index.scan_unvalidated_index(pack.revision_index)
1936
2456
_serializer=self._serializer)
1939
class RetryPackOperations(errors.RetryWithNewPacks):
1940
"""Raised when we are packing and we find a missing file.
1942
Meant as a signaling exception, to tell the RepositoryPackCollection.pack
1943
code it should try again.
1946
internal_error = True
1948
_fmt = ("Pack files have changed, reload and try pack again."
1949
" context: %(context)s %(orig_error)s")
1952
class _DirectPackAccess(object):
1953
"""Access to data in one or more packs with less translation."""
1955
def __init__(self, index_to_packs, reload_func=None, flush_func=None):
1956
"""Create a _DirectPackAccess object.
1958
:param index_to_packs: A dict mapping index objects to the transport
1959
and file names for obtaining data.
1960
:param reload_func: A function to call if we determine that the pack
1961
files have moved and we need to reload our caches. See
1962
bzrlib.repo_fmt.pack_repo.AggregateIndex for more details.
1964
self._container_writer = None
1965
self._write_index = None
1966
self._indices = index_to_packs
1967
self._reload_func = reload_func
1968
self._flush_func = flush_func
1970
def add_raw_records(self, key_sizes, raw_data):
1971
"""Add raw knit bytes to a storage area.
1973
The data is spooled to the container writer in one bytes-record per
1976
:param sizes: An iterable of tuples containing the key and size of each
1978
:param raw_data: A bytestring containing the data.
1979
:return: A list of memos to retrieve the record later. Each memo is an
1980
opaque index memo. For _DirectPackAccess the memo is (index, pos,
1981
length), where the index field is the write_index object supplied
1982
to the PackAccess object.
1984
if type(raw_data) is not str:
1985
raise AssertionError(
1986
'data must be plain bytes was %s' % type(raw_data))
1989
for key, size in key_sizes:
1990
p_offset, p_length = self._container_writer.add_bytes_record(
1991
raw_data[offset:offset+size], [])
1993
result.append((self._write_index, p_offset, p_length))
1997
"""Flush pending writes on this access object.
1999
This will flush any buffered writes to a NewPack.
2001
if self._flush_func is not None:
2004
def get_raw_records(self, memos_for_retrieval):
2005
"""Get the raw bytes for a records.
2007
:param memos_for_retrieval: An iterable containing the (index, pos,
2008
length) memo for retrieving the bytes. The Pack access method
2009
looks up the pack to use for a given record in its index_to_pack
2011
:return: An iterator over the bytes of the records.
2013
# first pass, group into same-index requests
2015
current_index = None
2016
for (index, offset, length) in memos_for_retrieval:
2017
if current_index == index:
2018
current_list.append((offset, length))
2020
if current_index is not None:
2021
request_lists.append((current_index, current_list))
2022
current_index = index
2023
current_list = [(offset, length)]
2024
# handle the last entry
2025
if current_index is not None:
2026
request_lists.append((current_index, current_list))
2027
for index, offsets in request_lists:
2029
transport, path = self._indices[index]
2031
# A KeyError here indicates that someone has triggered an index
2032
# reload, and this index has gone missing, we need to start
2034
if self._reload_func is None:
2035
# If we don't have a _reload_func there is nothing that can
2038
raise errors.RetryWithNewPacks(index,
2039
reload_occurred=True,
2040
exc_info=sys.exc_info())
2042
reader = pack.make_readv_reader(transport, path, offsets)
2043
for names, read_func in reader.iter_records():
2044
yield read_func(None)
2045
except errors.NoSuchFile:
2046
# A NoSuchFile error indicates that a pack file has gone
2047
# missing on disk, we need to trigger a reload, and start over.
2048
if self._reload_func is None:
2050
raise errors.RetryWithNewPacks(transport.abspath(path),
2051
reload_occurred=False,
2052
exc_info=sys.exc_info())
2054
def set_writer(self, writer, index, transport_packname):
2055
"""Set a writer to use for adding data."""
2056
if index is not None:
2057
self._indices[index] = transport_packname
2058
self._container_writer = writer
2059
self._write_index = index
2061
def reload_or_raise(self, retry_exc):
2062
"""Try calling the reload function, or re-raise the original exception.
2064
This should be called after _DirectPackAccess raises a
2065
RetryWithNewPacks exception. This function will handle the common logic
2066
of determining when the error is fatal versus being temporary.
2067
It will also make sure that the original exception is raised, rather
2068
than the RetryWithNewPacks exception.
2070
If this function returns, then the calling function should retry
2071
whatever operation was being performed. Otherwise an exception will
2074
:param retry_exc: A RetryWithNewPacks exception.
2077
if self._reload_func is None:
2079
elif not self._reload_func():
2080
# The reload claimed that nothing changed
2081
if not retry_exc.reload_occurred:
2082
# If there wasn't an earlier reload, then we really were
2083
# expecting to find changes. We didn't find them, so this is a
2087
exc_class, exc_value, exc_traceback = retry_exc.exc_info
2088
raise exc_class, exc_value, exc_traceback
2459
class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder

    # NOTE(review): the @property decorator was lost in the mangled paste;
    # restored from upstream bzrlib -- confirm.
    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"

    def check_conversion_target(self, target_format):
        # A plain knit-pack repository can be converted to any target.
        pass
class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = True

    # NOTE(review): the @property decorator was lost in the mangled paste;
    # restored from upstream bzrlib -- confirm.
    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        # Both rich roots and tree references must survive a conversion.
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"
class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False

    # NOTE(review): the @property decorator was lost in the mangled paste;
    # restored from upstream bzrlib -- confirm.
    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        # NOTE(review): the registry name was lost in the mangled paste;
        # 'rich-root-pack' restored from upstream bzrlib -- confirm.
        return bzrdir.format_registry.make_bzrdir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        # Rich root data must survive a conversion.
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"
class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    # NOTE(review): the @property decorator was lost in the mangled paste;
    # restored from upstream bzrlib -- confirm.
    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"

    def check_conversion_target(self, target_format):
        # No rich roots or subtrees here, so any target is acceptable.
        pass
class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    # NOTE(review): the @property decorator was lost in the mangled paste;
    # restored from upstream bzrlib -- confirm.
    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        # NOTE(review): the registry name was lost in the mangled paste;
        # '1.6.1-rich-root' restored from upstream bzrlib -- confirm.
        return bzrdir.format_registry.make_bzrdir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        # Rich root data must survive a conversion.
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"
class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees

    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    # NOTE(review): the @property decorator was lost in the mangled paste;
    # restored from upstream bzrlib -- confirm.
    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        # NOTE(review): the registry name and the 'return matching' line
        # were lost in the mangled paste; restored from upstream bzrlib.
        # Deprecated formats point the matching bzrdir at themselves so the
        # registry keeps resolving to this (broken) format.
        matching = bzrdir.format_registry.make_bzrdir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        # Rich root data must survive a conversion.
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        # NOTE(review): the continuation string was lost in the mangled
        # paste; " (deprecated)" restored from upstream bzrlib -- confirm.
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")
class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    # NOTE(review): the @property decorator was lost in the mangled paste;
    # restored from upstream bzrlib -- confirm.
    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"

    def check_conversion_target(self, target_format):
        # No rich roots or subtrees here, so any target is acceptable.
        pass
class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    # NOTE(review): the @property decorator was lost in the mangled paste;
    # restored from upstream bzrlib -- confirm.
    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        # NOTE(review): the registry name was lost in the mangled paste;
        # '1.9-rich-root' restored from upstream bzrlib -- confirm.
        return bzrdir.format_registry.make_bzrdir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        # Rich root data must survive a conversion.
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"
class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained until the second release after bzr 1.7.

    1.6.1-subtree[as it might have been] with B+Tree indices.

    This is [now] retained until we have a CHK based subtree format in
    development.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    # NOTE(review): the @property decorator was lost in the mangled paste;
    # restored from upstream bzrlib -- confirm.
    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'development-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        # Both rich roots and tree references must survive a conversion.
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
                "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
                "1.6.1-subtree with B+Tree indices.\n")