        return new_pack


class ReconcilePacker(Packer):
    """A packer which regenerates indices etc as it copies.

    This is used by ``bzr reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def _extra_init(self):
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with."""
        repo = self._pack_collection.repo
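        # _find_text_key_references maps (file_id, revision_id) text keys to
        # whether the inventory containing that revision_id referred to them,
        # e.g. (roughly) {('file-id', 'rev-1'): True}.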
        refs = repo._serializer._find_text_key_references(inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a
        #    full text.
        self._text_filter = None

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy them."""
        self.pb.update("Copying content texts", 3)
        # we have five major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
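        # ancestors will map each revision id in the new pack to its parent
        # revision ids, e.g. {'rev-2': ('rev-1',), 'rev-1': ()}.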
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
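        # ideal_index maps each text key to the parent text keys it should
        # have according to the reconciled inventories.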
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # node is (index, key, value, refs)
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                # this text is not referenced by any reconciled inventory;
                # drop it.
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the leftmost parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
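        # _copy_nodes_graph is a generator; list() drains it so that the bulk
        # copy actually executes.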
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
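        # Each bad_texts entry is ((file_id, revision_id), ideal_parents), so
        # key[0][1] below is the revision id: reinserted texts are ordered
        # topologically by revision.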
        bad_texts.sort(key=lambda key: rev_order.get(key[0][1], 0))
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index: self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
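        # output_texts writes through the new pack's writer and text index,
        # so reinserted texts are delta-compressed against data already in
        # the new pack.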
        for key, parent_keys in bad_texts:
            # Deltas for the data being output may refer to texts already
            # written to the new pack, so flush it to make them readable.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(repo.texts.get_record_stream(
                [key], 'unordered', True).next().get_bytes_as('fulltext'))
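            # random_id=True tells add_lines the key is not already present,
            # and check_content=False skips revalidating the lines; both skip
            # work that is redundant here.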
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better off checking this at copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
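        # iter_all_entries yields (index, key, value[, refs]) tuples;
        # entry[1] is the key, which is all we compare here.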
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed


class RepositoryPackCollection(object):
    """Management of packs within a repository.