        return new_pack


class ReconcilePacker(Packer):
    """A packer which regenerates indices etc as it copies.

    This is used by ``bzr reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def _extra_init(self):
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with."""
        repo = self._pack_collection.repo
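        # _find_text_key_references maps (file_id, revision_id) text keys to
        # whether the inventory containing that revision_id referred to them,
        # e.g. (roughly) {('file-id', 'rev-1'): True}.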
        refs = repo._serializer._find_text_key_references(inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a
        #    full text.
        self._text_filter = None

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy them."""
        self.pb.update("Copying content texts", 3)
        # we have five major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
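        # ancestors will map each revision id in the new pack to its parent
        # revision ids, e.g. {'rev-2': ('rev-1',), 'rev-1': ()}.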
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
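        # ideal_index maps each text key to the parent text keys it should
        # have according to the reconciled inventories.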
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # node is (index, key, value, refs)
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                # this text is not referenced by any reconciled inventory;
                # drop it.
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the leftmost parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
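        # _copy_nodes_graph is a generator; list() drains it so that the bulk
        # copy actually executes.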
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
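        # Each bad_texts entry is ((file_id, revision_id), ideal_parents), so
        # key[0][1] below is the revision id: reinserted texts are ordered
        # topologically by revision.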
        bad_texts.sort(key=lambda key: rev_order.get(key[0][1], 0))
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index: self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
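        # output_texts writes through the new pack's writer and text index,
        # so reinserted texts are delta-compressed against data already in
        # the new pack.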
        for key, parent_keys in bad_texts:
            # Deltas for the data being output may refer to texts already
            # written to the new pack, so flush it to make them readable.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(repo.texts.get_record_stream(
                [key], 'unordered', True).next().get_bytes_as('fulltext'))
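            # random_id=True tells add_lines the key is not already present,
            # and check_content=False skips revalidating the lines; both skip
            # work that is redundant here.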
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better off checking this at copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
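        # iter_all_entries yields (index, key, value[, refs]) tuples;
        # entry[1] is the key, which is all we compare here.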
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed


class RepositoryPackCollection(object):
    """Management of packs within a repository.