~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/xml8.py

Committer: Canonical.com Patch Queue Manager
Date: 2008-12-19 17:14:59 UTC
mfrom: (3882.6.23 xml_cache)
Revision ID: pqm@pqm.ubuntu.com-20081219171459-521qbou7ho7g297f

(jam) Add a cache for deserializing inventory entries from XML.

files modified:
NEWS

bzrlib/fifo_cache.py

bzrlib/inventory.py

bzrlib/repository.py

bzrlib/tests/__init__.py

bzrlib/tests/test_fifo_cache.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

Show diffs side-by-side

added added

removed removed

bzrlib/xml8.py

errors,

inventory,

revision as _mod_revision,

trace,

)

from bzrlib.xml_serializer import SubElement, Element, Serializer

from bzrlib.inventory import ROOT_ID, Inventory, InventoryEntry

166

167

if inv.root.revision is None:

167

168

raise AssertionError()

168

169

170

def _check_cache_size(self, inv_size, entry_cache):

171

"""Check that the entry_cache is large enough.

172

173

We want the cache to be ~2x the size of an inventory. The reason is

174

because we use a FIFO cache, and how Inventory records are likely to

175

change. In general, you have a small number of records which change

176

often, and a lot of records which do not change at all. So when the

177

cache gets full, you actually flush out a lot of the records you are

178

interested in, which means you need to recreate all of those records.

179

An LRU Cache would be better, but the overhead negates the cache

180

coherency benefit.

181

182

One way to look at it, only the size of the cache > len(inv) is your

183

'working' set. And in general, it shouldn't be a problem to hold 2

184

inventories in memory anyway.

185

186

:param inv_size: The number of entries in an inventory.

187

"""

188

if entry_cache is None:

189

return

190

# 1.5 times might also be reasonable.

191

recommended_min_cache_size = inv_size * 1.5

192

if entry_cache.cache_size() < recommended_min_cache_size:

193

recommended_cache_size = inv_size * 2

194

trace.mutter('Resizing the inventory entry cache from %d to %d',

195

entry_cache.cache_size(), recommended_cache_size)

196

entry_cache.resize(recommended_cache_size)

197

169

198

def write_inventory_to_lines(self, inv):

170

199

"""Return a list of lines with the encoded inventory."""

171

200

return self.write_inventory(inv, None)

337

366

prop_elt.tail = '\n'

338

367

top_elt.tail = '\n'

339

368

340

def _unpack_inventory(self, elt, revision_id=None):

369

def _unpack_inventory(self, elt, revision_id=None, entry_cache=None):

341

370

"""Construct from XML Element"""

342

371

if elt.tag != 'inventory':

343

372

raise errors.UnexpectedInventoryFormat('Root tag is %r' % elt.tag)

350

379

revision_id = cache_utf8.encode(revision_id)

351

380

inv = inventory.Inventory(root_id=None, revision_id=revision_id)

352

381

for e in elt:

353

ie = self._unpack_entry(e)

382

ie = self._unpack_entry(e, entry_cache=entry_cache)

354

383

inv.add(ie)

384

self._check_cache_size(len(inv), entry_cache)

355

385

return inv

356

386

357

def _unpack_entry(self, elt):

387

def _unpack_entry(self, elt, entry_cache=None):

388

elt_get = elt.get

389

file_id = elt_get('file_id')

390

revision = elt_get('revision')

391

# Check and see if we have already unpacked this exact entry

392

# Some timings for "repo.revision_trees(last_100_revs)"

393

# bzr mysql

394

# unmodified 4.1s 40.8s

395

# using lru 3.5s

396

# using fifo 2.83s 29.1s

397

# lru._cache 2.8s

398

# dict 2.75s 26.8s

399

# inv.add 2.5s 26.0s

400

# no_copy 2.00s 20.5s

401

# no_c,dict 1.95s 18.0s

402

# Note that a cache of 10k nodes is more than sufficient to hold all of

403

# the inventory for the last 100 revs for bzr, but not for mysql (20k

404

# is enough for mysql, which saves the same 2s as using a dict)

405

406

# Breakdown of mysql using time.clock()

407

# 4.1s 2 calls to element.get for file_id, revision_id

408

# 4.5s cache_hit lookup

409

# 7.1s InventoryFile.copy()

410

# 2.4s InventoryDirectory.copy()

411

# 0.4s decoding unique entries

412

# 1.6s decoding entries after FIFO fills up

413

# 0.8s Adding nodes to FIFO (including flushes)

414

# 0.1s cache miss lookups

415

# Using an LRU cache

416

# 4.1s 2 calls to element.get for file_id, revision_id

417

# 9.9s cache_hit lookup

418

# 10.8s InventoryEntry.copy()

419

# 0.3s cache miss lookus

420

# 1.2s decoding entries

421

# 1.0s adding nodes to LRU

422

if entry_cache is not None and revision is not None:

423

key = (file_id, revision)

424

try:

425

# We copy it, because some operatations may mutate it

426

cached_ie = entry_cache[key]

427

except KeyError:

428

pass

429

else:

430

# Only copying directory entries drops us 2.85s => 2.35s

431

# if cached_ie.kind == 'directory':

432

# return cached_ie.copy()

433

# return cached_ie

434

return cached_ie.copy()

435

358

436

kind = elt.tag

359

437

if not InventoryEntry.versionable_kind(kind):

360

438

raise AssertionError('unsupported entry kind %s' % kind)

361

439

362

440

get_cached = _get_utf8_or_ascii

363

441

364

parent_id = elt.get('parent_id')

442

file_id = get_cached(file_id)

443

if revision is not None:

444

revision = get_cached(revision)

445

parent_id = elt_get('parent_id')

365

446

if parent_id is not None:

366

447

parent_id = get_cached(parent_id)

367

file_id = get_cached(elt.get('file_id'))

368

448

369

449

if kind == 'directory':

370

450

ie = inventory.InventoryDirectory(file_id,

371

elt.get('name'),

451

elt_get('name'),

372

452

parent_id)

373

453

elif kind == 'file':

374

454

ie = inventory.InventoryFile(file_id,

375

elt.get('name'),

455

elt_get('name'),

376

456

parent_id)

377

ie.text_sha1 = elt.get('text_sha1')

378

if elt.get('executable') == 'yes':

457

ie.text_sha1 = elt_get('text_sha1')

458

if elt_get('executable') == 'yes':

379

459

ie.executable = True

380

v = elt.get('text_size')

460

v = elt_get('text_size')

381

461

ie.text_size = v and int(v)

382

462

elif kind == 'symlink':

383

463

ie = inventory.InventoryLink(file_id,

384

elt.get('name'),

464

elt_get('name'),

385

465

parent_id)

386

ie.symlink_target = elt.get('symlink_target')

466

ie.symlink_target = elt_get('symlink_target')

387

467

else:

388

468

raise errors.UnsupportedInventoryKind(kind)

389

revision = elt.get('revision')

390

if revision is not None:

391

revision = get_cached(revision)

392

469

ie.revision = revision

470

if revision is not None and entry_cache is not None:

471

# We cache a copy() because callers like to mutate objects, and

472

# that would cause the item in cache to mutate as well.

473

# This has a small effect on many-inventory performance, because

474

# the majority fraction is spent in cache hits, not misses.

475

entry_cache[key] = ie.copy()

393

476

394

477

return ie

395

478

Older »