166
167
if inv.root.revision is None:
167
168
raise AssertionError()
170
def _check_cache_size(self, inv_size, entry_cache):
171
"""Check that the entry_cache is large enough.
173
We want the cache to be ~2x the size of an inventory. The reason is
174
because we use a FIFO cache, and how Inventory records are likely to
175
change. In general, you have a small number of records which change
176
often, and a lot of records which do not change at all. So when the
177
cache gets full, you actually flush out a lot of the records you are
178
interested in, which means you need to recreate all of those records.
179
An LRU Cache would be better, but the overhead negates the cache
182
One way to look at it, only the size of the cache > len(inv) is your
183
'working' set. And in general, it shouldn't be a problem to hold 2
184
inventories in memory anyway.
186
:param inv_size: The number of entries in an inventory.
188
if entry_cache is None:
190
# 1.5 times might also be reasonable.
191
recommended_min_cache_size = inv_size * 1.5
192
if entry_cache.cache_size() < recommended_min_cache_size:
193
recommended_cache_size = inv_size * 2
194
trace.mutter('Resizing the inventory entry cache from %d to %d',
195
entry_cache.cache_size(), recommended_cache_size)
196
entry_cache.resize(recommended_cache_size)
169
198
def write_inventory_to_lines(self, inv):
170
199
"""Return a list of lines with the encoded inventory."""
171
200
return self.write_inventory(inv, None)
350
379
revision_id = cache_utf8.encode(revision_id)
351
380
inv = inventory.Inventory(root_id=None, revision_id=revision_id)
353
ie = self._unpack_entry(e)
382
ie = self._unpack_entry(e, entry_cache=entry_cache)
384
self._check_cache_size(len(inv), entry_cache)
357
def _unpack_entry(self, elt):
387
def _unpack_entry(self, elt, entry_cache=None):
389
file_id = elt_get('file_id')
390
revision = elt_get('revision')
391
# Check and see if we have already unpacked this exact entry
392
# Some timings for "repo.revision_trees(last_100_revs)"
394
# unmodified 4.1s 40.8s
396
# using fifo 2.83s 29.1s
400
# no_copy 2.00s 20.5s
401
# no_c,dict 1.95s 18.0s
402
# Note that a cache of 10k nodes is more than sufficient to hold all of
403
# the inventory for the last 100 revs for bzr, but not for mysql (20k
404
# is enough for mysql, which saves the same 2s as using a dict)
406
# Breakdown of mysql using time.clock()
407
# 4.1s 2 calls to element.get for file_id, revision_id
408
# 4.5s cache_hit lookup
409
# 7.1s InventoryFile.copy()
410
# 2.4s InventoryDirectory.copy()
411
# 0.4s decoding unique entries
412
# 1.6s decoding entries after FIFO fills up
413
# 0.8s Adding nodes to FIFO (including flushes)
414
# 0.1s cache miss lookups
416
# 4.1s 2 calls to element.get for file_id, revision_id
417
# 9.9s cache_hit lookup
418
# 10.8s InventoryEntry.copy()
419
# 0.3s cache miss lookus
420
# 1.2s decoding entries
421
# 1.0s adding nodes to LRU
422
if entry_cache is not None and revision is not None:
423
key = (file_id, revision)
425
# We copy it, because some operatations may mutate it
426
cached_ie = entry_cache[key]
430
# Only copying directory entries drops us 2.85s => 2.35s
431
# if cached_ie.kind == 'directory':
432
# return cached_ie.copy()
434
return cached_ie.copy()
359
437
if not InventoryEntry.versionable_kind(kind):
360
438
raise AssertionError('unsupported entry kind %s' % kind)
362
440
get_cached = _get_utf8_or_ascii
364
parent_id = elt.get('parent_id')
442
file_id = get_cached(file_id)
443
if revision is not None:
444
revision = get_cached(revision)
445
parent_id = elt_get('parent_id')
365
446
if parent_id is not None:
366
447
parent_id = get_cached(parent_id)
367
file_id = get_cached(elt.get('file_id'))
369
449
if kind == 'directory':
370
450
ie = inventory.InventoryDirectory(file_id,
373
453
elif kind == 'file':
374
454
ie = inventory.InventoryFile(file_id,
377
ie.text_sha1 = elt.get('text_sha1')
378
if elt.get('executable') == 'yes':
457
ie.text_sha1 = elt_get('text_sha1')
458
if elt_get('executable') == 'yes':
379
459
ie.executable = True
380
v = elt.get('text_size')
460
v = elt_get('text_size')
381
461
ie.text_size = v and int(v)
382
462
elif kind == 'symlink':
383
463
ie = inventory.InventoryLink(file_id,
386
ie.symlink_target = elt.get('symlink_target')
466
ie.symlink_target = elt_get('symlink_target')
388
468
raise errors.UnsupportedInventoryKind(kind)
389
revision = elt.get('revision')
390
if revision is not None:
391
revision = get_cached(revision)
392
469
ie.revision = revision
470
if revision is not None and entry_cache is not None:
471
# We cache a copy() because callers like to mutate objects, and
472
# that would cause the item in cache to mutate as well.
473
# This has a small effect on many-inventory performance, because
474
# the majority fraction is spent in cache hits, not misses.
475
entry_cache[key] = ie.copy()