167
166
if inv.root.revision is None:
168
167
raise AssertionError()
170
def _check_cache_size(self, inv_size, entry_cache):
    """Check that the entry_cache is large enough.

    We want the cache to be ~2x the size of an inventory, because the
    cache is FIFO and because of how inventory records tend to change: a
    small number of records change often, while most never change at all.
    When the cache fills up, many of the records you are still interested
    in get flushed and have to be recreated. An LRU cache would be better,
    but its overhead negates the benefit of caching.

    One way to look at it: only the part of the cache beyond len(inv) is
    your 'working' set. In general it shouldn't be a problem to hold two
    inventories in memory anyway.

    :param inv_size: The number of entries in an inventory.
    :param entry_cache: The entry cache to check, or None when no cache is
        in use, in which case nothing is done. Otherwise it must provide
        ``cache_size()`` and ``resize()``.
    """
    if entry_cache is None:
        # No cache in use, so there is nothing to resize.
        return
    # Only resize once the cache is smaller than 1.5x the inventory, and
    # then grow it all the way to 2x.
    recommended_min_cache_size = inv_size * 1.5
    current_size = entry_cache.cache_size()
    if current_size < recommended_min_cache_size:
        recommended_cache_size = inv_size * 2
        trace.mutter('Resizing the inventory entry cache from %d to %d',
                     current_size, recommended_cache_size)
        entry_cache.resize(recommended_cache_size)
198
169
def write_inventory_to_lines(self, inv):
    """Return a list of lines with the encoded inventory.

    Delegates to ``self.write_inventory`` with ``None`` as its second
    argument and returns that call's result unchanged.

    :param inv: The inventory to encode.
    """
    return self.write_inventory(inv, None)
379
350
revision_id = cache_utf8.encode(revision_id)
380
351
inv = inventory.Inventory(root_id=None, revision_id=revision_id)
382
ie = self._unpack_entry(e, entry_cache=entry_cache)
353
ie = self._unpack_entry(e)
384
self._check_cache_size(len(inv), entry_cache)
387
def _unpack_entry(self, elt, entry_cache=None):
389
file_id = elt_get('file_id')
390
revision = elt_get('revision')
391
# Check and see if we have already unpacked this exact entry
392
# Some timings for "repo.revision_trees(last_100_revs)"
394
# unmodified 4.1s 40.8s
396
# using fifo 2.83s 29.1s
400
# no_copy 2.00s 20.5s
401
# no_c,dict 1.95s 18.0s
402
# Note that a cache of 10k nodes is more than sufficient to hold all of
403
# the inventory for the last 100 revs for bzr, but not for mysql (20k
404
# is enough for mysql, which saves the same 2s as using a dict)
406
# Breakdown of mysql using time.clock()
407
# 4.1s 2 calls to element.get for file_id, revision_id
408
# 4.5s cache_hit lookup
409
# 7.1s InventoryFile.copy()
410
# 2.4s InventoryDirectory.copy()
411
# 0.4s decoding unique entries
412
# 1.6s decoding entries after FIFO fills up
413
# 0.8s Adding nodes to FIFO (including flushes)
414
# 0.1s cache miss lookups
416
# 4.1s 2 calls to element.get for file_id, revision_id
417
# 9.9s cache_hit lookup
418
# 10.8s InventoryEntry.copy()
419
# 0.3s cache miss lookups
420
# 1.2s decoding entries
421
# 1.0s adding nodes to LRU
422
if entry_cache is not None and revision is not None:
423
key = (file_id, revision)
425
# We copy it, because some operations may mutate it
426
cached_ie = entry_cache[key]
430
# Only copying directory entries drops us 2.85s => 2.35s
431
# if cached_ie.kind == 'directory':
432
# return cached_ie.copy()
434
return cached_ie.copy()
357
def _unpack_entry(self, elt):
437
359
if not InventoryEntry.versionable_kind(kind):
438
360
raise AssertionError('unsupported entry kind %s' % kind)
440
362
get_cached = _get_utf8_or_ascii
442
file_id = get_cached(file_id)
443
if revision is not None:
444
revision = get_cached(revision)
445
parent_id = elt_get('parent_id')
364
parent_id = elt.get('parent_id')
446
365
if parent_id is not None:
447
366
parent_id = get_cached(parent_id)
367
file_id = get_cached(elt.get('file_id'))
449
369
if kind == 'directory':
450
370
ie = inventory.InventoryDirectory(file_id,
453
373
elif kind == 'file':
454
374
ie = inventory.InventoryFile(file_id,
457
ie.text_sha1 = elt_get('text_sha1')
458
if elt_get('executable') == 'yes':
377
ie.text_sha1 = elt.get('text_sha1')
378
if elt.get('executable') == 'yes':
459
379
ie.executable = True
460
v = elt_get('text_size')
380
v = elt.get('text_size')
461
381
ie.text_size = v and int(v)
462
382
elif kind == 'symlink':
463
383
ie = inventory.InventoryLink(file_id,
466
ie.symlink_target = elt_get('symlink_target')
386
ie.symlink_target = elt.get('symlink_target')
468
388
raise errors.UnsupportedInventoryKind(kind)
389
revision = elt.get('revision')
390
if revision is not None:
391
revision = get_cached(revision)
469
392
ie.revision = revision
470
if revision is not None and entry_cache is not None:
471
# We cache a copy() because callers like to mutate objects, and
472
# that would cause the item in cache to mutate as well.
473
# This has a small effect on many-inventory performance, because
474
# the majority fraction is spent in cache hits, not misses.
475
entry_cache[key] = ie.copy()