159
165
"""Extension point for subclasses to check during serialisation.
161
167
:param inv: An inventory about to be serialised, to be checked.
162
:raises: AssertionError if an error has occured.
168
:raises: AssertionError if an error has occurred.
164
170
if inv.revision_id is None:
165
raise AssertionError()
171
raise AssertionError("inv.revision_id is None")
166
172
if inv.root.revision is None:
167
raise AssertionError()
173
raise AssertionError("inv.root.revision is None")
175
def _check_cache_size(self, inv_size, entry_cache):
176
"""Check that the entry_cache is large enough.
178
We want the cache to be ~2x the size of an inventory. The reason is
179
because we use a FIFO cache, and how Inventory records are likely to
180
change. In general, you have a small number of records which change
181
often, and a lot of records which do not change at all. So when the
182
cache gets full, you actually flush out a lot of the records you are
183
interested in, which means you need to recreate all of those records.
184
An LRU Cache would be better, but the overhead negates the cache coherency benefit.
187
One way to look at it, only the size of the cache > len(inv) is your
188
'working' set. And in general, it shouldn't be a problem to hold 2
189
inventories in memory anyway.
191
:param inv_size: The number of entries in an inventory.
193
if entry_cache is None:
195
# 1.5 times might also be reasonable.
196
recommended_min_cache_size = inv_size * 1.5
197
if entry_cache.cache_size() < recommended_min_cache_size:
198
recommended_cache_size = inv_size * 2
199
trace.mutter('Resizing the inventory entry cache from %d to %d',
200
entry_cache.cache_size(), recommended_cache_size)
201
entry_cache.resize(recommended_cache_size)
169
203
def write_inventory_to_lines(self, inv):
170
204
"""Return a list of lines with the encoded inventory."""
350
384
revision_id = cache_utf8.encode(revision_id)
351
385
inv = inventory.Inventory(root_id=None, revision_id=revision_id)
353
ie = self._unpack_entry(e)
387
ie = self._unpack_entry(e, entry_cache=entry_cache)
389
self._check_cache_size(len(inv), entry_cache)
357
def _unpack_entry(self, elt):
392
def _unpack_entry(self, elt, entry_cache=None):
394
file_id = elt_get('file_id')
395
revision = elt_get('revision')
396
# Check and see if we have already unpacked this exact entry
397
# Some timings for "repo.revision_trees(last_100_revs)"
399
# unmodified 4.1s 40.8s
401
# using fifo 2.83s 29.1s
405
# no_copy 2.00s 20.5s
406
# no_c,dict 1.95s 18.0s
407
# Note that a cache of 10k nodes is more than sufficient to hold all of
408
# the inventory for the last 100 revs for bzr, but not for mysql (20k
409
# is enough for mysql, which saves the same 2s as using a dict)
411
# Breakdown of mysql using time.clock()
412
# 4.1s 2 calls to element.get for file_id, revision_id
413
# 4.5s cache_hit lookup
414
# 7.1s InventoryFile.copy()
415
# 2.4s InventoryDirectory.copy()
416
# 0.4s decoding unique entries
417
# 1.6s decoding entries after FIFO fills up
418
# 0.8s Adding nodes to FIFO (including flushes)
419
# 0.1s cache miss lookups
421
# 4.1s 2 calls to element.get for file_id, revision_id
422
# 9.9s cache_hit lookup
423
# 10.8s InventoryEntry.copy()
424
# 0.3s cache miss lookups
425
# 1.2s decoding entries
426
# 1.0s adding nodes to LRU
427
if entry_cache is not None and revision is not None:
428
key = (file_id, revision)
430
# We copy it, because some operations may mutate it
431
cached_ie = entry_cache[key]
435
# Only copying directory entries drops us 2.85s => 2.35s
436
# if cached_ie.kind == 'directory':
437
# return cached_ie.copy()
439
return cached_ie.copy()
359
442
if not InventoryEntry.versionable_kind(kind):
360
443
raise AssertionError('unsupported entry kind %s' % kind)
362
445
get_cached = _get_utf8_or_ascii
364
parent_id = elt.get('parent_id')
447
file_id = get_cached(file_id)
448
if revision is not None:
449
revision = get_cached(revision)
450
parent_id = elt_get('parent_id')
365
451
if parent_id is not None:
366
452
parent_id = get_cached(parent_id)
367
file_id = get_cached(elt.get('file_id'))
369
454
if kind == 'directory':
370
455
ie = inventory.InventoryDirectory(file_id,
373
458
elif kind == 'file':
374
459
ie = inventory.InventoryFile(file_id,
377
ie.text_sha1 = elt.get('text_sha1')
378
if elt.get('executable') == 'yes':
462
ie.text_sha1 = elt_get('text_sha1')
463
if elt_get('executable') == 'yes':
379
464
ie.executable = True
380
v = elt.get('text_size')
465
v = elt_get('text_size')
381
466
ie.text_size = v and int(v)
382
467
elif kind == 'symlink':
383
468
ie = inventory.InventoryLink(file_id,
386
ie.symlink_target = elt.get('symlink_target')
471
ie.symlink_target = elt_get('symlink_target')
388
473
raise errors.UnsupportedInventoryKind(kind)
389
revision = elt.get('revision')
390
if revision is not None:
391
revision = get_cached(revision)
392
474
ie.revision = revision
475
if revision is not None and entry_cache is not None:
476
# We cache a copy() because callers like to mutate objects, and
477
# that would cause the item in cache to mutate as well.
478
# This has a small effect on many-inventory performance, because
479
# the majority fraction is spent in cache hits, not misses.
480
entry_cache[key] = ie.copy()