~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/groupcompress.py

Committer: John Arbash Meinel
Date: 2009-10-23 15:46:01 UTC
mto: This revision was merged to the branch mainline in revision 4771.
Revision ID: john@arbash-meinel.com-20091023154601-2gg0gcbs3s5m90c3

Interning the start and stop group positions saves another 7MB peak mem. \o/

I'm hoping some of these savings are the 'dark memory', but that is unclear at this point.

files modified:
bzrlib/groupcompress.py

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

1269

"""See VersionedFiles.clear_cache()"""

1270

self._group_cache.clear()

1271

self._index._graph_index.clear_cache()

1272

self._index._int_cache.clear()

1272

1273

1274

def _check_add(self, key, lines, random_id, check_content):

1274

1275

"""check that version_id and lines are safe to add."""

1832

1833

self.has_graph = parents

1833

1834

self._is_locked = is_locked

1834

1835

self._inconsistency_fatal = inconsistency_fatal

1836

# GroupCompress records tend to have the same 'group' start + offset

1837

# repeated over and over, this creates a surplus of ints

1838

self._int_cache = {}

1835

1839

if track_external_parent_refs:

1836

1840

self._key_dependencies = knit._KeyRefs(

1837

1841

track_new_keys=track_new_keys)

2013

2017

"""Convert an index value to position details."""

2014

2018

bits = node[2].split(' ')

2015

2019

# It would be nice not to read the entire gzip.

2016

# TODO: Intern the start and stop integers. They are *very* common

2017

# between all records in the index. See revno 4781

2020

# start and stop are put into _int_cache because they are very common.

2021

# They define the 'group' that an entry is in, and many groups can have

2022

# thousands of objects.

2023

# Branching Launchpad, for example, saves ~600k integers, at 12 bytes

2024

# each, or about 7MB. Note that it might be even more when you consider

2025

# how PyInt is allocated in separate slabs. And you can't return a slab

2026

# to the OS if even 1 int on it is in use. Note though that Python uses

2027

# a LIFO when re-using PyInt slots, which probably causes more

2028

# fragmentation.

2018

2029

start = int(bits[0])

2030

start = self._int_cache.setdefault(start, start)

2019

2031

stop = int(bits[1])

2032

stop = self._int_cache.setdefault(stop, stop)

2020

2033

basis_end = int(bits[2])

2021

2034

delta_end = int(bits[3])

2022

return node[0], start, stop, basis_end, delta_end

2035

# We can't use StaticTuple here, because node[0] is a BTreeGraphIndex

2036

# instance...

2037

return (node[0], start, stop, basis_end, delta_end)

2023

2038

2024

2039

def scan_unvalidated_index(self, graph_index):

2025

2040

"""Inform this _GCGraphIndex that there is an unvalidated index.

Older »