~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/repofmt/pack_repo.py

  • Committer: John Arbash Meinel
  • Date: 2009-09-09 18:52:56 UTC
  • mto: (4634.52.16 2.0)
  • mto: This revision was merged to the branch mainline in revision 4738.
  • Revision ID: john@arbash-meinel.com-20090909185256-rdaxy872xauoem46
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).

The basic issue is that the access pattern for chk pages is fully random,
because the keys are 'sha1' handles. As such, we have no locality of
reference, and downloading a large project over HTTP can cause us to
redownload all of the .cix pages multiple times. The bug report
noted the pages being downloaded 4-5 times, which caused a
significant increase in the total bytes downloaded.
(For Launchpad, downloading the 10MB cix file 5 times was 50MB, out of
around 160MB total download.)
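
To make the workaround concrete, here is a minimal, hedged sketch of opening a chk index with the new flag. The transport URL, index file name, and size below are hypothetical placeholders; only the unlimited_cache=True argument is the behaviour added by this revision.

# Sketch only, not code from this revision: open a .cix index over a slow
# transport with unlimited_cache=True so B-tree pages are kept in memory
# instead of being evicted and re-downloaded.
from bzrlib.transport import get_transport
from bzrlib.btree_index import BTreeGraphIndex

# Placeholder URL, file name, and size; the size must match the index file.
transport = get_transport('http://example.com/project/.bzr/repository/indices/')
chk_index = BTreeGraphIndex(transport, 'example-pack.cix', 10485760,
                            unlimited_cache=True)
# Lookups by effectively random sha1-based keys now hit the in-memory page
# cache rather than re-fetching pages from the transport.
print chk_index.key_count()

As the diff below shows, the repository code turns the flag on only for the chk ('.cix') index; the revision, inventory, text, and signature indexes keep the default bounded page cache.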

@@ -224,10 +224,14 @@
         return self.index_name('text', name)
 
     def _replace_index_with_readonly(self, index_type):
+        unlimited_cache = False
+        if index_type == 'chk':
+            unlimited_cache = True
         setattr(self, index_type + '_index',
             self.index_class(self.index_transport,
                 self.index_name(index_type, self.name),
-                self.index_sizes[self.index_offset(index_type)]))
+                self.index_sizes[self.index_offset(index_type)],
+                unlimited_cache=unlimited_cache))
 
 
 class ExistingPack(Pack):
@@ -1674,7 +1678,7 @@
             txt_index = self._make_index(name, '.tix')
             sig_index = self._make_index(name, '.six')
             if self.chk_index is not None:
-                chk_index = self._make_index(name, '.cix')
+                chk_index = self._make_index(name, '.cix', unlimited_cache=True)
             else:
                 chk_index = None
             result = ExistingPack(self._pack_transport, name, rev_index,
@@ -1699,7 +1703,8 @@
             txt_index = self._make_index(name, '.tix', resume=True)
             sig_index = self._make_index(name, '.six', resume=True)
             if self.chk_index is not None:
-                chk_index = self._make_index(name, '.cix', resume=True)
+                chk_index = self._make_index(name, '.cix', resume=True,
+                                             unlimited_cache=True)
             else:
                 chk_index = None
             result = self.resumed_pack_factory(name, rev_index, inv_index,
@@ -1735,7 +1740,7 @@
         return self._index_class(self.transport, 'pack-names', None
                 ).iter_all_entries()
 
-    def _make_index(self, name, suffix, resume=False):
+    def _make_index(self, name, suffix, resume=False, unlimited_cache=False):
         size_offset = self._suffix_offsets[suffix]
         index_name = name + suffix
         if resume:
@@ -1744,8 +1749,9 @@
         else:
             transport = self._index_transport
             index_size = self._names[name][size_offset]
-        return self._index_class(transport, index_name, index_size)
+        return self._index_class(transport, index_name, index_size,
+                                 unlimited_cache=unlimited_cache)
 
 
     def _max_pack_count(self, total_revisions):
         """Return the maximum number of packs to use for total revisions.