~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/repofmt/pack_repo.py

  • Committer: John Arbash Meinel
  • Date: 2009-09-09 18:52:56 UTC
  • mto: (4634.52.16 2.0)
  • mto: This revision was merged to the branch mainline in revision 4738.
  • Revision ID: john@arbash-meinel.com-20090909185256-rdaxy872xauoem46
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).

The basic issue is that the access pattern for chk pages is fully random,
because the keys are 'sha1' handles. As such, we have no locality of
reference, and downloading a large project over HTTP can cause us to
redownload all of the .cix pages multiple times. The bug report
noted the pages being downloaded 4-5 times, which caused a
significant increase in the total bytes downloaded.
(For Launchpad, downloading the 10MB cix file 5 times was 50MB, out of
around 160MB total download.)
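
To make the workaround concrete, here is a minimal, hedged sketch of opening a chk index with the new flag. The transport URL, index file name, and size below are hypothetical placeholders; only the unlimited_cache=True argument is the behaviour added by this revision.

# Sketch only, not code from this revision: open a .cix index over a slow
# transport with unlimited_cache=True so B-tree pages are kept in memory
# instead of being evicted and re-downloaded.
from bzrlib.transport import get_transport
from bzrlib.btree_index import BTreeGraphIndex

# Placeholder URL, file name, and size; the size must match the index file.
transport = get_transport('http://example.com/project/.bzr/repository/indices/')
chk_index = BTreeGraphIndex(transport, 'example-pack.cix', 10485760,
                            unlimited_cache=True)
# Lookups by effectively random sha1-based keys now hit the in-memory page
# cache rather than re-fetching pages from the transport.
print chk_index.key_count()

As the diff below shows, the repository code turns the flag on only for the chk ('.cix') index; the revision, inventory, text, and signature indexes keep the default bounded page cache.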

@@ -224,10 +224,14 @@
         return self.index_name('text', name)
 
     def _replace_index_with_readonly(self, index_type):
+        unlimited_cache = False
+        if index_type == 'chk':
+            unlimited_cache = True
         setattr(self, index_type + '_index',
             self.index_class(self.index_transport,
                 self.index_name(index_type, self.name),
-                self.index_sizes[self.index_offset(index_type)]))
+                self.index_sizes[self.index_offset(index_type)],
+                unlimited_cache=unlimited_cache))
 
 
 class ExistingPack(Pack):
@@ -1674,7 +1678,7 @@
             txt_index = self._make_index(name, '.tix')
             sig_index = self._make_index(name, '.six')
             if self.chk_index is not None:
-                chk_index = self._make_index(name, '.cix')
+                chk_index = self._make_index(name, '.cix', unlimited_cache=True)
             else:
                 chk_index = None
             result = ExistingPack(self._pack_transport, name, rev_index,
@@ -1699,7 +1703,8 @@
             txt_index = self._make_index(name, '.tix', resume=True)
             sig_index = self._make_index(name, '.six', resume=True)
             if self.chk_index is not None:
-                chk_index = self._make_index(name, '.cix', resume=True)
+                chk_index = self._make_index(name, '.cix', resume=True,
+                                             unlimited_cache=True)
             else:
                 chk_index = None
             result = self.resumed_pack_factory(name, rev_index, inv_index,
@@ -1735,7 +1740,7 @@
         return self._index_class(self.transport, 'pack-names', None
                 ).iter_all_entries()
 
-    def _make_index(self, name, suffix, resume=False):
+    def _make_index(self, name, suffix, resume=False, unlimited_cache=False):
         size_offset = self._suffix_offsets[suffix]
         index_name = name + suffix
         if resume:
@@ -1744,8 +1749,9 @@
         else:
             transport = self._index_transport
             index_size = self._names[name][size_offset]
-        return self._index_class(transport, index_name, index_size)
+        return self._index_class(transport, index_name, index_size,
+                                 unlimited_cache=unlimited_cache)
 
 
     def _max_pack_count(self, total_revisions):
         """Return the maximum number of packs to use for total revisions.