~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

  • Committer: John Arbash Meinel
  • Date: 2009-09-02 19:54:48 UTC
  • mto: This revision was merged to the branch mainline in revision 4672.
  • Revision ID: john@arbash-meinel.com-20090902195448-1xueel7882fbuvx3
We needed a bit more data to actually get groups doing delta-compression.
We also now allow a given group to be cut down to 75% of its current bytes before we
consider it to be 'under utilized'.
This is to match the 75% of 'fully utilized' that we use later on.

Show diffs side-by-side

added added

removed removed

Lines of Context:
468
468
class _LazyGroupContentManager(object):
469
469
    """This manages a group of _LazyGroupCompressFactory objects."""
470
470
 
 
471
    _max_cut_fraction = 0.75 # We allow a block to be trimmed to 75% of
 
472
                             # current size, and still be considered
 
473
                             # reusable
 
474
    _full_block_size = 4*1024*1024
 
475
    _full_mixed_block_size = 2*1024*1024
 
476
    _full_enough_block_size = 3*1024*1024 # size at which we won't repack
 
477
    _full_enough_mixed_block_size = 2*768*1024 # 1.5MB
 
478
 
471
479
    def __init__(self, block):
472
480
        self._block = block
473
481
        # We need to preserve the ordering
587
595
            # A block of length 1 is never considered 'well utilized' :)
588
596
            return False
589
597
        action, last_byte_used, total_bytes_used = self._check_rebuild_action()
590
 
        if action is not None or total_bytes_used < self._block._content_length:
591
 
            # This block wants to trim itself somehow, which inherently means
592
 
            # that it is under-utilized, since it holds data that isn't being
593
 
            # referenced
 
598
        block_size = self._block._content_length
 
599
        if total_bytes_used < block_size * self._max_cut_fraction:
 
600
            # This block wants to trim itself small enough that we want to
 
601
            # consider it under-utilized.
594
602
            return False
595
603
        # TODO: This code is meant to be the twin of _insert_record_stream's
596
604
        #       'start_new_block' logic. It would probably be better to factor
607
615
        # object, it may actually be under-utilized. However, given that this
608
616
        # is 'pack-on-the-fly' it is probably reasonable to not repack large
609
617
        # content blobs on-the-fly.
610
 
        if self._block._content_length >= 3*1024*1024:
 
618
        if block_size >= self._full_enough_block_size:
611
619
            return True
612
620
        # If a block is <3MB, it still may be considered 'full' if it contains
613
621
        # mixed content. The current rule is 2MB of mixed content is considered
620
628
                common_prefix = prefix
621
629
            elif prefix != common_prefix:
622
630
                # Mixed content, check the size appropriately
623
 
                if self._block._content_length >= 2*768*1024: #1.5MB
 
631
                if block_size >= self._full_enough_mixed_block_size:
624
632
                    return True
625
633
                break
626
634
        # The content failed both the mixed check and the single-content check