~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/chunk_writer.py

  • Committer: John Arbash Meinel
  • Date: 2008-08-22 20:33:20 UTC
  • mto: This revision was merged to the branch mainline in revision 3653.
  • Revision ID: john@arbash-meinel.com-20080822203320-y98xykrjms4r5goj
Using a different safety margin for the first repack,
and using 2 repacks gives us effectively the same result, while
still making it safe for arbitrary data. (With 1 repack, it does
affect the results by 3-5%, and with 2 repacks the second margin
gives the same results.)
Also, we now get a ratio of about 2-3:1 between lines that are 'blindly' added and
ones which are added with a SYNC.

Show diffs side-by-side

added added

removed removed

Lines of Context:
21
21
from zlib import Z_FINISH, Z_SYNC_FLUSH
22
22
 
23
23
# [max_repack, buffer_full, repacks_with_space, min_compression,
24
 
#  total_bytes_in, total_bytes_out, avg_comp]
25
 
_stats = [0, 0, 0, 999, 0, 0, 0]
 
24
#  total_bytes_in, total_bytes_out, avg_comp,
 
25
#  bytes_autopack, bytes_sync_packed]
 
26
_stats = [0, 0, 0, 999, 0, 0, 0, 0, 0]
26
27
 
27
28
class ChunkWriter(object):
28
29
    """ChunkWriter allows writing of compressed data with a fixed size.
169
170
            self.bytes_in.append(bytes)
170
171
            self.seen_bytes += len(bytes)
171
172
            self.unflushed_in_bytes += len(bytes)
 
173
            _stats[7] += 1 # len(bytes)
172
174
        else:
173
175
            # This may or may not fit, try to add it with Z_SYNC_FLUSH
 
176
            _stats[8] += 1 # len(bytes)
174
177
            out = comp.compress(bytes)
175
178
            out += comp.flush(Z_SYNC_FLUSH)
176
179
            self.unflushed_in_bytes = 0
181
184
            # We are a bit extra conservative, because it seems that you *can*
182
185
            # get better compression with Z_SYNC_FLUSH than a full compress. It
183
186
            # is probably very rare, but we were able to trigger it.
184
 
            if self.bytes_out_len + 100 <= capacity:
 
187
            if self.num_repack == 0:
 
188
                safety_margin = 100
 
189
            else:
 
190
                safety_margin = 10
 
191
            if self.bytes_out_len + safety_margin <= capacity:
185
192
                # It fit, so mark it added
186
193
                self.bytes_in.append(bytes)
187
194
                self.seen_bytes += len(bytes)