~bzr-pqm/bzr/bzr.dev

6118.2.3 by John Arbash Meinel
An 'entropy' computation.
1
# Copyright (C) 2011 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Code to estimate the entropy of content"""
18
19
import zlib
20
6118.2.6 by John Arbash Meinel
Updates to ZLibEstimator.
21
6118.2.3 by John Arbash Meinel
An 'entropy' computation.
22
class ZLibEstimator(object):
    """Uses zlib.compressobj to estimate compressed size.

    Content is pushed through a real zlib compressor. Because zlib keeps
    input in internal buffers, the number of compressed bytes seen so far
    can lag behind the content added; the estimator tracks how many bytes
    are still unflushed and only forces a sync flush when it predicts the
    target size is about to be reached.
    """

    def __init__(self, target_size, min_compression=2.0):
        """Create a new estimator.

        :param target_size: The desired size of the compressed content.
        :param min_compression: Estimated minimum compression. By default we
            assume that the content is 'text', which means a min compression of
            about 2:1. Used as the compression ratio until real compressed
            output has been observed.
        :raises ValueError: If min_compression is not a positive number.
        """
        # Coerce to float so the division in full() is true division even
        # when an int is passed on Python 2.
        min_compression = float(min_compression)
        # 'not (x > 0)' also rejects NaN, which would poison the estimate.
        if not (min_compression > 0.0):
            raise ValueError('min_compression must be positive, got %r'
                             % (min_compression,))
        self._target_size = target_size
        self._compressor = zlib.compressobj()
        # Total bytes handed to add_content()
        self._uncompressed_size_added = 0
        # Total compressed bytes the compressor has emitted so far
        self._compressed_size_added = 0
        # Bytes added since the compressor last emitted any output
        self._unflushed_size_added = 0
        self._estimated_compression = min_compression

    def add_content(self, content):
        """Feed more content into the estimator.

        :param content: A byte string to add to the compressed stream.
        """
        self._uncompressed_size_added += len(content)
        self._unflushed_size_added += len(content)
        z_size = len(self._compressor.compress(content))
        if z_size > 0:
            # Only update the estimate when the compressor actually emitted
            # something; otherwise the content is still buffered inside zlib.
            self._record_z_len(z_size)

    def _record_z_len(self, count):
        """Account for compressed bytes emitted by the compressor.

        :param count: Number of compressed bytes that were just produced.
        """
        # We got some compressed bytes, update the counters
        self._compressed_size_added += count
        self._unflushed_size_added = 0
        # So far we've read X uncompressed bytes, and written Y compressed
        # bytes. We should have a decent estimate of the final compression.
        if self._compressed_size_added:
            # Guard against dividing by zero if nothing has been emitted yet;
            # in that case keep the previous estimate.
            self._estimated_compression = (
                float(self._uncompressed_size_added)
                / self._compressed_size_added)

    def full(self):
        """Have we reached the target size?

        :return: True if the compressed bytes emitted so far are at least
            target_size, False otherwise.
        """
        if self._unflushed_size_added:
            remaining_size = self._target_size - self._compressed_size_added
            # Estimate how much compressed content the unflushed data will
            # consume
            est_z_size = (self._unflushed_size_added /
                          self._estimated_compression)
            if est_z_size >= remaining_size:
                # We estimate we are close to remaining, so force the
                # compressor to emit what it has buffered. Z_SYNC_FLUSH
                # leaves the compressor usable for further add_content()
                # calls.
                z_size = len(self._compressor.flush(zlib.Z_SYNC_FLUSH))
                self._record_z_len(z_size)
        return self._compressed_size_added >= self._target_size