~bzr-pqm/bzr/bzr.dev

6118.2.3 by John Arbash Meinel
An 'entropy' computation.
1
# Copyright (C) 2011 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
6379.6.7 by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear.
17
"""Code to estimate the entropy of content"""
18
6379.6.1 by Jelmer Vernooij
Import absolute_import in a few places.
19
from __future__ import absolute_import
20
6118.2.3 by John Arbash Meinel
An 'entropy' computation.
21
import zlib
22
6118.2.6 by John Arbash Meinel
Updates to ZLibEstimator.
23
6118.2.3 by John Arbash Meinel
An 'entropy' computation.
24
class ZLibEstimator(object):
    """Uses zlib.compressobj to estimate compressed size.

    Content is pushed through a real zlib compressor, and the number of
    compressed bytes it has emitted so far is tracked. Because the compressor
    buffers its input, bytes that have been added but not yet reflected in
    the output are accounted for using an estimated compression ratio.
    """

    def __init__(self, target_size, min_compression=2.0):
        """Create a new estimator.

        :param target_size: The desired size of the compressed content.
        :param min_compression: Estimated minimum compression ratio
            (uncompressed size / compressed size). By default we assume that
            the content is 'text', which means a min compression of about
            2:1. This seeds the estimate used until real compressed output
            has been observed. Must be greater than zero.
        :raises ValueError: If min_compression is not greater than zero.
        """
        # Coerce to float so the division in full() is never integer
        # division, even if the caller passes an int.
        min_compression = float(min_compression)
        if not min_compression > 0.0:
            raise ValueError('min_compression must be > 0, not %r'
                             % (min_compression,))
        self._target_size = target_size
        self._compressor = zlib.compressobj()
        self._uncompressed_size_added = 0
        self._compressed_size_added = 0
        self._unflushed_size_added = 0
        self._estimated_compression = min_compression

    def add_content(self, content):
        """Feed more content into the compressor.

        :param content: The (byte) string to add.
        """
        self._uncompressed_size_added += len(content)
        self._unflushed_size_added += len(content)
        z_size = len(self._compressor.compress(content))
        # The compressor may buffer the input and emit nothing yet; only
        # update the counters when compressed bytes actually came out.
        if z_size > 0:
            self._record_z_len(z_size)

    def _record_z_len(self, count):
        """Account for count compressed bytes emitted by the compressor."""
        # We got some compressed bytes, update the counters
        self._compressed_size_added += count
        self._unflushed_size_added = 0
        # So far we've read X uncompressed bytes, and written Y compressed
        # bytes. We should have a decent estimate of the final compression.
        # Only refresh the estimate when both totals are non-zero: a zero
        # denominator would raise, and a zero ratio would make the division
        # in full() raise later. Otherwise keep the previous estimate.
        if self._uncompressed_size_added and self._compressed_size_added:
            self._estimated_compression = (
                float(self._uncompressed_size_added)
                / self._compressed_size_added)

    def full(self):
        """Have we reached the target size?

        If there is content that has not yet shown up in the compressor's
        output, and it is estimated to be enough to reach the target, the
        compressor is sync-flushed so the decision is based on real output.

        :return: True if the compressed bytes produced so far are at least
            target_size.
        """
        if self._unflushed_size_added:
            remaining_size = self._target_size - self._compressed_size_added
            # Estimate how much compressed content the unflushed data will
            # consume
            est_z_size = (self._unflushed_size_added /
                          self._estimated_compression)
            if est_z_size >= remaining_size:
                # We estimate we are close to remaining, so force the
                # buffered data out to find the real compressed size.
                z_size = len(self._compressor.flush(zlib.Z_SYNC_FLUSH))
                self._record_z_len(z_size)
        return self._compressed_size_added >= self._target_size