# Copyright (C) 2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Code to estimate the entropy of content"""

import zlib


class ZLibEstimator(object):
    """Uses zlib.compressobj to estimate compressed size.

    Content is pushed through a real zlib compressor. zlib buffers input
    internally, so the number of compressed bytes emitted so far lags behind
    the content that has been added. The estimator therefore tracks how much
    input has been added since compressed output was last observed, and uses
    the compression ratio seen so far to decide when that pending input is
    likely to reach the target, at which point it forces a sync flush to
    measure the real size.
    """

    def __init__(self, target_size, min_compression=2.0):
        """Create a new estimator.

        :param target_size: The desired size of the compressed content.
        :param min_compression: Estimated minimum compression. By default we
            assume that the content is 'text', which means a min compression
            of about 2:1. This ratio is used until actual compressed output
            has been observed.
        """
        self._target_size = target_size
        self._compressor = zlib.compressobj()
        # Total uncompressed bytes handed to add_content().
        self._uncompressed_size_added = 0
        # Total compressed bytes the compressor has emitted so far.
        self._compressed_size_added = 0
        # Uncompressed bytes added since compressed output was last recorded.
        self._unflushed_size_added = 0
        # Uncompressed/compressed ratio; seeded from the caller's estimate and
        # refined in _record_z_len() once real output is available.
        self._estimated_compression = float(min_compression)

    def add_content(self, content):
        """Feed another chunk of uncompressed content to the estimator.

        :param content: The bytes to add.
        """
        size = len(content)
        self._uncompressed_size_added += size
        self._unflushed_size_added += size
        z_size = len(self._compressor.compress(content))
        # compress() commonly returns nothing while zlib is still buffering.
        # Only update the counters when output was produced; otherwise the
        # pending-input count would be wiped and full() would never see it.
        if z_size > 0:
            self._record_z_len(z_size)

    def _record_z_len(self, count):
        """Account for ``count`` freshly emitted compressed bytes.

        :param count: Number of compressed bytes just produced.
        """
        # We got some compressed bytes, update the counters
        self._compressed_size_added += count
        self._unflushed_size_added = 0
        # So far we've read X uncompressed bytes, and written Y compressed
        # bytes. We should have a decent estimate of the final compression.
        # Keep the previous estimate if nothing has been written yet, rather
        # than dividing by zero.
        if self._compressed_size_added > 0:
            self._estimated_compression = (
                float(self._uncompressed_size_added)
                / self._compressed_size_added)

    def full(self):
        """Have we reached the target size?

        If input has been added since compressed output was last recorded,
        this may sync-flush the compressor to obtain an exact figure.

        :return: True if the compressed bytes emitted so far are at least
            ``target_size``, False otherwise.
        """
        pending = self._unflushed_size_added
        if pending:
            remaining_size = self._target_size - self._compressed_size_added
            ratio = self._estimated_compression
            # Estimate how much compressed content the unflushed data will
            # consume. A non-positive ratio carries no usable information
            # (and would divide by zero), so flush to measure instead.
            if ratio <= 0 or pending / ratio >= remaining_size:
                # We estimate we are close to remaining
                z_size = len(self._compressor.flush(zlib.Z_SYNC_FLUSH))
                self._record_z_len(z_size)
        return self._compressed_size_added >= self._target_size