1
# Copyright (C) 2011 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Code to estimate the entropy of content"""
19
from __future__ import absolute_import
24
class ZLibEstimator(object):
    """Uses zlib.compressobj to estimate compressed size.

    Content is pushed through a streaming zlib compressor. The compressor
    buffers input internally, so the number of compressed bytes observed lags
    behind the content that was added. The estimator therefore tracks how
    much content has been added since compressed output was last seen, and
    only forces a (comparatively expensive) sync flush when that pending
    content is estimated to be enough to reach the target size.
    """

    def __init__(self, target_size, min_compression=2.0):
        """Create a new estimator.

        :param target_size: The desired size of the compressed content.
        :param min_compression: Estimated minimum compression. By default we
            assume that the content is 'text', which means a min compression
            of about 2:1. This ratio is used as the estimate until real
            compressed output has been observed.
        :raises ValueError: If min_compression is not greater than zero.
        """
        # Always keep the ratio as a float so that the division in full()
        # is never an integer division, and reject values that would make
        # that division meaningless (zero, negative, NaN).
        min_compression = float(min_compression)
        if not min_compression > 0:
            raise ValueError('min_compression must be greater than zero,'
                             ' got %r' % (min_compression,))
        self._target_size = target_size
        self._compressor = zlib.compressobj()
        self._uncompressed_size_added = 0
        self._compressed_size_added = 0
        self._unflushed_size_added = 0
        self._estimated_compression = min_compression

    def add_content(self, content):
        """Feed more uncompressed content into the estimator.

        :param content: A byte string to run through the compressor.
        """
        content_len = len(content)
        self._uncompressed_size_added += content_len
        self._unflushed_size_added += content_len
        z_size = len(self._compressor.compress(content))
        if z_size > 0:
            # The compressor only emits output now and then. When it emits
            # nothing, the content is still buffered and must stay counted
            # as unflushed so that full() can decide to force a flush.
            self._record_z_len(z_size)

    def _record_z_len(self, count):
        """Account for ``count`` compressed bytes emitted by the compressor.

        :param count: Number of compressed bytes that were just produced.
        """
        # We got some compressed bytes, update the counters
        self._compressed_size_added += count
        self._unflushed_size_added = 0
        # So far we've read X uncompressed bytes, and written Y compressed
        # bytes. We should have a decent estimate of the final compression.
        # Keep the previous estimate if nothing has been written yet, rather
        # than dividing by zero.
        if self._compressed_size_added:
            self._estimated_compression = (
                float(self._uncompressed_size_added)
                / self._compressed_size_added)

    def full(self):
        """Have we reached the target size?

        If content has been added since compressed output was last seen,
        and that content is estimated to fill the remaining space, the
        compressor is sync-flushed so the answer is based on real output.

        :return: True if the compressed bytes seen so far are at least
            target_size, False otherwise.
        """
        if self._unflushed_size_added:
            remaining_size = self._target_size - self._compressed_size_added
            # Estimate how much compressed content the unflushed data will
            # consume
            est_z_size = (self._unflushed_size_added /
                          self._estimated_compression)
            if est_z_size >= remaining_size:
                # We estimate we are close to remaining
                z_size = len(self._compressor.flush(zlib.Z_SYNC_FLUSH))
                self._record_z_len(z_size)
        return self._compressed_size_added >= self._target_size