6118.2.3
by John Arbash Meinel
An 'entropy' computation. |
1 |
# Copyright (C) 2011 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Code to estimate the entropy of content"""
|
|
18 |
||
19 |
import zlib |
|
20 |
||
6118.2.6
by John Arbash Meinel
Updates to ZLibEstimator. |
21 |
|
6118.2.3
by John Arbash Meinel
An 'entropy' computation. |
22 |
class ZLibEstimator(object):
    """Uses zlib.compressobj to estimate compressed size.

    Content is fed through a real zlib compressor. Because the compressor
    buffers input, the number of bytes it has emitted lags behind what has
    been added; the estimator tracks the bytes added since output was last
    seen and only forces a flush when it estimates that those pending bytes
    would reach the target size.
    """

    def __init__(self, target_size, min_compression=2.0):
        """Create a new estimator.

        :param target_size: The desired size of the compressed content.
        :param min_compression: Estimated minimum compression. By default we
            assume that the content is 'text', which means a min compression of
            about 2:1. This seeds the compression estimate used until real
            compressed output has been observed. Must be greater than zero.
        :raises ValueError: If min_compression is not greater than zero.
        """
        if min_compression <= 0:
            # The ratio is used as a divisor in full(); reject it up front
            # rather than failing later with a ZeroDivisionError.
            raise ValueError('min_compression must be > 0, not %r'
                             % (min_compression,))
        self._target_size = target_size
        self._compressor = zlib.compressobj()
        # Total bytes handed to add_content().
        self._uncompressed_size_added = 0
        # Total bytes the compressor has emitted so far.
        self._compressed_size_added = 0
        # Bytes added since the compressor last emitted any output.
        self._unflushed_size_added = 0
        # Current uncompressed:compressed ratio estimate. Starts at the
        # caller-supplied value and is replaced once output is observed.
        self._estimated_compression = float(min_compression)

    def add_content(self, content):
        """Feed more content into the estimator.

        :param content: The bytes to add to the compressor.
        """
        self._uncompressed_size_added += len(content)
        self._unflushed_size_added += len(content)
        z_size = len(self._compressor.compress(content))
        if z_size > 0:
            # NOTE(review): any output resets the unflushed counter and
            # re-derives the ratio, presumably including a stream header
            # emitted before any real data -- confirm this is intended.
            self._record_z_len(z_size)

    def _record_z_len(self, count):
        """Account for ``count`` compressed bytes emitted by the compressor.

        :param count: Number of compressed bytes that were just produced.
        """
        # We got some compressed bytes, update the counters
        self._compressed_size_added += count
        self._unflushed_size_added = 0
        # So far we've read X uncompressed bytes, and written Y compressed
        # bytes. We should have a decent estimate of the final compression.
        self._estimated_compression = (float(self._uncompressed_size_added)
                                       / self._compressed_size_added)

    def full(self):
        """Have we reached the target size?

        If bytes have been added since output was last seen, and the current
        ratio estimate says they would fill the remaining space, the
        compressor is sync-flushed so the real compressed size is known
        before answering.

        :return: True if the compressed bytes emitted so far are at least
            target_size, False otherwise.
        """
        if self._unflushed_size_added:
            remaining_size = self._target_size - self._compressed_size_added
            # Estimate how much compressed content the unflushed data will
            # consume
            est_z_size = (self._unflushed_size_added /
                          self._estimated_compression)
            if est_z_size >= remaining_size:
                # We estimate we are close to remaining
                z_size = len(self._compressor.flush(zlib.Z_SYNC_FLUSH))
                self._record_z_len(z_size)
        return self._compressed_size_added >= self._target_size