6118.2.3
by John Arbash Meinel
An 'entropy' computation. |
1 |
# Copyright (C) 2011 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
6379.6.7
by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear. |
17 |
"""Code to estimate the entropy of content"""
|
18 |
||
6379.6.1
by Jelmer Vernooij
Import absolute_import in a few places. |
19 |
from __future__ import absolute_import |
20 |
||
6118.2.3
by John Arbash Meinel
An 'entropy' computation. |
21 |
import zlib |
22 |
||
6118.2.6
by John Arbash Meinel
Updates to ZLibEstimator. |
23 |
|
6118.2.3
by John Arbash Meinel
An 'entropy' computation. |
24 |
class ZLibEstimator(object):
    """Uses zlib.compressobj to estimate compressed size.

    Content is fed through a streaming compressor. Because the compressor
    may buffer input and return no output for a while, the size of the
    not-yet-emitted data is estimated from the compression ratio observed
    so far (or from ``min_compression`` before any output has been seen).
    """

    def __init__(self, target_size, min_compression=2.0):
        """Create a new estimator.

        :param target_size: The desired size of the compressed content.
        :param min_compression: Estimated minimum compression. By default we
            assume that the content is 'text', which means a min compression
            of about 2:1. This is the ratio used to estimate the compressed
            size of buffered content until real compressed output has been
            observed.
        :raises ValueError: If min_compression is not greater than zero.
        """
        if min_compression <= 0:
            # A zero or negative ratio would make the estimate in full()
            # divide by zero or go negative.
            raise ValueError('min_compression must be > 0, not %r'
                             % (min_compression,))
        self._target_size = target_size
        self._compressor = zlib.compressobj()
        # Total number of raw bytes passed to add_content().
        self._uncompressed_size_added = 0
        # Total number of compressed bytes the compressor has emitted.
        self._compressed_size_added = 0
        # Raw bytes added since the compressor last emitted any output.
        self._unflushed_size_added = 0
        # Seed the ratio from the caller's hint rather than a fixed 2.0, so
        # that the min_compression argument actually takes effect.
        self._estimated_compression = float(min_compression)

    def add_content(self, content):
        """Feed more content into the estimator.

        :param content: The bytes to add to the compression stream.
        """
        content_len = len(content)
        self._uncompressed_size_added += content_len
        self._unflushed_size_added += content_len
        z_size = len(self._compressor.compress(content))
        # compress() may buffer the input and return nothing; only update
        # the counters when some compressed output was actually produced.
        if z_size > 0:
            self._record_z_len(z_size)

    def _record_z_len(self, count):
        """Record that the compressor emitted ``count`` compressed bytes.

        Resets the unflushed counter and refreshes the estimated compression
        ratio from the running totals.

        :param count: Number of compressed bytes just emitted.
        """
        # We got some compressed bytes, update the counters
        self._compressed_size_added += count
        self._unflushed_size_added = 0
        # So far we've read X uncompressed bytes, and written Y compressed
        # bytes. We should have a decent estimate of the final compression.
        # If nothing has been written yet there is no ratio to compute, so
        # keep the previous estimate instead of dividing by zero.
        if self._compressed_size_added > 0:
            self._estimated_compression = (
                float(self._uncompressed_size_added)
                / self._compressed_size_added)

    def full(self):
        """Have we reached the target size?

        If there is content the compressor has not emitted yet, and we
        estimate it would use up the remaining space, a sync flush is forced
        so the answer is based on real compressed bytes.

        :return: True if the compressed bytes emitted so far are at least
            target_size, False otherwise.
        """
        if self._unflushed_size_added:
            remaining_size = self._target_size - self._compressed_size_added
            # Estimate how much compressed content the unflushed data will
            # consume
            est_z_size = (self._unflushed_size_added /
                          self._estimated_compression)
            if est_z_size >= remaining_size:
                # We estimate we are close to remaining, so force the
                # pending output out and count it for real.
                z_size = len(self._compressor.flush(zlib.Z_SYNC_FLUSH))
                self._record_z_len(z_size)
        return self._compressed_size_added >= self._target_size