6118.2.9
by John Arbash Meinel
Add some tests for how the estimator works. |
1 |
# Copyright (C) 2011 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Tests for our estimation of compressed content."""
|
|
18 |
||
19 |
import zlib |
|
20 |
import hashlib |
|
21 |
||
22 |
from bzrlib import ( |
|
23 |
estimate_compressed_size, |
|
24 |
tests, |
|
25 |
)
|
|
26 |
||
27 |
||
28 |
class TestZLibEstimator(tests.TestCase):
    """Compare ZLibEstimator's cut-off point against real zlib output.

    Each test feeds deterministic, poorly-compressible hex data into an
    estimator in fixed-size blocks until the estimator reports it is full,
    then checks both how much raw data was consumed and how large that data
    really is once compressed with zlib.
    """

    def get_slightly_random_content(self, length, seed=''):
        """We generate some hex-data that can be seeded.

        The output should be deterministic, but the data stream is effectively
        random.

        The stream is a chain of MD5 hex digests: each digest is appended to
        the output and then fed back into the hash to produce the next one.

        :param length: Number of bytes to return.
        :param seed: Initial data for the MD5 hash. Text seeds are encoded
            as UTF-8 where text and bytes are different types.
        :return: A byte string of exactly ``length`` hex characters (empty
            when ``length`` is 0 or negative).
        """
        if not isinstance(seed, bytes):
            # hashlib only accepts bytes where str and bytes are distinct
            # types; on interpreters where they are the same type a plain
            # string seed never reaches this branch.
            seed = seed.encode('utf-8')
        h = hashlib.md5(seed)
        hex_content = []
        count = 0
        while count < length:
            digest = h.hexdigest().encode('ascii')
            hex_content.append(digest)
            # Feed the digest back in so the next hexdigest() differs.
            h.update(digest)
            count += len(digest)
        # The last digest may overshoot; trim to the requested length.
        return b''.join(hex_content)[:length]

    def _add_blocks_until_full(self, estimator, raw_data, block_size=1000):
        """Feed raw_data to estimator block by block until it reports full.

        :param estimator: Object providing add_content() and full().
        :param raw_data: The byte string to feed in.
        :param block_size: Number of bytes passed to each add_content() call.
        :return: The offset in raw_data of the last block that was added
            (0 if raw_data is empty and nothing was added).
        """
        start = 0
        for start in range(0, len(raw_data), block_size):
            estimator.add_content(raw_data[start:start + block_size])
            if estimator.full():
                break
        return start

    def test_adding_content(self):
        ze = estimate_compressed_size.ZLibEstimator(32000)
        raw_data = self.get_slightly_random_content(60000)
        start = self._add_blocks_until_full(ze, raw_data)
        # Practise showed that 'start' was 56000. However, zlib is a bit
        # platform dependent, so allow +/- 2000 bytes around that.
        self.assertTrue(54000 <= start <= 58000,
            "Unexpected amount of raw data added: %d bytes" % (start,))
        # The real compression should be 'close' to 32000, real measurement was
        # 32401
        raw_comp = zlib.compress(raw_data[:start])
        self.assertTrue(31000 < len(raw_comp) < 33000,
            "Unexpected compressed size: %d bytes" % (len(raw_comp),))

    def test_adding_more_content(self):
        ze = estimate_compressed_size.ZLibEstimator(64000)
        raw_data = self.get_slightly_random_content(150000)
        start = self._add_blocks_until_full(ze, raw_data)
        # Practise showed that 'start' was 112000. Allow +/- 2000 bytes for
        # platform differences in zlib.
        self.assertTrue(110000 <= start <= 114000,
            "Unexpected amount of raw data added: %d bytes" % (start,))
        # The real compression should be 'close' to the 64000 passed to the
        # estimator above.
        raw_comp = zlib.compress(raw_data[:start])
        self.assertTrue(63000 < len(raw_comp) < 65000,
            "Unexpected compressed size: %d bytes" % (len(raw_comp),))