# Copyright (C) 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#

"""Direct tests of the btree serializer extension"""

import binascii
import bisect

from bzrlib import tests

from bzrlib.tests.test_btree_index import compiled_btreeparser_feature


class TestBtreeSerializer(tests.TestCase):

    _test_needs_features = [compiled_btreeparser_feature]

    def setUp(self):
        super(TestBtreeSerializer, self).setUp()
        self.module = compiled_btreeparser_feature.module


class TestHexAndUnhex(TestBtreeSerializer):

    def assertHexlify(self, as_binary):
        self.assertEqual(binascii.hexlify(as_binary),
                         self.module._py_hexlify(as_binary))

    def assertUnhexlify(self, as_hex):
        ba_unhex = binascii.unhexlify(as_hex)
        mod_unhex = self.module._py_unhexlify(as_hex)
        if ba_unhex != mod_unhex:
            if mod_unhex is None:
                mod_hex = '<None>'
            else:
                mod_hex = binascii.hexlify(mod_unhex)
            self.fail('_py_unhexlify returned a different answer'
                      ' from binascii:\n    %s\n != %s'
                      % (binascii.hexlify(ba_unhex), mod_hex))

    def assertFailUnhexlify(self, as_hex):
        # Invalid hex content
        self.assertIs(None, self.module._py_unhexlify(as_hex))

    def test_to_hex(self):
        raw_bytes = ''.join(map(chr, range(256)))
        for i in range(0, 240, 20):
            self.assertHexlify(raw_bytes[i:i+20])
        self.assertHexlify(raw_bytes[240:]+raw_bytes[0:4])

    def test_from_hex(self):
        self.assertUnhexlify('0123456789abcdef0123456789abcdef01234567')
        self.assertUnhexlify('123456789abcdef0123456789abcdef012345678')
        self.assertUnhexlify('0123456789ABCDEF0123456789ABCDEF01234567')
        self.assertUnhexlify('123456789ABCDEF0123456789ABCDEF012345678')
        hex_chars = binascii.hexlify(''.join(map(chr, range(256))))
        for i in range(0, 480, 40):
            self.assertUnhexlify(hex_chars[i:i+40])
        self.assertUnhexlify(hex_chars[480:]+hex_chars[0:8])

    def test_from_invalid_hex(self):
        self.assertFailUnhexlify('123456789012345678901234567890123456789X')
        self.assertFailUnhexlify('12345678901234567890123456789012345678X9')


_hex_form = '123456789012345678901234567890abcdefabcd'

class Test_KeyToSha1(TestBtreeSerializer):

    def assertKeyToSha1(self, expected, key):
        if expected is None:
            expected_bin = None
        else:
            expected_bin = binascii.unhexlify(expected)
        actual_sha1 = self.module._py_key_to_sha1(key)
        if expected_bin != actual_sha1:
            actual_hex_sha1 = None
            if actual_sha1 is not None:
                actual_hex_sha1 = binascii.hexlify(actual_sha1)
            self.fail('_key_to_sha1 returned:\n    %s\n != %s'
                      % (actual_hex_sha1, expected))

    def test_simple(self):
        self.assertKeyToSha1(_hex_form, ('sha1:' + _hex_form,))

    def test_invalid_not_tuple(self):
        self.assertKeyToSha1(None, _hex_form)
        self.assertKeyToSha1(None, 'sha1:' + _hex_form)

    def test_invalid_empty(self):
        self.assertKeyToSha1(None, ())

    def test_invalid_not_string(self):
        self.assertKeyToSha1(None, (None,))
        self.assertKeyToSha1(None, (list(_hex_form),))

    def test_invalid_not_sha1(self):
        self.assertKeyToSha1(None, (_hex_form,))
        self.assertKeyToSha1(None, ('sha2:' + _hex_form,))

    def test_invalid_not_hex(self):
        self.assertKeyToSha1(None,
            ('sha1:abcdefghijklmnopqrstuvwxyz12345678901234',))


class Test_Sha1ToKey(TestBtreeSerializer):

    def assertSha1ToKey(self, hex_sha1):
        bin_sha1 = binascii.unhexlify(hex_sha1)
        key = self.module._py_sha1_to_key(bin_sha1)
        self.assertEqual(('sha1:' + hex_sha1,), key)

    def test_simple(self):
        self.assertSha1ToKey(_hex_form)


_one_key_content = """type=leaf
sha1:123456789012345678901234567890abcdefabcd\x00\x001 2 3 4
"""

_large_offsets = """type=leaf
sha1:123456789012345678901234567890abcdefabcd\x00\x0012345678901 1234567890 0 1
sha1:abcd123456789012345678901234567890abcdef\x00\x002147483648 2147483647 0 1
sha1:abcdefabcd123456789012345678901234567890\x00\x004294967296 4294967295 4294967294 1
"""

_multi_key_content = """type=leaf
sha1:c80c881d4a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0
sha1:c86f7e437faa5a7fce15d1ddcb9eaeaea377667b\x00\x001 1 1 1
sha1:c8e240de74fb1ed08fa08d38063f6a6a91462a81\x00\x002 2 2 2
sha1:cda39a3ee5e6b4b0d3255bfef95601890afd8070\x00\x003 3 3 3
sha1:cdf51e37c269aa94d38f93e537bf6e2020b21406\x00\x004 4 4 4
sha1:ce0c9035898dd52fc65c41454cec9c4d2611bfb3\x00\x005 5 5 5
sha1:ce93b4e3c464ffd51732fbd6ded717e9efda28aa\x00\x006 6 6 6
sha1:cf7a9e24777ec23212c54d7a350bc5bea5477fdb\x00\x007 7 7 7
"""

_multi_key_same_offset = """type=leaf
sha1:080c881d4a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0
sha1:c86f7e437faa5a7fce15d1ddcb9eaeaea377667b\x00\x001 1 1 1
sha1:cd0c9035898dd52fc65c41454cec9c4d2611bfb3\x00\x002 2 2 2
sha1:cda39a3ee5e6b4b0d3255bfef95601890afd8070\x00\x003 3 3 3
sha1:cde240de74fb1ed08fa08d38063f6a6a91462a81\x00\x004 4 4 4
sha1:cdf51e37c269aa94d38f93e537bf6e2020b21406\x00\x005 5 5 5
sha1:ce7a9e24777ec23212c54d7a350bc5bea5477fdb\x00\x006 6 6 6
sha1:ce93b4e3c464ffd51732fbd6ded717e9efda28aa\x00\x007 7 7 7
"""

_common_32_bits = """type=leaf
sha1:123456784a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0
sha1:1234567874fb1ed08fa08d38063f6a6a91462a81\x00\x001 1 1 1
sha1:12345678777ec23212c54d7a350bc5bea5477fdb\x00\x002 2 2 2
sha1:123456787faa5a7fce15d1ddcb9eaeaea377667b\x00\x003 3 3 3
sha1:12345678898dd52fc65c41454cec9c4d2611bfb3\x00\x004 4 4 4
sha1:12345678c269aa94d38f93e537bf6e2020b21406\x00\x005 5 5 5
sha1:12345678c464ffd51732fbd6ded717e9efda28aa\x00\x006 6 6 6
sha1:12345678e5e6b4b0d3255bfef95601890afd8070\x00\x007 7 7 7
"""


class TestGCCKHSHA1LeafNode(TestBtreeSerializer):

    def assertInvalid(self, bytes):
        """Ensure that we get a proper error when trying to parse invalid bytes.

        (mostly this is testing that bad input doesn't cause us to segfault)
        """
        self.assertRaises((ValueError, TypeError),
                          self.module._parse_into_chk, bytes, 1, 0)

    def test_non_str(self):
        self.assertInvalid(u'type=leaf\n')

    def test_not_leaf(self):
        self.assertInvalid('type=internal\n')

    def test_empty_leaf(self):
        leaf = self.module._parse_into_chk('type=leaf\n', 1, 0)
        self.assertEqual(0, len(leaf))
        self.assertEqual([], leaf.all_items())
        self.assertEqual([], leaf.all_keys())
        # It should allow any key to be queried
        self.assertFalse(('key',) in leaf)

    def test_one_key_leaf(self):
        leaf = self.module._parse_into_chk(_one_key_content, 1, 0)
        self.assertEqual(1, len(leaf))
        sha_key = ('sha1:' + _hex_form,)
        self.assertEqual([sha_key], leaf.all_keys())
        self.assertEqual([(sha_key, ('1 2 3 4', ()))], leaf.all_items())
        self.assertTrue(sha_key in leaf)

    def test_large_offsets(self):
        leaf = self.module._parse_into_chk(_large_offsets, 1, 0)
        self.assertEqual(['12345678901 1234567890 0 1',
                          '2147483648 2147483647 0 1',
                          '4294967296 4294967295 4294967294 1',
                         ], [x[1][0] for x in leaf.all_items()])

    def test_many_key_leaf(self):
        leaf = self.module._parse_into_chk(_multi_key_content, 1, 0)
        self.assertEqual(8, len(leaf))
        all_keys = leaf.all_keys()
        self.assertEqual(8, len(leaf.all_keys()))
        for idx, key in enumerate(all_keys):
            self.assertEqual(str(idx), leaf[key][0].split()[0])

    def test_common_shift(self):
        # The keys were deliberately chosen so that the first 5 bits all
        # overlap; it also happens that a later bit overlaps.
        # Note that by 'overlap' we mean that a given bit is either on in
        # all keys, or off in all keys.
        leaf = self.module._parse_into_chk(_multi_key_content, 1, 0)
        self.assertEqual(19, leaf.common_shift)
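        # (Derivation, inferred from the test data rather than documented:
        # these sha1s all start with 0xc8..0xcf, so the top 5 of the first
        # 32 bits match, and a shift of 32 - 5 - 8 == 19 exposes the next
        # 8 bits, e.g. (0xc80c881d >> 19) & 0xFF == 1 for the first key.)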
        # The interesting byte for each key is
        # (defined as the 8-bits that come after the common prefix)
        lst = [1, 13, 28, 180, 190, 193, 210, 239]
        offsets = leaf._get_offsets()
        self.assertEqual([bisect.bisect_left(lst, x) for x in range(0, 257)],
                         offsets)
        for idx, val in enumerate(lst):
            self.assertEqual(idx, offsets[val])
        for idx, key in enumerate(leaf.all_keys()):
            self.assertEqual(str(idx), leaf[key][0].split()[0])

    def test_multi_key_same_offset(self):
        # there is no common prefix, though there are some common bits
        leaf = self.module._parse_into_chk(_multi_key_same_offset, 1, 0)
        self.assertEqual(24, leaf.common_shift)
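        # (With no shared leading bits the shift appears to be
        # 32 - 0 - 8 == 24, i.e. the interesting byte is simply the top
        # byte of each sha1; this is inferred from the data, not documented.)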
        offsets = leaf._get_offsets()
        # The interesting byte is just the first 8-bits of the key
        lst = [8, 200, 205, 205, 205, 205, 206, 206]
        self.assertEqual([bisect.bisect_left(lst, x) for x in range(0, 257)],
                         offsets)
        for val in lst:
            self.assertEqual(lst.index(val), offsets[val])
        for idx, key in enumerate(leaf.all_keys()):
            self.assertEqual(str(idx), leaf[key][0].split()[0])

    def test_all_common_prefix(self):
        # The first 32 bits of all hashes are the same. This is going to be
        # pretty much impossible, but I don't want to fail because of this
        leaf = self.module._parse_into_chk(_common_32_bits, 1, 0)
        self.assertEqual(0, leaf.common_shift)
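        # (Here all 32 leading bits match, so there is no room left for an
        # 8-bit window; the shift appears to bottom out at 0, making the
        # interesting byte the low byte of the first four, 0x78 below.
        # Again inferred from the data rather than documented.)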
        lst = [0x78] * 8
        offsets = leaf._get_offsets()
        self.assertEqual([bisect.bisect_left(lst, x) for x in range(0, 257)],
                         offsets)
        for val in lst:
            self.assertEqual(lst.index(val), offsets[val])
        for idx, key in enumerate(leaf.all_keys()):
            self.assertEqual(str(idx), leaf[key][0].split()[0])

    def test_many_entries(self):
        # Again, this is almost impossible, but we should still work
        # It would be hard to fit more than 120 entries in a 4k page, much
        # less more than 256 of them. But hey, weird stuff happens sometimes
        lines = ['type=leaf\n']
        for i in range(500):
            key_str = 'sha1:%04x%s' % (i, _hex_form[:36])
            key = (key_str,)
            lines.append('%s\0\0%d %d %d %d\n' % (key_str, i, i, i, i))
        bytes = ''.join(lines)
        leaf = self.module._parse_into_chk(bytes, 1, 0)
        self.assertEqual(24-7, leaf.common_shift)
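        # (Why 24-7 == 17: the '%04x' counters only reach 0x01f3, so the top
        # 7 of the first 32 bits are common to every key, leaving
        # 32 - 7 - 8 == 17. This is inferred from the data, not documented.)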
        offsets = leaf._get_offsets()
        # These are the interesting bits for each entry
        lst = [x // 2 for x in range(500)]
        expected_offsets = [x * 2 for x in range(128)] + [255]*129
        self.assertEqual(expected_offsets, offsets)
        # We truncate because each entry in offsets is an unsigned char, so
        # the bisection will just say 'greater than the last one' for all
        # the rest
        lst = lst[:255]
        self.assertEqual([bisect.bisect_left(lst, x) for x in range(0, 257)],
                         offsets)
        for val in lst:
            self.assertEqual(lst.index(val), offsets[val])
        for idx, key in enumerate(leaf.all_keys()):
            self.assertEqual(str(idx), leaf[key][0].split()[0])

    def test__sizeof__(self):
        # We can't use the exact numbers because of platform variations, etc.
        # But what we really care about is that it does get bigger with more
        # content.
        leaf0 = self.module._parse_into_chk('type=leaf\n', 1, 0)
        leaf1 = self.module._parse_into_chk(_one_key_content, 1, 0)
        leafN = self.module._parse_into_chk(_multi_key_content, 1, 0)
        sizeof_1 = leaf1.__sizeof__() - leaf0.__sizeof__()
        self.assertTrue(sizeof_1 > 0)
        sizeof_N = leafN.__sizeof__() - leaf0.__sizeof__()
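        # Each key presumably adds a fixed-size record, so N keys should
        # cost exactly N times the one-key delta.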
        self.assertEqual(sizeof_1 * len(leafN), sizeof_N)