~bzr-pqm/bzr/bzr.dev

3641.3.29 by John Arbash Meinel
Cleanup the copyright headers
1
# Copyright (C) 2008 Canonical Ltd
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
2
#
3
# This program is free software; you can redistribute it and/or modify
3641.3.29 by John Arbash Meinel
Cleanup the copyright headers
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
16
#
17
18
"""Tests for btree indices."""
19
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
20
import pprint
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
21
import zlib
22
23
from bzrlib import (
24
    btree_index,
25
    errors,
26
    tests,
27
    )
28
from bzrlib.tests import (
29
    TestCaseWithTransport,
30
    condition_isinstance,
4084.5.1 by Robert Collins
Bulk update all test adaptation into a single approach, using multiply_tests rather than test adapters.
31
    multiply_tests,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
32
    split_suite_by_condition,
33
    )
34
from bzrlib.transport import get_transport
35
36
37
def load_tests(standard_tests, module, loader):
38
    # parameterise the TestBTreeNodes tests
39
    node_tests, others = split_suite_by_condition(standard_tests,
40
        condition_isinstance(TestBTreeNodes))
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
41
    import bzrlib._btree_serializer_py as py_module
4084.5.1 by Robert Collins
Bulk update all test adaptation into a single approach, using multiply_tests rather than test adapters.
42
    scenarios = [('python', {'parse_btree': py_module})]
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
43
    if CompiledBtreeParserFeature.available():
44
        # Is there a way to do this that gets missing feature failures rather
45
        # than no indication to the user?
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
46
        import bzrlib._btree_serializer_c as c_module
4084.5.1 by Robert Collins
Bulk update all test adaptation into a single approach, using multiply_tests rather than test adapters.
47
        scenarios.append(('C', {'parse_btree': c_module}))
48
    return multiply_tests(node_tests, scenarios, others)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
49
50
51
class _CompiledBtreeParserFeature(tests.Feature):
52
    def _probe(self):
53
        try:
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
54
            import bzrlib._btree_serializer_c
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
55
        except ImportError:
56
            return False
57
        return True
58
59
    def feature_name(self):
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
60
        return 'bzrlib._btree_serializer_c'
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
61
62
CompiledBtreeParserFeature = _CompiledBtreeParserFeature()
63
64
65
class BTreeTestCase(TestCaseWithTransport):
66
    # test names here are suffixed by the key length and reference list count
67
    # that they test.
68
69
    def setUp(self):
70
        TestCaseWithTransport.setUp(self)
71
        self._original_header = btree_index._RESERVED_HEADER_BYTES
72
        def restore():
73
            btree_index._RESERVED_HEADER_BYTES = self._original_header
74
        self.addCleanup(restore)
75
        btree_index._RESERVED_HEADER_BYTES = 100
76
77
    def make_nodes(self, count, key_elements, reference_lists):
78
        """Generate count*key_elements sample nodes."""
79
        keys = []
80
        for prefix_pos in range(key_elements):
81
            if key_elements - 1:
82
                prefix = (str(prefix_pos) * 40,)
83
            else:
84
                prefix = ()
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
85
            for pos in xrange(count):
86
                # TODO: This creates odd keys. When count == 100,000, it
87
                #       creates a 240 byte key
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
88
                key = prefix + (str(pos) * 40,)
89
                value = "value:%s" % pos
90
                if reference_lists:
91
                    # generate some references
92
                    refs = []
93
                    for list_pos in range(reference_lists):
94
                        # as many keys in each list as its index + the key depth
95
                        # mod 2 - this generates both 0 length lists and
96
                        # ones slightly longer than the number of lists.
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
97
                        # It also ensures we have non homogeneous lists.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
98
                        refs.append([])
99
                        for ref_pos in range(list_pos + pos % 2):
100
                            if pos % 2:
101
                                # refer to a nearby key
102
                                refs[-1].append(prefix + ("ref" + str(pos - 1) * 40,))
103
                            else:
104
                                # serial of this ref in the ref list
105
                                refs[-1].append(prefix + ("ref" + str(ref_pos) * 40,))
106
                        refs[-1] = tuple(refs[-1])
107
                    refs = tuple(refs)
108
                else:
109
                    refs = ()
110
                keys.append((key, value, refs))
111
        return keys
112
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
113
    def shrink_page_size(self):
114
        """Shrink the default page size so that less fits in a page."""
115
        old_page_size = btree_index._PAGE_SIZE
116
        def cleanup():
117
            btree_index._PAGE_SIZE = old_page_size
118
        self.addCleanup(cleanup)
119
        btree_index._PAGE_SIZE = 2048
120
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
121
122
class TestBTreeBuilder(BTreeTestCase):
123
124
    def test_empty_1_0(self):
125
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
126
        # NamedTemporaryFile dies on builder.finish().read(). weird.
127
        temp_file = builder.finish()
128
        content = temp_file.read()
129
        del temp_file
130
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
131
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
132
            "row_lengths=\n",
133
            content)
134
135
    def test_empty_2_1(self):
136
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
137
        # NamedTemporaryFile dies on builder.finish().read(). weird.
138
        temp_file = builder.finish()
139
        content = temp_file.read()
140
        del temp_file
141
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
142
            "B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
143
            "row_lengths=\n",
144
            content)
145
146
    def test_root_leaf_1_0(self):
147
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
148
        nodes = self.make_nodes(5, 1, 0)
149
        for node in nodes:
150
            builder.add_node(*node)
151
        # NamedTemporaryFile dies on builder.finish().read(). weird.
152
        temp_file = builder.finish()
153
        content = temp_file.read()
154
        del temp_file
155
        self.assertEqual(158, len(content))
156
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
157
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
158
            "row_lengths=1\n",
159
            content[:73])
160
        node_content = content[73:]
161
        node_bytes = zlib.decompress(node_content)
162
        expected_node = ("type=leaf\n"
163
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
164
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
165
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
166
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
167
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
168
        self.assertEqual(expected_node, node_bytes)
169
170
    def test_root_leaf_2_2(self):
171
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
172
        nodes = self.make_nodes(5, 2, 2)
173
        for node in nodes:
174
            builder.add_node(*node)
175
        # NamedTemporaryFile dies on builder.finish().read(). weird.
176
        temp_file = builder.finish()
177
        content = temp_file.read()
178
        del temp_file
179
        self.assertEqual(264, len(content))
180
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
181
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
182
            "row_lengths=1\n",
183
            content[:74])
184
        node_content = content[74:]
185
        node_bytes = zlib.decompress(node_content)
186
        expected_node = (
187
            "type=leaf\n"
188
            "0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
189
            "0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
190
            "0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
191
            "0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
192
            "0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
193
            "1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
194
            "1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
195
            "1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
196
            "1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
197
            "1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
198
            ""
199
            )
200
        self.assertEqual(expected_node, node_bytes)
201
202
    def test_2_leaves_1_0(self):
203
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
204
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
205
        for node in nodes:
206
            builder.add_node(*node)
207
        # NamedTemporaryFile dies on builder.finish().read(). weird.
208
        temp_file = builder.finish()
209
        content = temp_file.read()
210
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
211
        self.assertEqual(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
212
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
213
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
214
            "row_lengths=1,2\n",
215
            content[:77])
216
        root = content[77:4096]
217
        leaf1 = content[4096:8192]
218
        leaf2 = content[8192:]
219
        root_bytes = zlib.decompress(root)
220
        expected_root = (
221
            "type=internal\n"
222
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
223
            ) + ("307" * 40) + "\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
224
        self.assertEqual(expected_root, root_bytes)
225
        # We already know serialisation works for leaves, check key selection:
226
        leaf1_bytes = zlib.decompress(leaf1)
227
        sorted_node_keys = sorted(node[0] for node in nodes)
228
        node = btree_index._LeafNode(leaf1_bytes, 1, 0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
229
        self.assertEqual(231, len(node.keys))
230
        self.assertEqual(sorted_node_keys[:231], sorted(node.keys))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
231
        leaf2_bytes = zlib.decompress(leaf2)
232
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
233
        self.assertEqual(400 - 231, len(node.keys))
234
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
235
236
    def test_last_page_rounded_1_layer(self):
237
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
238
        nodes = self.make_nodes(10, 1, 0)
239
        for node in nodes:
240
            builder.add_node(*node)
241
        # NamedTemporaryFile dies on builder.finish().read(). weird.
242
        temp_file = builder.finish()
243
        content = temp_file.read()
244
        del temp_file
245
        self.assertEqual(181, len(content))
246
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
247
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
248
            "row_lengths=1\n",
249
            content[:74])
250
        # Check thelast page is well formed
251
        leaf2 = content[74:]
252
        leaf2_bytes = zlib.decompress(leaf2)
253
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
254
        self.assertEqual(10, len(node.keys))
255
        sorted_node_keys = sorted(node[0] for node in nodes)
256
        self.assertEqual(sorted_node_keys, sorted(node.keys))
257
258
    def test_last_page_not_rounded_2_layer(self):
259
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
260
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
261
        for node in nodes:
262
            builder.add_node(*node)
263
        # NamedTemporaryFile dies on builder.finish().read(). weird.
264
        temp_file = builder.finish()
265
        content = temp_file.read()
266
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
267
        self.assertEqual(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
268
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
269
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
270
            "row_lengths=1,2\n",
271
            content[:77])
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
272
        # Check the last page is well formed
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
273
        leaf2 = content[8192:]
274
        leaf2_bytes = zlib.decompress(leaf2)
275
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
276
        self.assertEqual(400 - 231, len(node.keys))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
277
        sorted_node_keys = sorted(node[0] for node in nodes)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
278
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
279
280
    def test_three_level_tree_details(self):
281
        # The left most pointer in the second internal node in a row should
282
        # pointer to the second node that the internal node is for, _not_
283
        # the first, otherwise the first node overlaps with the last node of
284
        # the prior internal node on that row.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
285
        self.shrink_page_size()
286
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
287
        # 40K nodes is enough to create a two internal nodes on the second
288
        # level, with a 2K page size
289
        nodes = self.make_nodes(20000, 2, 2)
3641.3.5 by John Arbash Meinel
For iter_all and three_level tests adjust spill-at.
290
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
291
        for node in nodes:
292
            builder.add_node(*node)
293
        transport = get_transport('trace+' + self.get_url(''))
3641.3.7 by John Arbash Meinel
Make it easier to profile the btree writer time
294
        size = transport.put_file('index', self.time(builder.finish))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
295
        del builder
296
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
297
        # Seed the metadata, we're using internal calls now.
298
        index.key_count()
299
        self.assertEqual(3, len(index._row_lengths),
300
            "Not enough rows: %r" % index._row_lengths)
301
        self.assertEqual(4, len(index._row_offsets))
302
        self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
303
        internal_nodes = index._get_internal_nodes([0, 1, 2])
304
        root_node = internal_nodes[0]
305
        internal_node1 = internal_nodes[1]
306
        internal_node2 = internal_nodes[2]
307
        # The left most node node2 points at should be one after the right most
308
        # node pointed at by node1.
309
        self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
310
        # The left most key of the second node pointed at by internal_node2
311
        # should be its first key. We can check this by looking for its first key
312
        # in the second node it points at
313
        pos = index._row_offsets[2] + internal_node2.offset + 1
314
        leaf = index._get_leaf_nodes([pos])[pos]
315
        self.assertTrue(internal_node2.keys[0] in leaf.keys)
316
317
    def test_2_leaves_2_2(self):
318
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
319
        nodes = self.make_nodes(100, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
320
        for node in nodes:
321
            builder.add_node(*node)
322
        # NamedTemporaryFile dies on builder.finish().read(). weird.
323
        temp_file = builder.finish()
324
        content = temp_file.read()
325
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
326
        self.assertEqual(12643, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
327
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
328
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
329
            "row_lengths=1,3\n",
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
330
            content[:77])
331
        root = content[77:4096]
332
        leaf1 = content[4096:8192]
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
333
        leaf2 = content[8192:12288]
334
        leaf3 = content[12288:]
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
335
        root_bytes = zlib.decompress(root)
336
        expected_root = (
337
            "type=internal\n"
338
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
339
            + ("0" * 40) + "\x00" + ("91" * 40) + "\n"
340
            + ("1" * 40) + "\x00" + ("81" * 40) + "\n"
341
            )
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
342
        self.assertEqual(expected_root, root_bytes)
343
        # We assume the other leaf nodes have been written correctly - layering
344
        # FTW.
345
346
    def test_spill_index_stress_1_1(self):
347
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
348
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
349
        builder.add_node(*nodes[0])
350
        # Test the parts of the index that take up memory are doing so
351
        # predictably.
352
        self.assertEqual(1, len(builder._nodes))
353
        self.assertEqual(1, len(builder._keys))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
354
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
355
        builder.add_node(*nodes[1])
356
        self.assertEqual(0, len(builder._nodes))
357
        self.assertEqual(0, len(builder._keys))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
358
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
359
        self.assertEqual(1, len(builder._backing_indices))
360
        self.assertEqual(2, builder._backing_indices[0].key_count())
361
        # now back to memory
362
        builder.add_node(*nodes[2])
363
        self.assertEqual(1, len(builder._nodes))
364
        self.assertEqual(1, len(builder._keys))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
365
        self.assertIs(None, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
366
        # And spills to a second backing index combing all
367
        builder.add_node(*nodes[3])
368
        self.assertEqual(0, len(builder._nodes))
369
        self.assertEqual(0, len(builder._keys))
370
        self.assertIs(None, builder._nodes_by_key)
371
        self.assertEqual(2, len(builder._backing_indices))
372
        self.assertEqual(None, builder._backing_indices[0])
373
        self.assertEqual(4, builder._backing_indices[1].key_count())
374
        # The next spills to the 2-len slot
375
        builder.add_node(*nodes[4])
376
        builder.add_node(*nodes[5])
377
        self.assertEqual(0, len(builder._nodes))
378
        self.assertEqual(0, len(builder._keys))
379
        self.assertIs(None, builder._nodes_by_key)
380
        self.assertEqual(2, len(builder._backing_indices))
381
        self.assertEqual(2, builder._backing_indices[0].key_count())
382
        self.assertEqual(4, builder._backing_indices[1].key_count())
383
        # Next spill combines
384
        builder.add_node(*nodes[6])
385
        builder.add_node(*nodes[7])
386
        self.assertEqual(3, len(builder._backing_indices))
387
        self.assertEqual(None, builder._backing_indices[0])
388
        self.assertEqual(None, builder._backing_indices[1])
389
        self.assertEqual(8, builder._backing_indices[2].key_count())
390
        # And so forth - counting up in binary.
391
        builder.add_node(*nodes[8])
392
        builder.add_node(*nodes[9])
393
        self.assertEqual(3, len(builder._backing_indices))
394
        self.assertEqual(2, builder._backing_indices[0].key_count())
395
        self.assertEqual(None, builder._backing_indices[1])
396
        self.assertEqual(8, builder._backing_indices[2].key_count())
397
        builder.add_node(*nodes[10])
398
        builder.add_node(*nodes[11])
399
        self.assertEqual(3, len(builder._backing_indices))
400
        self.assertEqual(None, builder._backing_indices[0])
401
        self.assertEqual(4, builder._backing_indices[1].key_count())
402
        self.assertEqual(8, builder._backing_indices[2].key_count())
403
        builder.add_node(*nodes[12])
404
        # Test that memory and disk are both used for query methods; and that
405
        # None is skipped over happily.
406
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
407
            list(builder.iter_all_entries()))
408
        # Two nodes - one memory one disk
409
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
410
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
411
        self.assertEqual(13, builder.key_count())
412
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
413
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
414
        builder.add_node(*nodes[13])
415
        self.assertEqual(3, len(builder._backing_indices))
416
        self.assertEqual(2, builder._backing_indices[0].key_count())
417
        self.assertEqual(4, builder._backing_indices[1].key_count())
418
        self.assertEqual(8, builder._backing_indices[2].key_count())
419
        builder.add_node(*nodes[14])
420
        builder.add_node(*nodes[15])
421
        self.assertEqual(4, len(builder._backing_indices))
422
        self.assertEqual(None, builder._backing_indices[0])
423
        self.assertEqual(None, builder._backing_indices[1])
424
        self.assertEqual(None, builder._backing_indices[2])
425
        self.assertEqual(16, builder._backing_indices[3].key_count())
426
        # Now finish, and check we got a correctly ordered tree
427
        transport = self.get_transport('')
428
        size = transport.put_file('index', builder.finish())
429
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
430
        nodes = list(index.iter_all_entries())
431
        self.assertEqual(sorted(nodes), nodes)
432
        self.assertEqual(16, len(nodes))
433
434
    def test_spill_index_stress_1_1_no_combine(self):
435
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
4168.3.6 by John Arbash Meinel
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
436
        builder.set_optimize(for_size=False, combine_backing_indices=False)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
437
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
438
        builder.add_node(*nodes[0])
439
        # Test the parts of the index that take up memory are doing so
440
        # predictably.
441
        self.assertEqual(1, len(builder._nodes))
442
        self.assertEqual(1, len(builder._keys))
443
        self.assertIs(None, builder._nodes_by_key)
444
        builder.add_node(*nodes[1])
445
        self.assertEqual(0, len(builder._nodes))
446
        self.assertEqual(0, len(builder._keys))
447
        self.assertIs(None, builder._nodes_by_key)
448
        self.assertEqual(1, len(builder._backing_indices))
449
        self.assertEqual(2, builder._backing_indices[0].key_count())
450
        # now back to memory
451
        builder.add_node(*nodes[2])
452
        self.assertEqual(1, len(builder._nodes))
453
        self.assertEqual(1, len(builder._keys))
454
        self.assertIs(None, builder._nodes_by_key)
455
        # And spills to a second backing index but doesn't combine
456
        builder.add_node(*nodes[3])
457
        self.assertEqual(0, len(builder._nodes))
458
        self.assertEqual(0, len(builder._keys))
459
        self.assertIs(None, builder._nodes_by_key)
460
        self.assertEqual(2, len(builder._backing_indices))
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
461
        for backing_index in builder._backing_indices:
462
            self.assertEqual(2, backing_index.key_count())
463
        # The next spills to the 3rd slot
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
464
        builder.add_node(*nodes[4])
465
        builder.add_node(*nodes[5])
466
        self.assertEqual(0, len(builder._nodes))
467
        self.assertEqual(0, len(builder._keys))
468
        self.assertIs(None, builder._nodes_by_key)
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
469
        self.assertEqual(3, len(builder._backing_indices))
470
        for backing_index in builder._backing_indices:
471
            self.assertEqual(2, backing_index.key_count())
472
        # Now spill a few more, and check that we don't combine
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
473
        builder.add_node(*nodes[6])
474
        builder.add_node(*nodes[7])
475
        builder.add_node(*nodes[8])
476
        builder.add_node(*nodes[9])
477
        builder.add_node(*nodes[10])
478
        builder.add_node(*nodes[11])
479
        builder.add_node(*nodes[12])
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
480
        self.assertEqual(6, len(builder._backing_indices))
481
        for backing_index in builder._backing_indices:
482
            self.assertEqual(2, backing_index.key_count())
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
483
        # Test that memory and disk are both used for query methods; and that
484
        # None is skipped over happily.
485
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
486
            list(builder.iter_all_entries()))
487
        # Two nodes - one memory one disk
488
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
489
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
490
        self.assertEqual(13, builder.key_count())
491
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
492
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
493
        builder.add_node(*nodes[13])
494
        builder.add_node(*nodes[14])
495
        builder.add_node(*nodes[15])
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
496
        self.assertEqual(8, len(builder._backing_indices))
497
        for backing_index in builder._backing_indices:
498
            self.assertEqual(2, backing_index.key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
499
        # Now finish, and check we got a correctly ordered tree
500
        transport = self.get_transport('')
501
        size = transport.put_file('index', builder.finish())
502
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
503
        nodes = list(index.iter_all_entries())
504
        self.assertEqual(sorted(nodes), nodes)
505
        self.assertEqual(16, len(nodes))
506
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
507
    def test_set_optimize(self):
508
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
509
        builder.set_optimize(for_size=True)
510
        self.assertTrue(builder._optimize_for_size)
511
        builder.set_optimize(for_size=False)
512
        self.assertFalse(builder._optimize_for_size)
4168.3.6 by John Arbash Meinel
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
513
        # test that we can set combine_backing_indices without effecting
514
        # _optimize_for_size
515
        obj = object()
516
        builder._optimize_for_size = obj
517
        builder.set_optimize(combine_backing_indices=False)
518
        self.assertFalse(builder._combine_backing_indices)
519
        self.assertIs(obj, builder._optimize_for_size)
520
        builder.set_optimize(combine_backing_indices=True)
521
        self.assertTrue(builder._combine_backing_indices)
522
        self.assertIs(obj, builder._optimize_for_size)
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
523
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
524
    def test_spill_index_stress_2_2(self):
525
        # test that references and longer keys don't confuse things.
526
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
527
            spill_at=2)
528
        nodes = self.make_nodes(16, 2, 2)
529
        builder.add_node(*nodes[0])
530
        # Test the parts of the index that take up memory are doing so
531
        # predictably.
532
        self.assertEqual(1, len(builder._keys))
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
533
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
534
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
535
        builder.add_node(*nodes[1])
536
        self.assertEqual(0, len(builder._keys))
537
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
538
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
539
        self.assertEqual(1, len(builder._backing_indices))
540
        self.assertEqual(2, builder._backing_indices[0].key_count())
541
        # now back to memory
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
542
        old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
543
        builder.add_node(*nodes[2])
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
544
        self.assertEqual(1, len(builder._nodes))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
545
        self.assertEqual(1, len(builder._keys))
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
546
        self.assertIsNot(None, builder._nodes_by_key)
547
        self.assertNotEqual({}, builder._nodes_by_key)
548
        # We should have a new entry
549
        self.assertNotEqual(old, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
550
        # And spills to a second backing index combing all
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
551
        builder.add_node(*nodes[3])
552
        self.assertEqual(0, len(builder._nodes))
553
        self.assertEqual(0, len(builder._keys))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
554
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
555
        self.assertEqual(2, len(builder._backing_indices))
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
556
        self.assertEqual(None, builder._backing_indices[0])
557
        self.assertEqual(4, builder._backing_indices[1].key_count())
558
        # The next spills to the 2-len slot
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
559
        builder.add_node(*nodes[4])
560
        builder.add_node(*nodes[5])
561
        self.assertEqual(0, len(builder._nodes))
562
        self.assertEqual(0, len(builder._keys))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
563
        self.assertIs(None, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
564
        self.assertEqual(2, len(builder._backing_indices))
565
        self.assertEqual(2, builder._backing_indices[0].key_count())
566
        self.assertEqual(4, builder._backing_indices[1].key_count())
567
        # Next spill combines
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
568
        builder.add_node(*nodes[6])
569
        builder.add_node(*nodes[7])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
570
        self.assertEqual(3, len(builder._backing_indices))
571
        self.assertEqual(None, builder._backing_indices[0])
572
        self.assertEqual(None, builder._backing_indices[1])
573
        self.assertEqual(8, builder._backing_indices[2].key_count())
574
        # And so forth - counting up in binary.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
575
        builder.add_node(*nodes[8])
576
        builder.add_node(*nodes[9])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
577
        self.assertEqual(3, len(builder._backing_indices))
578
        self.assertEqual(2, builder._backing_indices[0].key_count())
579
        self.assertEqual(None, builder._backing_indices[1])
580
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
581
        builder.add_node(*nodes[10])
582
        builder.add_node(*nodes[11])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
583
        self.assertEqual(3, len(builder._backing_indices))
584
        self.assertEqual(None, builder._backing_indices[0])
585
        self.assertEqual(4, builder._backing_indices[1].key_count())
586
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
587
        builder.add_node(*nodes[12])
588
        # Test that memory and disk are both used for query methods; and that
589
        # None is skipped over happily.
590
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
591
            list(builder.iter_all_entries()))
592
        # Two nodes - one memory one disk
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
593
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
594
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
595
        self.assertEqual(13, builder.key_count())
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
596
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
597
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
598
        builder.add_node(*nodes[13])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
599
        self.assertEqual(3, len(builder._backing_indices))
600
        self.assertEqual(2, builder._backing_indices[0].key_count())
601
        self.assertEqual(4, builder._backing_indices[1].key_count())
602
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
603
        builder.add_node(*nodes[14])
604
        builder.add_node(*nodes[15])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
605
        self.assertEqual(4, len(builder._backing_indices))
606
        self.assertEqual(None, builder._backing_indices[0])
607
        self.assertEqual(None, builder._backing_indices[1])
608
        self.assertEqual(None, builder._backing_indices[2])
609
        self.assertEqual(16, builder._backing_indices[3].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
610
        # Now finish, and check we got a correctly ordered tree
611
        transport = self.get_transport('')
612
        size = transport.put_file('index', builder.finish())
613
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
614
        nodes = list(index.iter_all_entries())
615
        self.assertEqual(sorted(nodes), nodes)
616
        self.assertEqual(16, len(nodes))
617
618
    def test_spill_index_duplicate_key_caught_on_finish(self):
619
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
620
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
621
        builder.add_node(*nodes[0])
622
        builder.add_node(*nodes[1])
623
        builder.add_node(*nodes[0])
624
        self.assertRaises(errors.BadIndexDuplicateKey, builder.finish)
625
626
627
class TestBTreeIndex(BTreeTestCase):
628
629
    def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
630
        builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
631
            key_elements=key_elements)
632
        for key, value, references in nodes:
633
            builder.add_node(key, value, references)
634
        stream = builder.finish()
635
        trans = get_transport('trace+' + self.get_url())
636
        size = trans.put_file('index', stream)
637
        return btree_index.BTreeGraphIndex(trans, 'index', size)
638
639
    def test_trivial_constructor(self):
640
        transport = get_transport('trace+' + self.get_url(''))
641
        index = btree_index.BTreeGraphIndex(transport, 'index', None)
642
        # Checks the page size at load, but that isn't logged yet.
643
        self.assertEqual([], transport._activity)
644
645
    def test_with_size_constructor(self):
646
        transport = get_transport('trace+' + self.get_url(''))
647
        index = btree_index.BTreeGraphIndex(transport, 'index', 1)
648
        # Checks the page size at load, but that isn't logged yet.
649
        self.assertEqual([], transport._activity)
650
651
    def test_empty_key_count_no_size(self):
652
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
653
        transport = get_transport('trace+' + self.get_url(''))
654
        transport.put_file('index', builder.finish())
655
        index = btree_index.BTreeGraphIndex(transport, 'index', None)
656
        del transport._activity[:]
657
        self.assertEqual([], transport._activity)
658
        self.assertEqual(0, index.key_count())
659
        # The entire index should have been requested (as we generally have the
660
        # size available, and doing many small readvs is inappropriate).
661
        # We can't tell how much was actually read here, but - check the code.
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
662
        self.assertEqual([('get', 'index')], transport._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
663
664
    def test_empty_key_count(self):
665
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
666
        transport = get_transport('trace+' + self.get_url(''))
667
        size = transport.put_file('index', builder.finish())
668
        self.assertEqual(72, size)
669
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
670
        del transport._activity[:]
671
        self.assertEqual([], transport._activity)
672
        self.assertEqual(0, index.key_count())
673
        # The entire index should have been read, as 4K > size
674
        self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
675
            transport._activity)
676
677
    def test_non_empty_key_count_2_2(self):
678
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
679
        nodes = self.make_nodes(35, 2, 2)
680
        for node in nodes:
681
            builder.add_node(*node)
682
        transport = get_transport('trace+' + self.get_url(''))
683
        size = transport.put_file('index', builder.finish())
684
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
685
        del transport._activity[:]
686
        self.assertEqual([], transport._activity)
687
        self.assertEqual(70, index.key_count())
688
        # The entire index should have been read, as it is one page long.
689
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
690
            transport._activity)
3641.5.8 by John Arbash Meinel
Fix up the test suite, now that we pack more efficiently.
691
        self.assertEqual(1199, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
692
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
693
    def test__read_nodes_no_size_one_page_reads_once(self):
694
        self.make_index(nodes=[(('key',), 'value', ())])
695
        trans = get_transport('trace+' + self.get_url())
696
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
697
        del trans._activity[:]
698
        nodes = dict(index._read_nodes([0]))
699
        self.assertEqual([0], nodes.keys())
700
        node = nodes[0]
701
        self.assertEqual([('key',)], node.keys.keys())
702
        self.assertEqual([('get', 'index')], trans._activity)
703
704
    def test__read_nodes_no_size_multiple_pages(self):
705
        index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
706
        index.key_count()
707
        num_pages = index._row_offsets[-1]
708
        # Reopen with a traced transport and no size
709
        trans = get_transport('trace+' + self.get_url())
710
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
711
        del trans._activity[:]
712
        nodes = dict(index._read_nodes([0]))
713
        self.assertEqual(range(num_pages), nodes.keys())
714
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
715
    def test_2_levels_key_count_2_2(self):
716
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
717
        nodes = self.make_nodes(160, 2, 2)
718
        for node in nodes:
719
            builder.add_node(*node)
720
        transport = get_transport('trace+' + self.get_url(''))
721
        size = transport.put_file('index', builder.finish())
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
722
        self.assertEqual(17692, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
723
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
724
        del transport._activity[:]
725
        self.assertEqual([], transport._activity)
726
        self.assertEqual(320, index.key_count())
727
        # The entire index should not have been read.
728
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
729
            transport._activity)
730
731
    def test_validate_one_page(self):
732
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
733
        nodes = self.make_nodes(45, 2, 2)
734
        for node in nodes:
735
            builder.add_node(*node)
736
        transport = get_transport('trace+' + self.get_url(''))
737
        size = transport.put_file('index', builder.finish())
738
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
739
        del transport._activity[:]
740
        self.assertEqual([], transport._activity)
741
        index.validate()
742
        # The entire index should have been read linearly.
743
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
744
            transport._activity)
745
        self.assertEqual(1514, size)
746
747
    def test_validate_two_pages(self):
748
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
749
        nodes = self.make_nodes(80, 2, 2)
750
        for node in nodes:
751
            builder.add_node(*node)
752
        transport = get_transport('trace+' + self.get_url(''))
753
        size = transport.put_file('index', builder.finish())
754
        # Root page, 2 leaf pages
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
755
        self.assertEqual(9339, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
756
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
757
        del transport._activity[:]
758
        self.assertEqual([], transport._activity)
759
        index.validate()
760
        # The entire index should have been read linearly.
761
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
762
            ('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
763
            transport._activity)
764
        # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
765
        # node and make validate find them.
766
767
    def test_eq_ne(self):
768
        # two indices are equal when constructed with the same parameters:
769
        transport1 = get_transport('trace+' + self.get_url(''))
770
        transport2 = get_transport(self.get_url(''))
771
        self.assertTrue(
772
            btree_index.BTreeGraphIndex(transport1, 'index', None) ==
773
            btree_index.BTreeGraphIndex(transport1, 'index', None))
774
        self.assertTrue(
775
            btree_index.BTreeGraphIndex(transport1, 'index', 20) ==
776
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
777
        self.assertFalse(
778
            btree_index.BTreeGraphIndex(transport1, 'index', 20) ==
779
            btree_index.BTreeGraphIndex(transport2, 'index', 20))
780
        self.assertFalse(
781
            btree_index.BTreeGraphIndex(transport1, 'inde1', 20) ==
782
            btree_index.BTreeGraphIndex(transport1, 'inde2', 20))
783
        self.assertFalse(
784
            btree_index.BTreeGraphIndex(transport1, 'index', 10) ==
785
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
786
        self.assertFalse(
787
            btree_index.BTreeGraphIndex(transport1, 'index', None) !=
788
            btree_index.BTreeGraphIndex(transport1, 'index', None))
789
        self.assertFalse(
790
            btree_index.BTreeGraphIndex(transport1, 'index', 20) !=
791
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
792
        self.assertTrue(
793
            btree_index.BTreeGraphIndex(transport1, 'index', 20) !=
794
            btree_index.BTreeGraphIndex(transport2, 'index', 20))
795
        self.assertTrue(
796
            btree_index.BTreeGraphIndex(transport1, 'inde1', 20) !=
797
            btree_index.BTreeGraphIndex(transport1, 'inde2', 20))
798
        self.assertTrue(
799
            btree_index.BTreeGraphIndex(transport1, 'index', 10) !=
800
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
801
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
802
    def test_iter_all_only_root_no_size(self):
803
        self.make_index(nodes=[(('key',), 'value', ())])
804
        trans = get_transport('trace+' + self.get_url(''))
805
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
806
        del trans._activity[:]
807
        self.assertEqual([(('key',), 'value')],
808
                         [x[1:] for x in index.iter_all_entries()])
809
        self.assertEqual([('get', 'index')], trans._activity)
810
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
811
    def test_iter_all_entries_reads(self):
812
        # iterating all entries reads the header, then does a linear
813
        # read.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
814
        self.shrink_page_size()
3641.5.11 by John Arbash Meinel
Some small test cleanup
815
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
816
        # 20k nodes is enough to create a two internal nodes on the second
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
817
        # level, with a 2K page size
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
818
        nodes = self.make_nodes(10000, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
819
        for node in nodes:
820
            builder.add_node(*node)
821
        transport = get_transport('trace+' + self.get_url(''))
822
        size = transport.put_file('index', builder.finish())
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
823
        self.assertEqual(1303220, size, 'number of expected bytes in the'
824
                                        ' output changed')
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
825
        page_size = btree_index._PAGE_SIZE
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
826
        del builder
827
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
828
        del transport._activity[:]
829
        self.assertEqual([], transport._activity)
3641.5.8 by John Arbash Meinel
Fix up the test suite, now that we pack more efficiently.
830
        found_nodes = self.time(list, index.iter_all_entries())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
831
        bare_nodes = []
832
        for node in found_nodes:
833
            self.assertTrue(node[0] is index)
834
            bare_nodes.append(node[1:])
835
        self.assertEqual(3, len(index._row_lengths),
836
            "Not enough rows: %r" % index._row_lengths)
837
        # Should be as long as the nodes we supplied
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
838
        self.assertEqual(20000, len(found_nodes))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
839
        # Should have the same content
840
        self.assertEqual(set(nodes), set(bare_nodes))
841
        # Should have done linear scan IO up the index, ignoring
842
        # the internal nodes:
843
        # The entire index should have been read
844
        total_pages = sum(index._row_lengths)
845
        self.assertEqual(total_pages, index._row_offsets[-1])
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
846
        self.assertEqual(1303220, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
847
        # The start of the leaves
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
848
        first_byte = index._row_offsets[-2] * page_size
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
849
        readv_request = []
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
850
        for offset in range(first_byte, size, page_size):
851
            readv_request.append((offset, page_size))
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
852
        # The last page is truncated
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
853
        readv_request[-1] = (readv_request[-1][0], 1303220 % page_size)
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
854
        expected = [('readv', 'index', [(0, page_size)], False, None),
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
855
             ('readv',  'index', readv_request, False, None)]
856
        if expected != transport._activity:
857
            self.assertEqualDiff(pprint.pformat(expected),
858
                                 pprint.pformat(transport._activity))
859
860
    def _test_iter_entries_references_resolved(self):
861
        index = self.make_index(1, nodes=[
862
            (('name', ), 'data', ([('ref', ), ('ref', )], )),
863
            (('ref', ), 'refdata', ([], ))])
864
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),('ref',)),)),
865
            (index, ('ref', ), 'refdata', ((), ))]),
866
            set(index.iter_entries([('name',), ('ref',)])))
867
868
    def test_iter_entries_references_2_refs_resolved(self):
869
        # iterating some entries reads just the pages needed. For now, to
870
        # get it working and start measuring, only 4K pages are read.
871
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
872
        # 80 nodes is enough to create a two-level index.
873
        nodes = self.make_nodes(160, 2, 2)
874
        for node in nodes:
875
            builder.add_node(*node)
876
        transport = get_transport('trace+' + self.get_url(''))
877
        size = transport.put_file('index', builder.finish())
878
        del builder
879
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
880
        del transport._activity[:]
881
        self.assertEqual([], transport._activity)
882
        # search for one key
883
        found_nodes = list(index.iter_entries([nodes[30][0]]))
884
        bare_nodes = []
885
        for node in found_nodes:
886
            self.assertTrue(node[0] is index)
887
            bare_nodes.append(node[1:])
888
        # Should be as long as the nodes we supplied
889
        self.assertEqual(1, len(found_nodes))
890
        # Should have the same content
891
        self.assertEqual(nodes[30], bare_nodes[0])
892
        # Should have read the root node, then one leaf page:
893
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
894
             ('readv',  'index', [(8192, 4096), ], False, None)],
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
895
            transport._activity)
896
897
    def test_iter_key_prefix_1_element_key_None(self):
898
        index = self.make_index()
899
        self.assertRaises(errors.BadIndexKey, list,
900
            index.iter_entries_prefix([(None, )]))
901
902
    def test_iter_key_prefix_wrong_length(self):
903
        index = self.make_index()
904
        self.assertRaises(errors.BadIndexKey, list,
905
            index.iter_entries_prefix([('foo', None)]))
906
        index = self.make_index(key_elements=2)
907
        self.assertRaises(errors.BadIndexKey, list,
908
            index.iter_entries_prefix([('foo', )]))
909
        self.assertRaises(errors.BadIndexKey, list,
910
            index.iter_entries_prefix([('foo', None, None)]))
911
912
    def test_iter_key_prefix_1_key_element_no_refs(self):
913
        index = self.make_index( nodes=[
914
            (('name', ), 'data', ()),
915
            (('ref', ), 'refdata', ())])
916
        self.assertEqual(set([(index, ('name', ), 'data'),
917
            (index, ('ref', ), 'refdata')]),
918
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
919
920
    def test_iter_key_prefix_1_key_element_refs(self):
921
        index = self.make_index(1, nodes=[
922
            (('name', ), 'data', ([('ref', )], )),
923
            (('ref', ), 'refdata', ([], ))])
924
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),),)),
925
            (index, ('ref', ), 'refdata', ((), ))]),
926
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
927
928
    def test_iter_key_prefix_2_key_element_no_refs(self):
929
        index = self.make_index(key_elements=2, nodes=[
930
            (('name', 'fin1'), 'data', ()),
931
            (('name', 'fin2'), 'beta', ()),
932
            (('ref', 'erence'), 'refdata', ())])
933
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
934
            (index, ('ref', 'erence'), 'refdata')]),
935
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
936
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
937
            (index, ('name', 'fin2'), 'beta')]),
938
            set(index.iter_entries_prefix([('name', None)])))
939
940
    def test_iter_key_prefix_2_key_element_refs(self):
941
        index = self.make_index(1, key_elements=2, nodes=[
942
            (('name', 'fin1'), 'data', ([('ref', 'erence')], )),
943
            (('name', 'fin2'), 'beta', ([], )),
944
            (('ref', 'erence'), 'refdata', ([], ))])
945
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
946
            (index, ('ref', 'erence'), 'refdata', ((), ))]),
947
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
948
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
949
            (index, ('name', 'fin2'), 'beta', ((), ))]),
950
            set(index.iter_entries_prefix([('name', None)])))
951
4011.5.5 by Andrew Bennetts
Fix typo in comment.
952
    # XXX: external_references tests are duplicated in test_index.  We
4011.5.3 by Andrew Bennetts
Implement and test external_references on GraphIndex and BTreeGraphIndex.
953
    # probably should have per_graph_index tests...
954
    def test_external_references_no_refs(self):
955
        index = self.make_index(ref_lists=0, nodes=[])
956
        self.assertRaises(ValueError, index.external_references, 0)
957
958
    def test_external_references_no_results(self):
959
        index = self.make_index(ref_lists=1, nodes=[
960
            (('key',), 'value', ([],))])
961
        self.assertEqual(set(), index.external_references(0))
962
963
    def test_external_references_missing_ref(self):
964
        missing_key = ('missing',)
965
        index = self.make_index(ref_lists=1, nodes=[
966
            (('key',), 'value', ([missing_key],))])
967
        self.assertEqual(set([missing_key]), index.external_references(0))
968
969
    def test_external_references_multiple_ref_lists(self):
970
        missing_key = ('missing',)
971
        index = self.make_index(ref_lists=2, nodes=[
972
            (('key',), 'value', ([], [missing_key]))])
973
        self.assertEqual(set([]), index.external_references(0))
974
        self.assertEqual(set([missing_key]), index.external_references(1))
975
976
    def test_external_references_two_records(self):
977
        index = self.make_index(ref_lists=1, nodes=[
978
            (('key-1',), 'value', ([('key-2',)],)),
979
            (('key-2',), 'value', ([],)),
980
            ])
981
        self.assertEqual(set([]), index.external_references(0))
982
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
983
984
class TestBTreeNodes(BTreeTestCase):
985
986
    def restore_parser(self):
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
987
        btree_index._btree_serializer = self.saved_parser
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
988
989
    def setUp(self):
990
        BTreeTestCase.setUp(self)
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
991
        self.saved_parser = btree_index._btree_serializer
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
992
        self.addCleanup(self.restore_parser)
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
993
        btree_index._btree_serializer = self.parse_btree
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
994
995
    def test_LeafNode_1_0(self):
996
        node_bytes = ("type=leaf\n"
997
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
998
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
999
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
1000
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
1001
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
1002
        node = btree_index._LeafNode(node_bytes, 1, 0)
1003
        # We do direct access, or don't care about order, to leaf nodes most of
1004
        # the time, so a dict is useful:
1005
        self.assertEqual({
1006
            ("0000000000000000000000000000000000000000",): ("value:0", ()),
1007
            ("1111111111111111111111111111111111111111",): ("value:1", ()),
1008
            ("2222222222222222222222222222222222222222",): ("value:2", ()),
1009
            ("3333333333333333333333333333333333333333",): ("value:3", ()),
1010
            ("4444444444444444444444444444444444444444",): ("value:4", ()),
1011
            }, node.keys)
1012
1013
    def test_LeafNode_2_2(self):
1014
        node_bytes = ("type=leaf\n"
1015
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
1016
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1017
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1018
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
1019
            ""
1020
            )
1021
        node = btree_index._LeafNode(node_bytes, 2, 2)
1022
        # We do direct access, or don't care about order, to leaf nodes most of
1023
        # the time, so a dict is useful:
1024
        self.assertEqual({
1025
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1026
            ('00', '11'): ('value:1',
1027
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1028
            ('11', '33'): ('value:3',
1029
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1030
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
1031
            }, node.keys)
1032
1033
    def test_InternalNode_1(self):
1034
        node_bytes = ("type=internal\n"
1035
            "offset=1\n"
1036
            "0000000000000000000000000000000000000000\n"
1037
            "1111111111111111111111111111111111111111\n"
1038
            "2222222222222222222222222222222222222222\n"
1039
            "3333333333333333333333333333333333333333\n"
1040
            "4444444444444444444444444444444444444444\n"
1041
            )
1042
        node = btree_index._InternalNode(node_bytes)
1043
        # We want to bisect to find the right children from this node, so a
1044
        # vector is most useful.
1045
        self.assertEqual([
1046
            ("0000000000000000000000000000000000000000",),
1047
            ("1111111111111111111111111111111111111111",),
1048
            ("2222222222222222222222222222222222222222",),
1049
            ("3333333333333333333333333333333333333333",),
1050
            ("4444444444444444444444444444444444444444",),
1051
            ], node.keys)
1052
        self.assertEqual(1, node.offset)
1053
1054
    def test_LeafNode_2_2(self):
1055
        node_bytes = ("type=leaf\n"
1056
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
1057
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1058
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1059
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
1060
            ""
1061
            )
1062
        node = btree_index._LeafNode(node_bytes, 2, 2)
1063
        # We do direct access, or don't care about order, to leaf nodes most of
1064
        # the time, so a dict is useful:
1065
        self.assertEqual({
1066
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1067
            ('00', '11'): ('value:1',
1068
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1069
            ('11', '33'): ('value:3',
1070
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1071
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
1072
            }, node.keys)
1073
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1074
    def assertFlattened(self, expected, key, value, refs):
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1075
        flat_key, flat_line = self.parse_btree._flatten_node(
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1076
            (None, key, value, refs), bool(refs))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1077
        self.assertEqual('\x00'.join(key), flat_key)
1078
        self.assertEqual(expected, flat_line)
1079
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1080
    def test__flatten_node(self):
1081
        self.assertFlattened('key\0\0value\n', ('key',), 'value', [])
1082
        self.assertFlattened('key\0tuple\0\0value str\n',
1083
                             ('key', 'tuple'), 'value str', [])
1084
        self.assertFlattened('key\0tuple\0triple\0\0value str\n',
1085
                             ('key', 'tuple', 'triple'), 'value str', [])
1086
        self.assertFlattened('k\0t\0s\0ref\0value str\n',
1087
                             ('k', 't', 's'), 'value str', [[('ref',)]])
1088
        self.assertFlattened('key\0tuple\0ref\0key\0value str\n',
1089
                             ('key', 'tuple'), 'value str', [[('ref', 'key')]])
1090
        self.assertFlattened("00\x0000\x00\t00\x00ref00\x00value:0\n",
1091
            ('00', '00'), 'value:0', ((), (('00', 'ref00'),)))
1092
        self.assertFlattened(
1093
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
1094
            ('00', '11'), 'value:1',
1095
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01'))))
1096
        self.assertFlattened(
1097
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
1098
            ('11', '33'), 'value:3',
1099
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22'))))
1100
        self.assertFlattened(
1101
            "11\x0044\x00\t11\x00ref00\x00value:4\n",
1102
            ('11', '44'), 'value:4', ((), (('11', 'ref00'),)))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1103
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1104
1105
class TestCompiledBtree(tests.TestCase):
1106
1107
    def test_exists(self):
1108
        # This is just to let the user know if they don't have the feature
1109
        # available
1110
        self.requireFeature(CompiledBtreeParserFeature)
1111
1112
1113
class TestMultiBisectRight(tests.TestCase):
1114
1115
    def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
1116
        self.assertEqual(offsets,
1117
                         btree_index.BTreeGraphIndex._multi_bisect_right(
1118
                            search_keys, fixed_keys))
1119
1120
    def test_after(self):
1121
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
1122
        self.assertMultiBisectRight([(3, ['e', 'f', 'g'])],
1123
                                    ['e', 'f', 'g'], ['a', 'b', 'c'])
1124
1125
    def test_before(self):
1126
        self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
1127
        self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])],
1128
                                    ['a', 'b', 'c', 'd'], ['e', 'f', 'g'])
1129
1130
    def test_exact(self):
1131
        self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
1132
        self.assertMultiBisectRight([(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b'])
1133
        self.assertMultiBisectRight([(1, ['a']), (3, ['c'])],
1134
                                    ['a', 'c'], ['a', 'b', 'c'])
1135
1136
    def test_inbetween(self):
1137
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
1138
        self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
1139
                                    ['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h'])
1140
1141
    def test_mixed(self):
1142
        self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']),
1143
                                     (4, ['g', 'h'])],
1144
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
1145
                                    ['c', 'd', 'f', 'g'])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1146
1147
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1148
class TestExpandOffsets(tests.TestCase):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1149
1150
    def make_index(self, size, recommended_pages=None):
1151
        """Make an index with a generic size.
1152
1153
        This doesn't actually create anything on disk, it just primes a
1154
        BTreeGraphIndex with the recommended information.
1155
        """
1156
        index = btree_index.BTreeGraphIndex(get_transport('memory:///'),
1157
                                            'test-index', size=size)
1158
        if recommended_pages is not None:
1159
            index._recommended_pages = recommended_pages
1160
        return index
1161
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1162
    def set_cached_offsets(self, index, cached_offsets):
1163
        """Monkeypatch to give a canned answer for _get_offsets_for...()."""
1164
        def _get_offsets_to_cached_pages():
1165
            cached = set(cached_offsets)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1166
            return cached
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1167
        index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1168
1169
    def prepare_index(self, index, node_ref_lists, key_length, key_count,
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1170
                      row_lengths, cached_offsets):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1171
        """Setup the BTreeGraphIndex with some pre-canned information."""
1172
        index.node_ref_lists = node_ref_lists
1173
        index._key_length = key_length
1174
        index._key_count = key_count
1175
        index._row_lengths = row_lengths
1176
        index._compute_row_offsets()
1177
        index._root_node = btree_index._InternalNode('internal\noffset=0\n')
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1178
        self.set_cached_offsets(index, cached_offsets)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1179
1180
    def make_100_node_index(self):
1181
        index = self.make_index(4096*100, 6)
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1182
        # Consider we've already made a single request at the middle
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1183
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1184
                           key_count=1000, row_lengths=[1, 99],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1185
                           cached_offsets=[0, 50])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1186
        return index
1187
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1188
    def make_1000_node_index(self):
1189
        index = self.make_index(4096*1000, 6)
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1190
        # Pretend we've already made a single request in the middle
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1191
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1192
                           key_count=90000, row_lengths=[1, 9, 990],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1193
                           cached_offsets=[0, 5, 500])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1194
        return index
1195
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1196
    def assertNumPages(self, expected_pages, index, size):
1197
        index._size = size
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1198
        self.assertEqual(expected_pages, index._compute_total_pages_in_index())
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1199
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1200
    def assertExpandOffsets(self, expected, index, offsets):
1201
        self.assertEqual(expected, index._expand_offsets(offsets),
1202
                         'We did not get the expected value after expanding'
1203
                         ' %s' % (offsets,))
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1204
1205
    def test_default_recommended_pages(self):
1206
        index = self.make_index(None)
1207
        # local transport recommends 4096 byte reads, which is 1 page
1208
        self.assertEqual(1, index._recommended_pages)
1209
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1210
    def test__compute_total_pages_in_index(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1211
        index = self.make_index(None)
1212
        self.assertNumPages(1, index, 1024)
1213
        self.assertNumPages(1, index, 4095)
1214
        self.assertNumPages(1, index, 4096)
1215
        self.assertNumPages(2, index, 4097)
1216
        self.assertNumPages(2, index, 8192)
1217
        self.assertNumPages(76, index, 4096*75 + 10)
1218
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1219
    def test__find_layer_start_and_stop(self):
1220
        index = self.make_1000_node_index()
1221
        self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1222
        self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1223
        self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1224
        self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1225
        self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1226
        self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1227
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1228
    def test_unknown_size(self):
1229
        # We should not expand if we don't know the file size
1230
        index = self.make_index(None, 10)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1231
        self.assertExpandOffsets([0], index, [0])
1232
        self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1233
1234
    def test_more_than_recommended(self):
1235
        index = self.make_index(4096*100, 2)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1236
        self.assertExpandOffsets([1, 10], index, [1, 10])
1237
        self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1238
1239
    def test_read_all_from_root(self):
1240
        index = self.make_index(4096*10, 20)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1241
        self.assertExpandOffsets(range(10), index, [0])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1242
1243
    def test_read_all_when_cached(self):
1244
        # We've read enough that we can grab all the rest in a single request
1245
        index = self.make_index(4096*10, 5)
1246
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1247
                           key_count=1000, row_lengths=[1, 9],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1248
                           cached_offsets=[0, 1, 2, 5, 6])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1249
        # It should fill the remaining nodes, regardless of the one requested
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1250
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1251
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1252
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1253
1254
    def test_no_root_node(self):
1255
        index = self.make_index(4096*10, 5)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1256
        self.assertExpandOffsets([0], index, [0])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1257
1258
    def test_include_neighbors(self):
1259
        index = self.make_100_node_index()
1260
        # We expand in both directions, until we have at least 'recommended'
1261
        # pages
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1262
        self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1263
        self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1264
        # If we hit an 'edge' we continue in the other direction
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1265
        self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1266
        self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1267
1268
        # Requesting many nodes will expand all locations equally
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1269
        self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1270
        self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1271
                               [2, 10, 81])
1272
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1273
    def test_stop_at_cached(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1274
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1275
        self.set_cached_offsets(index, [0, 10, 19])
1276
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1277
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1278
        self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1279
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1280
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1281
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1282
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1283
    def test_cannot_fully_expand(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1284
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1285
        self.set_cached_offsets(index, [0, 10, 12])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1286
        # We don't go into an endless loop if we are bound by cached nodes
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1287
        self.assertExpandOffsets([11], index, [11])
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1288
1289
    def test_overlap(self):
1290
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1291
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1292
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1293
1294
    def test_stay_within_layer(self):
1295
        index = self.make_1000_node_index()
1296
        # When expanding a request, we won't read nodes from the next layer
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1297
        self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1298
        self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1299
        self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1300
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1301
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1302
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1303
        self.set_cached_offsets(index, [0, 4, 12])
1304
        self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1305
        self.assertExpandOffsets([10, 11], index, [11])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1306
1307
    def test_small_requests_unexpanded(self):
1308
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1309
        self.set_cached_offsets(index, [0])
1310
        self.assertExpandOffsets([1], index, [1])
1311
        self.assertExpandOffsets([50], index, [50])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1312
        # If we request more than one node, then we'll expand
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1313
        self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1314
1315
        # The first pass does not expand
1316
        index = self.make_1000_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1317
        self.set_cached_offsets(index, [0])
1318
        self.assertExpandOffsets([1], index, [1])
1319
        self.set_cached_offsets(index, [0, 1])
1320
        self.assertExpandOffsets([100], index, [100])
1321
        self.set_cached_offsets(index, [0, 1, 100])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1322
        # But after the first depth, we will expand
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1323
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1324
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1325
        self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1326
        self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,
1327
                                 [105])