~bzr-pqm/bzr/bzr.dev

5557.1.7 by John Arbash Meinel
Merge in the bzr.dev 5582
1
# Copyright (C) 2008-2011 Canonical Ltd
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
2
#
3
# This program is free software; you can redistribute it and/or modify
3641.3.29 by John Arbash Meinel
Cleanup the copyright headers
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
16
#
17
18
"""Tests for btree indices."""
19
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
20
import pprint
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
21
import zlib
22
23
from bzrlib import (
24
    btree_index,
25
    errors,
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
26
    fifo_cache,
27
    lru_cache,
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
28
    osutils,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
29
    tests,
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
30
    transport,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
31
    )
32
from bzrlib.tests import (
33
    TestCaseWithTransport,
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
34
    scenarios,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
35
    )
36
37
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
38
load_tests = scenarios.load_tests_apply_scenarios
39
40
41
def btreeparser_scenarios():
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
42
    import bzrlib._btree_serializer_py as py_module
4084.5.1 by Robert Collins
Bulk update all test adaptation into a single approach, using multiply_tests rather than test adapters.
43
    scenarios = [('python', {'parse_btree': py_module})]
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
44
    if compiled_btreeparser_feature.available():
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
45
        scenarios.append(('C', 
46
            {'parse_btree': compiled_btreeparser_feature.module}))
47
    return scenarios
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
48
49
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
50
compiled_btreeparser_feature = tests.ModuleAvailableFeature(
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
51
    'bzrlib._btree_serializer_pyx')
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
52
53
54
class BTreeTestCase(TestCaseWithTransport):
55
    # test names here are suffixed by the key length and reference list count
56
    # that they test.
57
58
    def setUp(self):
59
        TestCaseWithTransport.setUp(self)
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
60
        self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
61
62
    def make_nodes(self, count, key_elements, reference_lists):
63
        """Generate count*key_elements sample nodes."""
64
        keys = []
65
        for prefix_pos in range(key_elements):
66
            if key_elements - 1:
67
                prefix = (str(prefix_pos) * 40,)
68
            else:
69
                prefix = ()
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
70
            for pos in xrange(count):
71
                # TODO: This creates odd keys. When count == 100,000, it
72
                #       creates a 240 byte key
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
73
                key = prefix + (str(pos) * 40,)
74
                value = "value:%s" % pos
75
                if reference_lists:
76
                    # generate some references
77
                    refs = []
78
                    for list_pos in range(reference_lists):
79
                        # as many keys in each list as its index + the key depth
80
                        # mod 2 - this generates both 0 length lists and
81
                        # ones slightly longer than the number of lists.
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
82
                        # It also ensures we have non homogeneous lists.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
83
                        refs.append([])
84
                        for ref_pos in range(list_pos + pos % 2):
85
                            if pos % 2:
86
                                # refer to a nearby key
87
                                refs[-1].append(prefix + ("ref" + str(pos - 1) * 40,))
88
                            else:
89
                                # serial of this ref in the ref list
90
                                refs[-1].append(prefix + ("ref" + str(ref_pos) * 40,))
91
                        refs[-1] = tuple(refs[-1])
92
                    refs = tuple(refs)
93
                else:
94
                    refs = ()
95
                keys.append((key, value, refs))
96
        return keys
97
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
98
    def shrink_page_size(self):
99
        """Shrink the default page size so that less fits in a page."""
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
100
        self.overrideAttr(btree_index, '_PAGE_SIZE')
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
101
        btree_index._PAGE_SIZE = 2048
102
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
103
104
class TestBTreeBuilder(BTreeTestCase):
105
4744.2.7 by John Arbash Meinel
Add .clear_cache() members to GraphIndexBuilder and BTreeBuilder.
106
    def test_clear_cache(self):
107
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
108
        # This is a no-op, but we need the api to be consistent with other
109
        # BTreeGraphIndex apis.
110
        builder.clear_cache()
111
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
112
    def test_empty_1_0(self):
113
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
114
        # NamedTemporaryFile dies on builder.finish().read(). weird.
115
        temp_file = builder.finish()
116
        content = temp_file.read()
117
        del temp_file
118
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
119
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
120
            "row_lengths=\n",
121
            content)
122
123
    def test_empty_2_1(self):
124
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
125
        # NamedTemporaryFile dies on builder.finish().read(). weird.
126
        temp_file = builder.finish()
127
        content = temp_file.read()
128
        del temp_file
129
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
130
            "B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
131
            "row_lengths=\n",
132
            content)
133
134
    def test_root_leaf_1_0(self):
135
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
136
        nodes = self.make_nodes(5, 1, 0)
137
        for node in nodes:
138
            builder.add_node(*node)
139
        # NamedTemporaryFile dies on builder.finish().read(). weird.
140
        temp_file = builder.finish()
141
        content = temp_file.read()
142
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
143
        self.assertEqual(131, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
144
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
145
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
146
            "row_lengths=1\n",
147
            content[:73])
148
        node_content = content[73:]
149
        node_bytes = zlib.decompress(node_content)
150
        expected_node = ("type=leaf\n"
151
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
152
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
153
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
154
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
155
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
156
        self.assertEqual(expected_node, node_bytes)
157
158
    def test_root_leaf_2_2(self):
159
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
160
        nodes = self.make_nodes(5, 2, 2)
161
        for node in nodes:
162
            builder.add_node(*node)
163
        # NamedTemporaryFile dies on builder.finish().read(). weird.
164
        temp_file = builder.finish()
165
        content = temp_file.read()
166
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
167
        self.assertEqual(238, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
168
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
169
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
170
            "row_lengths=1\n",
171
            content[:74])
172
        node_content = content[74:]
173
        node_bytes = zlib.decompress(node_content)
174
        expected_node = (
175
            "type=leaf\n"
176
            "0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
177
            "0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
178
            "0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
179
            "0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
180
            "0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
181
            "1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
182
            "1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
183
            "1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
184
            "1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
185
            "1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
186
            ""
187
            )
188
        self.assertEqual(expected_node, node_bytes)
189
190
    def test_2_leaves_1_0(self):
191
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
192
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
193
        for node in nodes:
194
            builder.add_node(*node)
195
        # NamedTemporaryFile dies on builder.finish().read(). weird.
196
        temp_file = builder.finish()
197
        content = temp_file.read()
198
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
199
        self.assertEqual(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
200
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
201
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
202
            "row_lengths=1,2\n",
203
            content[:77])
204
        root = content[77:4096]
205
        leaf1 = content[4096:8192]
206
        leaf2 = content[8192:]
207
        root_bytes = zlib.decompress(root)
208
        expected_root = (
209
            "type=internal\n"
210
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
211
            ) + ("307" * 40) + "\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
212
        self.assertEqual(expected_root, root_bytes)
213
        # We already know serialisation works for leaves, check key selection:
214
        leaf1_bytes = zlib.decompress(leaf1)
215
        sorted_node_keys = sorted(node[0] for node in nodes)
216
        node = btree_index._LeafNode(leaf1_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
217
        self.assertEqual(231, len(node))
218
        self.assertEqual(sorted_node_keys[:231], node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
219
        leaf2_bytes = zlib.decompress(leaf2)
220
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
221
        self.assertEqual(400 - 231, len(node))
222
        self.assertEqual(sorted_node_keys[231:], node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
223
224
    def test_last_page_rounded_1_layer(self):
225
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
226
        nodes = self.make_nodes(10, 1, 0)
227
        for node in nodes:
228
            builder.add_node(*node)
229
        # NamedTemporaryFile dies on builder.finish().read(). weird.
230
        temp_file = builder.finish()
231
        content = temp_file.read()
232
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
233
        self.assertEqual(155, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
234
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
235
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
236
            "row_lengths=1\n",
237
            content[:74])
238
        # Check thelast page is well formed
239
        leaf2 = content[74:]
240
        leaf2_bytes = zlib.decompress(leaf2)
241
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
242
        self.assertEqual(10, len(node))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
243
        sorted_node_keys = sorted(node[0] for node in nodes)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
244
        self.assertEqual(sorted_node_keys, node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
245
246
    def test_last_page_not_rounded_2_layer(self):
247
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
248
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
249
        for node in nodes:
250
            builder.add_node(*node)
251
        # NamedTemporaryFile dies on builder.finish().read(). weird.
252
        temp_file = builder.finish()
253
        content = temp_file.read()
254
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
255
        self.assertEqual(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
256
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
257
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
258
            "row_lengths=1,2\n",
259
            content[:77])
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
260
        # Check the last page is well formed
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
261
        leaf2 = content[8192:]
262
        leaf2_bytes = zlib.decompress(leaf2)
263
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
264
        self.assertEqual(400 - 231, len(node))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
265
        sorted_node_keys = sorted(node[0] for node in nodes)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
266
        self.assertEqual(sorted_node_keys[231:], node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
267
268
    def test_three_level_tree_details(self):
269
        # The left most pointer in the second internal node in a row should
270
        # pointer to the second node that the internal node is for, _not_
271
        # the first, otherwise the first node overlaps with the last node of
272
        # the prior internal node on that row.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
273
        self.shrink_page_size()
274
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
275
        # 40K nodes is enough to create a two internal nodes on the second
276
        # level, with a 2K page size
277
        nodes = self.make_nodes(20000, 2, 2)
3641.3.5 by John Arbash Meinel
For iter_all and three_level tests adjust spill-at.
278
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
279
        for node in nodes:
280
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
281
        t = transport.get_transport('trace+' + self.get_url(''))
282
        size = t.put_file('index', self.time(builder.finish))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
283
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
284
        index = btree_index.BTreeGraphIndex(t, 'index', size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
285
        # Seed the metadata, we're using internal calls now.
286
        index.key_count()
287
        self.assertEqual(3, len(index._row_lengths),
288
            "Not enough rows: %r" % index._row_lengths)
289
        self.assertEqual(4, len(index._row_offsets))
290
        self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
291
        internal_nodes = index._get_internal_nodes([0, 1, 2])
292
        root_node = internal_nodes[0]
293
        internal_node1 = internal_nodes[1]
294
        internal_node2 = internal_nodes[2]
295
        # The left most node node2 points at should be one after the right most
296
        # node pointed at by node1.
297
        self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
298
        # The left most key of the second node pointed at by internal_node2
299
        # should be its first key. We can check this by looking for its first key
300
        # in the second node it points at
301
        pos = index._row_offsets[2] + internal_node2.offset + 1
302
        leaf = index._get_leaf_nodes([pos])[pos]
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
303
        self.assertTrue(internal_node2.keys[0] in leaf)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
304
305
    def test_2_leaves_2_2(self):
306
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
307
        nodes = self.make_nodes(100, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
308
        for node in nodes:
309
            builder.add_node(*node)
310
        # NamedTemporaryFile dies on builder.finish().read(). weird.
311
        temp_file = builder.finish()
312
        content = temp_file.read()
313
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
314
        self.assertEqual(12643, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
315
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
316
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
317
            "row_lengths=1,3\n",
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
318
            content[:77])
319
        root = content[77:4096]
320
        leaf1 = content[4096:8192]
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
321
        leaf2 = content[8192:12288]
322
        leaf3 = content[12288:]
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
323
        root_bytes = zlib.decompress(root)
324
        expected_root = (
325
            "type=internal\n"
326
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
327
            + ("0" * 40) + "\x00" + ("91" * 40) + "\n"
328
            + ("1" * 40) + "\x00" + ("81" * 40) + "\n"
329
            )
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
330
        self.assertEqual(expected_root, root_bytes)
331
        # We assume the other leaf nodes have been written correctly - layering
332
        # FTW.
333
334
    def test_spill_index_stress_1_1(self):
335
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
336
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
337
        builder.add_node(*nodes[0])
338
        # Test the parts of the index that take up memory are doing so
339
        # predictably.
340
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
341
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
342
        builder.add_node(*nodes[1])
343
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
344
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
345
        self.assertEqual(1, len(builder._backing_indices))
346
        self.assertEqual(2, builder._backing_indices[0].key_count())
347
        # now back to memory
348
        builder.add_node(*nodes[2])
349
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
350
        self.assertIs(None, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
351
        # And spills to a second backing index combing all
352
        builder.add_node(*nodes[3])
353
        self.assertEqual(0, len(builder._nodes))
354
        self.assertIs(None, builder._nodes_by_key)
355
        self.assertEqual(2, len(builder._backing_indices))
356
        self.assertEqual(None, builder._backing_indices[0])
357
        self.assertEqual(4, builder._backing_indices[1].key_count())
358
        # The next spills to the 2-len slot
359
        builder.add_node(*nodes[4])
360
        builder.add_node(*nodes[5])
361
        self.assertEqual(0, len(builder._nodes))
362
        self.assertIs(None, builder._nodes_by_key)
363
        self.assertEqual(2, len(builder._backing_indices))
364
        self.assertEqual(2, builder._backing_indices[0].key_count())
365
        self.assertEqual(4, builder._backing_indices[1].key_count())
366
        # Next spill combines
367
        builder.add_node(*nodes[6])
368
        builder.add_node(*nodes[7])
369
        self.assertEqual(3, len(builder._backing_indices))
370
        self.assertEqual(None, builder._backing_indices[0])
371
        self.assertEqual(None, builder._backing_indices[1])
372
        self.assertEqual(8, builder._backing_indices[2].key_count())
373
        # And so forth - counting up in binary.
374
        builder.add_node(*nodes[8])
375
        builder.add_node(*nodes[9])
376
        self.assertEqual(3, len(builder._backing_indices))
377
        self.assertEqual(2, builder._backing_indices[0].key_count())
378
        self.assertEqual(None, builder._backing_indices[1])
379
        self.assertEqual(8, builder._backing_indices[2].key_count())
380
        builder.add_node(*nodes[10])
381
        builder.add_node(*nodes[11])
382
        self.assertEqual(3, len(builder._backing_indices))
383
        self.assertEqual(None, builder._backing_indices[0])
384
        self.assertEqual(4, builder._backing_indices[1].key_count())
385
        self.assertEqual(8, builder._backing_indices[2].key_count())
386
        builder.add_node(*nodes[12])
387
        # Test that memory and disk are both used for query methods; and that
388
        # None is skipped over happily.
389
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
390
            list(builder.iter_all_entries()))
391
        # Two nodes - one memory one disk
392
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
393
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
394
        self.assertEqual(13, builder.key_count())
395
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
396
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
397
        builder.add_node(*nodes[13])
398
        self.assertEqual(3, len(builder._backing_indices))
399
        self.assertEqual(2, builder._backing_indices[0].key_count())
400
        self.assertEqual(4, builder._backing_indices[1].key_count())
401
        self.assertEqual(8, builder._backing_indices[2].key_count())
402
        builder.add_node(*nodes[14])
403
        builder.add_node(*nodes[15])
404
        self.assertEqual(4, len(builder._backing_indices))
405
        self.assertEqual(None, builder._backing_indices[0])
406
        self.assertEqual(None, builder._backing_indices[1])
407
        self.assertEqual(None, builder._backing_indices[2])
408
        self.assertEqual(16, builder._backing_indices[3].key_count())
409
        # Now finish, and check we got a correctly ordered tree
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
410
        t = self.get_transport('')
411
        size = t.put_file('index', builder.finish())
412
        index = btree_index.BTreeGraphIndex(t, 'index', size)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
413
        nodes = list(index.iter_all_entries())
414
        self.assertEqual(sorted(nodes), nodes)
415
        self.assertEqual(16, len(nodes))
416
417
    def test_spill_index_stress_1_1_no_combine(self):
418
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
4168.3.6 by John Arbash Meinel
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
419
        builder.set_optimize(for_size=False, combine_backing_indices=False)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
420
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
421
        builder.add_node(*nodes[0])
422
        # Test the parts of the index that take up memory are doing so
423
        # predictably.
424
        self.assertEqual(1, len(builder._nodes))
425
        self.assertIs(None, builder._nodes_by_key)
426
        builder.add_node(*nodes[1])
427
        self.assertEqual(0, len(builder._nodes))
428
        self.assertIs(None, builder._nodes_by_key)
429
        self.assertEqual(1, len(builder._backing_indices))
430
        self.assertEqual(2, builder._backing_indices[0].key_count())
431
        # now back to memory
432
        builder.add_node(*nodes[2])
433
        self.assertEqual(1, len(builder._nodes))
434
        self.assertIs(None, builder._nodes_by_key)
435
        # And spills to a second backing index but doesn't combine
436
        builder.add_node(*nodes[3])
437
        self.assertEqual(0, len(builder._nodes))
438
        self.assertIs(None, builder._nodes_by_key)
439
        self.assertEqual(2, len(builder._backing_indices))
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
440
        for backing_index in builder._backing_indices:
441
            self.assertEqual(2, backing_index.key_count())
442
        # The next spills to the 3rd slot
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
443
        builder.add_node(*nodes[4])
444
        builder.add_node(*nodes[5])
445
        self.assertEqual(0, len(builder._nodes))
446
        self.assertIs(None, builder._nodes_by_key)
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
447
        self.assertEqual(3, len(builder._backing_indices))
448
        for backing_index in builder._backing_indices:
449
            self.assertEqual(2, backing_index.key_count())
450
        # Now spill a few more, and check that we don't combine
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
451
        builder.add_node(*nodes[6])
452
        builder.add_node(*nodes[7])
453
        builder.add_node(*nodes[8])
454
        builder.add_node(*nodes[9])
455
        builder.add_node(*nodes[10])
456
        builder.add_node(*nodes[11])
457
        builder.add_node(*nodes[12])
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
458
        self.assertEqual(6, len(builder._backing_indices))
459
        for backing_index in builder._backing_indices:
460
            self.assertEqual(2, backing_index.key_count())
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
461
        # Test that memory and disk are both used for query methods; and that
462
        # None is skipped over happily.
463
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
464
            list(builder.iter_all_entries()))
465
        # Two nodes - one memory one disk
466
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
467
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
468
        self.assertEqual(13, builder.key_count())
469
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
470
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
471
        builder.add_node(*nodes[13])
472
        builder.add_node(*nodes[14])
473
        builder.add_node(*nodes[15])
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
474
        self.assertEqual(8, len(builder._backing_indices))
475
        for backing_index in builder._backing_indices:
476
            self.assertEqual(2, backing_index.key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
477
        # Now finish, and check we got a correctly ordered tree
478
        transport = self.get_transport('')
479
        size = transport.put_file('index', builder.finish())
480
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
481
        nodes = list(index.iter_all_entries())
482
        self.assertEqual(sorted(nodes), nodes)
483
        self.assertEqual(16, len(nodes))
484
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
485
    def test_set_optimize(self):
486
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
487
        builder.set_optimize(for_size=True)
488
        self.assertTrue(builder._optimize_for_size)
489
        builder.set_optimize(for_size=False)
490
        self.assertFalse(builder._optimize_for_size)
4168.3.6 by John Arbash Meinel
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
491
        # test that we can set combine_backing_indices without effecting
492
        # _optimize_for_size
493
        obj = object()
494
        builder._optimize_for_size = obj
495
        builder.set_optimize(combine_backing_indices=False)
496
        self.assertFalse(builder._combine_backing_indices)
497
        self.assertIs(obj, builder._optimize_for_size)
498
        builder.set_optimize(combine_backing_indices=True)
499
        self.assertTrue(builder._combine_backing_indices)
500
        self.assertIs(obj, builder._optimize_for_size)
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
501
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
502
    def test_spill_index_stress_2_2(self):
503
        # test that references and longer keys don't confuse things.
504
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
505
            spill_at=2)
506
        nodes = self.make_nodes(16, 2, 2)
507
        builder.add_node(*nodes[0])
508
        # Test the parts of the index that take up memory are doing so
509
        # predictably.
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
510
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
511
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
512
        builder.add_node(*nodes[1])
513
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
514
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
515
        self.assertEqual(1, len(builder._backing_indices))
516
        self.assertEqual(2, builder._backing_indices[0].key_count())
517
        # now back to memory
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
518
        old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
519
        builder.add_node(*nodes[2])
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
520
        self.assertEqual(1, len(builder._nodes))
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
521
        self.assertIsNot(None, builder._nodes_by_key)
522
        self.assertNotEqual({}, builder._nodes_by_key)
523
        # We should have a new entry
524
        self.assertNotEqual(old, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
525
        # And spills to a second backing index combing all
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
526
        builder.add_node(*nodes[3])
527
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
528
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
529
        self.assertEqual(2, len(builder._backing_indices))
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
530
        self.assertEqual(None, builder._backing_indices[0])
531
        self.assertEqual(4, builder._backing_indices[1].key_count())
532
        # The next spills to the 2-len slot
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
533
        builder.add_node(*nodes[4])
534
        builder.add_node(*nodes[5])
535
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
536
        self.assertIs(None, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
537
        self.assertEqual(2, len(builder._backing_indices))
538
        self.assertEqual(2, builder._backing_indices[0].key_count())
539
        self.assertEqual(4, builder._backing_indices[1].key_count())
540
        # Next spill combines
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
541
        builder.add_node(*nodes[6])
542
        builder.add_node(*nodes[7])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
543
        self.assertEqual(3, len(builder._backing_indices))
544
        self.assertEqual(None, builder._backing_indices[0])
545
        self.assertEqual(None, builder._backing_indices[1])
546
        self.assertEqual(8, builder._backing_indices[2].key_count())
547
        # And so forth - counting up in binary.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
548
        builder.add_node(*nodes[8])
549
        builder.add_node(*nodes[9])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
550
        self.assertEqual(3, len(builder._backing_indices))
551
        self.assertEqual(2, builder._backing_indices[0].key_count())
552
        self.assertEqual(None, builder._backing_indices[1])
553
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
554
        builder.add_node(*nodes[10])
555
        builder.add_node(*nodes[11])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
556
        self.assertEqual(3, len(builder._backing_indices))
557
        self.assertEqual(None, builder._backing_indices[0])
558
        self.assertEqual(4, builder._backing_indices[1].key_count())
559
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
560
        builder.add_node(*nodes[12])
561
        # Test that memory and disk are both used for query methods; and that
562
        # None is skipped over happily.
563
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
564
            list(builder.iter_all_entries()))
565
        # Two nodes - one memory one disk
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
566
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
567
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
568
        self.assertEqual(13, builder.key_count())
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
569
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
570
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
571
        builder.add_node(*nodes[13])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
572
        self.assertEqual(3, len(builder._backing_indices))
573
        self.assertEqual(2, builder._backing_indices[0].key_count())
574
        self.assertEqual(4, builder._backing_indices[1].key_count())
575
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
576
        builder.add_node(*nodes[14])
577
        builder.add_node(*nodes[15])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
578
        self.assertEqual(4, len(builder._backing_indices))
579
        self.assertEqual(None, builder._backing_indices[0])
580
        self.assertEqual(None, builder._backing_indices[1])
581
        self.assertEqual(None, builder._backing_indices[2])
582
        self.assertEqual(16, builder._backing_indices[3].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
583
        # Now finish, and check we got a correctly ordered tree
584
        transport = self.get_transport('')
585
        size = transport.put_file('index', builder.finish())
586
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
587
        nodes = list(index.iter_all_entries())
588
        self.assertEqual(sorted(nodes), nodes)
589
        self.assertEqual(16, len(nodes))
590
591
    def test_spill_index_duplicate_key_caught_on_finish(self):
592
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
593
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
594
        builder.add_node(*nodes[0])
595
        builder.add_node(*nodes[1])
596
        builder.add_node(*nodes[0])
597
        self.assertRaises(errors.BadIndexDuplicateKey, builder.finish)
598
599
600
class TestBTreeIndex(BTreeTestCase):
601
602
    def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
603
        builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
604
            key_elements=key_elements)
605
        for key, value, references in nodes:
606
            builder.add_node(key, value, references)
607
        stream = builder.finish()
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
608
        trans = transport.get_transport('trace+' + self.get_url())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
609
        size = trans.put_file('index', stream)
610
        return btree_index.BTreeGraphIndex(trans, 'index', size)
611
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
612
    def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
613
                               offset=0):
614
        builder = btree_index.BTreeBuilder(key_elements=key_elements,
615
                                           reference_lists=ref_lists)
616
        builder.add_nodes(nodes)
617
        transport = self.get_transport('')
5074.4.7 by John Arbash Meinel
Workaround a strange bug about the file closing before it is read.
618
        # NamedTemporaryFile dies on builder.finish().read(). weird.
619
        temp_file = builder.finish()
620
        content = temp_file.read()
621
        del temp_file
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
622
        size = len(content)
623
        transport.put_bytes('index', (' '*offset)+content)
624
        return btree_index.BTreeGraphIndex(transport, 'index', size=size,
625
                                           offset=offset)
626
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
627
    def test_clear_cache(self):
628
        nodes = self.make_nodes(160, 2, 2)
629
        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
630
        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
631
        self.assertEqual([1, 4], index._row_lengths)
632
        self.assertIsNot(None, index._root_node)
4744.2.9 by John Arbash Meinel
Move the note and assertions.
633
        internal_node_pre_clear = index._internal_node_cache.keys()
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
634
        self.assertTrue(len(index._leaf_node_cache) > 0)
635
        index.clear_cache()
636
        # We don't touch _root_node or _internal_node_cache, both should be
637
        # small, and can save a round trip or two
638
        self.assertIsNot(None, index._root_node)
4744.2.9 by John Arbash Meinel
Move the note and assertions.
639
        # NOTE: We don't want to affect the _internal_node_cache, as we expect
640
        #       it will be small, and if we ever do touch this index again, it
641
        #       will save round-trips.  This assertion isn't very strong,
642
        #       becuase without a 3-level index, we don't have any internal
643
        #       nodes cached.
644
        self.assertEqual(internal_node_pre_clear,
645
                         index._internal_node_cache.keys())
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
646
        self.assertEqual(0, len(index._leaf_node_cache))
647
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
648
    def test_trivial_constructor(self):
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
649
        t = transport.get_transport('trace+' + self.get_url(''))
650
        index = btree_index.BTreeGraphIndex(t, 'index', None)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
651
        # Checks the page size at load, but that isn't logged yet.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
652
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
653
654
    def test_with_size_constructor(self):
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
655
        t = transport.get_transport('trace+' + self.get_url(''))
656
        index = btree_index.BTreeGraphIndex(t, 'index', 1)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
657
        # Checks the page size at load, but that isn't logged yet.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
658
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
659
660
    def test_empty_key_count_no_size(self):
661
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
662
        t = transport.get_transport('trace+' + self.get_url(''))
663
        t.put_file('index', builder.finish())
664
        index = btree_index.BTreeGraphIndex(t, 'index', None)
665
        del t._activity[:]
666
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
667
        self.assertEqual(0, index.key_count())
668
        # The entire index should have been requested (as we generally have the
669
        # size available, and doing many small readvs is inappropriate).
670
        # We can't tell how much was actually read here, but - check the code.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
671
        self.assertEqual([('get', 'index')], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
672
673
    def test_empty_key_count(self):
674
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
675
        t = transport.get_transport('trace+' + self.get_url(''))
676
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
677
        self.assertEqual(72, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
678
        index = btree_index.BTreeGraphIndex(t, 'index', size)
679
        del t._activity[:]
680
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
681
        self.assertEqual(0, index.key_count())
682
        # The entire index should have been read, as 4K > size
683
        self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
684
                         t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
685
686
    def test_non_empty_key_count_2_2(self):
687
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
688
        nodes = self.make_nodes(35, 2, 2)
689
        for node in nodes:
690
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
691
        t = transport.get_transport('trace+' + self.get_url(''))
692
        size = t.put_file('index', builder.finish())
693
        index = btree_index.BTreeGraphIndex(t, 'index', size)
694
        del t._activity[:]
695
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
696
        self.assertEqual(70, index.key_count())
697
        # The entire index should have been read, as it is one page long.
698
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
699
            t._activity)
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
700
        self.assertEqual(1173, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
701
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
702
    def test_with_offset_no_size(self):
703
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
704
                                            offset=1234,
705
                                            nodes=self.make_nodes(200, 1, 1))
706
        index._size = None # throw away the size info
707
        self.assertEqual(200, index.key_count())
708
709
    def test_with_small_offset(self):
710
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
711
                                            offset=1234,
712
                                            nodes=self.make_nodes(200, 1, 1))
713
        self.assertEqual(200, index.key_count())
714
715
    def test_with_large_offset(self):
716
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
717
                                            offset=123456,
718
                                            nodes=self.make_nodes(200, 1, 1))
719
        self.assertEqual(200, index.key_count())
720
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
721
    def test__read_nodes_no_size_one_page_reads_once(self):
722
        self.make_index(nodes=[(('key',), 'value', ())])
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
723
        trans = transport.get_transport('trace+' + self.get_url())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
724
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
725
        del trans._activity[:]
726
        nodes = dict(index._read_nodes([0]))
727
        self.assertEqual([0], nodes.keys())
728
        node = nodes[0]
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
729
        self.assertEqual([('key',)], node.all_keys())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
730
        self.assertEqual([('get', 'index')], trans._activity)
731
732
    def test__read_nodes_no_size_multiple_pages(self):
733
        index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
734
        index.key_count()
735
        num_pages = index._row_offsets[-1]
736
        # Reopen with a traced transport and no size
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
737
        trans = transport.get_transport('trace+' + self.get_url())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
738
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
739
        del trans._activity[:]
740
        nodes = dict(index._read_nodes([0]))
741
        self.assertEqual(range(num_pages), nodes.keys())
742
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
743
    def test_2_levels_key_count_2_2(self):
744
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
745
        nodes = self.make_nodes(160, 2, 2)
746
        for node in nodes:
747
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
748
        t = transport.get_transport('trace+' + self.get_url(''))
749
        size = t.put_file('index', builder.finish())
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
750
        self.assertEqual(17692, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
751
        index = btree_index.BTreeGraphIndex(t, 'index', size)
752
        del t._activity[:]
753
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
754
        self.assertEqual(320, index.key_count())
755
        # The entire index should not have been read.
756
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
757
                         t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
758
759
    def test_validate_one_page(self):
760
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
761
        nodes = self.make_nodes(45, 2, 2)
762
        for node in nodes:
763
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
764
        t = transport.get_transport('trace+' + self.get_url(''))
765
        size = t.put_file('index', builder.finish())
766
        index = btree_index.BTreeGraphIndex(t, 'index', size)
767
        del t._activity[:]
768
        self.assertEqual([], t._activity)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
769
        index.validate()
770
        # The entire index should have been read linearly.
771
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
772
                         t._activity)
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
773
        self.assertEqual(1488, size)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
774
775
    def test_validate_two_pages(self):
776
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
777
        nodes = self.make_nodes(80, 2, 2)
778
        for node in nodes:
779
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
780
        t = transport.get_transport('trace+' + self.get_url(''))
781
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
782
        # Root page, 2 leaf pages
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
783
        self.assertEqual(9339, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
784
        index = btree_index.BTreeGraphIndex(t, 'index', size)
785
        del t._activity[:]
786
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
787
        index.validate()
788
        # The entire index should have been read linearly.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
789
        self.assertEqual(
790
            [('readv', 'index', [(0, 4096)], False, None),
791
             ('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
792
            t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
793
        # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
794
        # node and make validate find them.
795
796
    def test_eq_ne(self):
797
        # two indices are equal when constructed with the same parameters:
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
798
        t1 = transport.get_transport('trace+' + self.get_url(''))
5609.9.4 by Vincent Ladeuil
Use self.get_transport instead of transport.get_transport where possible.
799
        t2 = self.get_transport()
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
800
        self.assertTrue(
801
            btree_index.BTreeGraphIndex(t1, 'index', None) ==
802
            btree_index.BTreeGraphIndex(t1, 'index', None))
803
        self.assertTrue(
804
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
805
            btree_index.BTreeGraphIndex(t1, 'index', 20))
806
        self.assertFalse(
807
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
808
            btree_index.BTreeGraphIndex(t2, 'index', 20))
809
        self.assertFalse(
810
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) ==
811
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
812
        self.assertFalse(
813
            btree_index.BTreeGraphIndex(t1, 'index', 10) ==
814
            btree_index.BTreeGraphIndex(t1, 'index', 20))
815
        self.assertFalse(
816
            btree_index.BTreeGraphIndex(t1, 'index', None) !=
817
            btree_index.BTreeGraphIndex(t1, 'index', None))
818
        self.assertFalse(
819
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
820
            btree_index.BTreeGraphIndex(t1, 'index', 20))
821
        self.assertTrue(
822
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
823
            btree_index.BTreeGraphIndex(t2, 'index', 20))
824
        self.assertTrue(
825
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) !=
826
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
827
        self.assertTrue(
828
            btree_index.BTreeGraphIndex(t1, 'index', 10) !=
829
            btree_index.BTreeGraphIndex(t1, 'index', 20))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
830
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
831
    def test_iter_all_only_root_no_size(self):
832
        self.make_index(nodes=[(('key',), 'value', ())])
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
833
        t = transport.get_transport('trace+' + self.get_url(''))
834
        index = btree_index.BTreeGraphIndex(t, 'index', None)
835
        del t._activity[:]
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
836
        self.assertEqual([(('key',), 'value')],
837
                         [x[1:] for x in index.iter_all_entries()])
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
838
        self.assertEqual([('get', 'index')], t._activity)
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
839
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
840
    def test_iter_all_entries_reads(self):
841
        # iterating all entries reads the header, then does a linear
842
        # read.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
843
        self.shrink_page_size()
3641.5.11 by John Arbash Meinel
Some small test cleanup
844
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
845
        # 20k nodes is enough to create a two internal nodes on the second
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
846
        # level, with a 2K page size
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
847
        nodes = self.make_nodes(10000, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
848
        for node in nodes:
849
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
850
        t = transport.get_transport('trace+' + self.get_url(''))
851
        size = t.put_file('index', builder.finish())
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
852
        self.assertEqual(1303220, size, 'number of expected bytes in the'
853
                                        ' output changed')
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
854
        page_size = btree_index._PAGE_SIZE
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
855
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
856
        index = btree_index.BTreeGraphIndex(t, 'index', size)
857
        del t._activity[:]
858
        self.assertEqual([], t._activity)
3641.5.8 by John Arbash Meinel
Fix up the test suite, now that we pack more efficiently.
859
        found_nodes = self.time(list, index.iter_all_entries())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
860
        bare_nodes = []
861
        for node in found_nodes:
862
            self.assertTrue(node[0] is index)
863
            bare_nodes.append(node[1:])
864
        self.assertEqual(3, len(index._row_lengths),
865
            "Not enough rows: %r" % index._row_lengths)
866
        # Should be as long as the nodes we supplied
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
867
        self.assertEqual(20000, len(found_nodes))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
868
        # Should have the same content
869
        self.assertEqual(set(nodes), set(bare_nodes))
870
        # Should have done linear scan IO up the index, ignoring
871
        # the internal nodes:
872
        # The entire index should have been read
873
        total_pages = sum(index._row_lengths)
874
        self.assertEqual(total_pages, index._row_offsets[-1])
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
875
        self.assertEqual(1303220, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
876
        # The start of the leaves
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
877
        first_byte = index._row_offsets[-2] * page_size
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
878
        readv_request = []
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
879
        for offset in range(first_byte, size, page_size):
880
            readv_request.append((offset, page_size))
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
881
        # The last page is truncated
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
882
        readv_request[-1] = (readv_request[-1][0], 1303220 % page_size)
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
883
        expected = [('readv', 'index', [(0, page_size)], False, None),
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
884
             ('readv',  'index', readv_request, False, None)]
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
885
        if expected != t._activity:
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
886
            self.assertEqualDiff(pprint.pformat(expected),
887
                                 pprint.pformat(transport._activity))
888
889
    def _test_iter_entries_references_resolved(self):
890
        index = self.make_index(1, nodes=[
891
            (('name', ), 'data', ([('ref', ), ('ref', )], )),
892
            (('ref', ), 'refdata', ([], ))])
893
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),('ref',)),)),
894
            (index, ('ref', ), 'refdata', ((), ))]),
895
            set(index.iter_entries([('name',), ('ref',)])))
896
897
    def test_iter_entries_references_2_refs_resolved(self):
898
        # iterating some entries reads just the pages needed. For now, to
899
        # get it working and start measuring, only 4K pages are read.
900
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
901
        # 80 nodes is enough to create a two-level index.
902
        nodes = self.make_nodes(160, 2, 2)
903
        for node in nodes:
904
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
905
        t = transport.get_transport('trace+' + self.get_url(''))
906
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
907
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
908
        index = btree_index.BTreeGraphIndex(t, 'index', size)
909
        del t._activity[:]
910
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
911
        # search for one key
912
        found_nodes = list(index.iter_entries([nodes[30][0]]))
913
        bare_nodes = []
914
        for node in found_nodes:
915
            self.assertTrue(node[0] is index)
916
            bare_nodes.append(node[1:])
917
        # Should be as long as the nodes we supplied
918
        self.assertEqual(1, len(found_nodes))
919
        # Should have the same content
920
        self.assertEqual(nodes[30], bare_nodes[0])
921
        # Should have read the root node, then one leaf page:
922
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
923
             ('readv',  'index', [(8192, 4096), ], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
924
            t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
925
926
    def test_iter_key_prefix_1_element_key_None(self):
927
        index = self.make_index()
928
        self.assertRaises(errors.BadIndexKey, list,
929
            index.iter_entries_prefix([(None, )]))
930
931
    def test_iter_key_prefix_wrong_length(self):
932
        index = self.make_index()
933
        self.assertRaises(errors.BadIndexKey, list,
934
            index.iter_entries_prefix([('foo', None)]))
935
        index = self.make_index(key_elements=2)
936
        self.assertRaises(errors.BadIndexKey, list,
937
            index.iter_entries_prefix([('foo', )]))
938
        self.assertRaises(errors.BadIndexKey, list,
939
            index.iter_entries_prefix([('foo', None, None)]))
940
941
    def test_iter_key_prefix_1_key_element_no_refs(self):
942
        index = self.make_index( nodes=[
943
            (('name', ), 'data', ()),
944
            (('ref', ), 'refdata', ())])
945
        self.assertEqual(set([(index, ('name', ), 'data'),
946
            (index, ('ref', ), 'refdata')]),
947
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
948
949
    def test_iter_key_prefix_1_key_element_refs(self):
950
        index = self.make_index(1, nodes=[
951
            (('name', ), 'data', ([('ref', )], )),
952
            (('ref', ), 'refdata', ([], ))])
953
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),),)),
954
            (index, ('ref', ), 'refdata', ((), ))]),
955
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
956
957
    def test_iter_key_prefix_2_key_element_no_refs(self):
958
        index = self.make_index(key_elements=2, nodes=[
959
            (('name', 'fin1'), 'data', ()),
960
            (('name', 'fin2'), 'beta', ()),
961
            (('ref', 'erence'), 'refdata', ())])
962
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
963
            (index, ('ref', 'erence'), 'refdata')]),
964
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
965
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
966
            (index, ('name', 'fin2'), 'beta')]),
967
            set(index.iter_entries_prefix([('name', None)])))
968
969
    def test_iter_key_prefix_2_key_element_refs(self):
970
        index = self.make_index(1, key_elements=2, nodes=[
971
            (('name', 'fin1'), 'data', ([('ref', 'erence')], )),
972
            (('name', 'fin2'), 'beta', ([], )),
973
            (('ref', 'erence'), 'refdata', ([], ))])
974
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
975
            (index, ('ref', 'erence'), 'refdata', ((), ))]),
976
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
977
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
978
            (index, ('name', 'fin2'), 'beta', ((), ))]),
979
            set(index.iter_entries_prefix([('name', None)])))
980
4011.5.5 by Andrew Bennetts
Fix typo in comment.
981
    # XXX: external_references tests are duplicated in test_index.  We
4011.5.3 by Andrew Bennetts
Implement and test external_references on GraphIndex and BTreeGraphIndex.
982
    # probably should have per_graph_index tests...
983
    def test_external_references_no_refs(self):
984
        index = self.make_index(ref_lists=0, nodes=[])
985
        self.assertRaises(ValueError, index.external_references, 0)
986
987
    def test_external_references_no_results(self):
988
        index = self.make_index(ref_lists=1, nodes=[
989
            (('key',), 'value', ([],))])
990
        self.assertEqual(set(), index.external_references(0))
991
992
    def test_external_references_missing_ref(self):
993
        missing_key = ('missing',)
994
        index = self.make_index(ref_lists=1, nodes=[
995
            (('key',), 'value', ([missing_key],))])
996
        self.assertEqual(set([missing_key]), index.external_references(0))
997
998
    def test_external_references_multiple_ref_lists(self):
999
        missing_key = ('missing',)
1000
        index = self.make_index(ref_lists=2, nodes=[
1001
            (('key',), 'value', ([], [missing_key]))])
1002
        self.assertEqual(set([]), index.external_references(0))
1003
        self.assertEqual(set([missing_key]), index.external_references(1))
1004
1005
    def test_external_references_two_records(self):
1006
        index = self.make_index(ref_lists=1, nodes=[
1007
            (('key-1',), 'value', ([('key-2',)],)),
1008
            (('key-2',), 'value', ([],)),
1009
            ])
1010
        self.assertEqual(set([]), index.external_references(0))
1011
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1012
    def test__find_ancestors_one_page(self):
4593.4.5 by John Arbash Meinel
Start adding some tests.
1013
        key1 = ('key-1',)
1014
        key2 = ('key-2',)
1015
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1016
            (key1, 'value', ([key2],)),
1017
            (key2, 'value', ([],)),
1018
            ])
1019
        parent_map = {}
1020
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1021
        search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
4593.4.5 by John Arbash Meinel
Start adding some tests.
1022
        self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1023
        self.assertEqual(set(), missing_keys)
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1024
        self.assertEqual(set(), search_keys)
4593.4.5 by John Arbash Meinel
Start adding some tests.
1025
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1026
    def test__find_ancestors_one_page_w_missing(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1027
        key1 = ('key-1',)
1028
        key2 = ('key-2',)
1029
        key3 = ('key-3',)
1030
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1031
            (key1, 'value', ([key2],)),
1032
            (key2, 'value', ([],)),
1033
            ])
1034
        parent_map = {}
1035
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1036
        search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1037
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1038
        self.assertEqual({key2: ()}, parent_map)
1039
        # we know that key3 is missing because we read the page that it would
1040
        # otherwise be on
1041
        self.assertEqual(set([key3]), missing_keys)
1042
        self.assertEqual(set(), search_keys)
1043
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1044
    def test__find_ancestors_one_parent_missing(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1045
        key1 = ('key-1',)
1046
        key2 = ('key-2',)
1047
        key3 = ('key-3',)
1048
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1049
            (key1, 'value', ([key2],)),
1050
            (key2, 'value', ([key3],)),
1051
            ])
1052
        parent_map = {}
1053
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1054
        search_keys = index._find_ancestors([key1], 0, parent_map,
1055
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1056
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1057
        self.assertEqual(set(), missing_keys)
1058
        # all we know is that key3 wasn't present on the page we were reading
1059
        # but if you look, the last key is key2 which comes before key3, so we
1060
        # don't know whether key3 would land on this page or not.
1061
        self.assertEqual(set([key3]), search_keys)
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1062
        search_keys = index._find_ancestors(search_keys, 0, parent_map,
1063
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1064
        # passing it back in, we are sure it is 'missing'
1065
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1066
        self.assertEqual(set([key3]), missing_keys)
1067
        self.assertEqual(set([]), search_keys)
1068
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1069
    def test__find_ancestors_dont_search_known(self):
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1070
        key1 = ('key-1',)
1071
        key2 = ('key-2',)
1072
        key3 = ('key-3',)
1073
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1074
            (key1, 'value', ([key2],)),
1075
            (key2, 'value', ([key3],)),
1076
            (key3, 'value', ([],)),
1077
            ])
1078
        # We already know about key2, so we won't try to search for key3
1079
        parent_map = {key2: (key3,)}
1080
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1081
        search_keys = index._find_ancestors([key1], 0, parent_map,
1082
                                            missing_keys)
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1083
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1084
        self.assertEqual(set(), missing_keys)
1085
        self.assertEqual(set(), search_keys)
1086
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1087
    def test__find_ancestors_multiple_pages(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1088
        # We need to use enough keys that we actually cause a split
1089
        start_time = 1249671539
1090
        email = "joebob@example.com"
1091
        nodes = []
1092
        ref_lists = ((),)
1093
        rev_keys = []
1094
        for i in xrange(400):
1095
            rev_id = '%s-%s-%s' % (email,
1096
                                   osutils.compact_date(start_time + i),
1097
                                   osutils.rand_chars(16))
1098
            rev_key = (rev_id,)
1099
            nodes.append((rev_key, 'value', ref_lists))
1100
            # We have a ref 'list' of length 1, with a list of parents, with 1
1101
            # parent which is a key
1102
            ref_lists = ((rev_key,),)
1103
            rev_keys.append(rev_key)
1104
        index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1105
        self.assertEqual(400, index.key_count())
1106
        self.assertEqual(3, len(index._row_offsets))
1107
        nodes = dict(index._read_nodes([1, 2]))
1108
        l1 = nodes[1]
1109
        l2 = nodes[2]
1110
        min_l2_key = l2.min_key
1111
        max_l1_key = l1.max_key
1112
        self.assertTrue(max_l1_key < min_l2_key)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1113
        parents_min_l2_key = l2[min_l2_key][1][0]
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1114
        self.assertEqual((l1.max_key,), parents_min_l2_key)
1115
        # Now, whatever key we select that would fall on the second page,
1116
        # should give us all the parents until the page break
1117
        key_idx = rev_keys.index(min_l2_key)
1118
        next_key = rev_keys[key_idx+1]
1119
        # So now when we get the parent map, we should get the key we are
1120
        # looking for, min_l2_key, and then a reference to go look for the
1121
        # parent of that key
1122
        parent_map = {}
1123
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1124
        search_keys = index._find_ancestors([next_key], 0, parent_map,
1125
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1126
        self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1127
        self.assertEqual(set(), missing_keys)
1128
        self.assertEqual(set([max_l1_key]), search_keys)
1129
        parent_map = {}
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1130
        search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1131
                                            missing_keys)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1132
        self.assertEqual(l1.all_keys(), sorted(parent_map))
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1133
        self.assertEqual(set(), missing_keys)
1134
        self.assertEqual(set(), search_keys)
1135
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1136
    def test__find_ancestors_empty_index(self):
4593.4.11 by John Arbash Meinel
Snapshot the work in progress.
1137
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1138
        parent_map = {}
1139
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1140
        search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1141
                                            missing_keys)
4593.4.11 by John Arbash Meinel
Snapshot the work in progress.
1142
        self.assertEqual(set(), search_keys)
1143
        self.assertEqual({}, parent_map)
1144
        self.assertEqual(set([('one',), ('two',)]), missing_keys)
1145
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1146
    def test_supports_unlimited_cache(self):
1147
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1148
        # We need enough nodes to cause a page split (so we have both an
1149
        # internal node and a couple leaf nodes. 500 seems to be enough.)
4634.71.2 by John Arbash Meinel
If we are going to sometimes use a dict, we have to conform to just the dict interface.
1150
        nodes = self.make_nodes(500, 1, 0)
1151
        for node in nodes:
1152
            builder.add_node(*node)
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1153
        stream = builder.finish()
5609.9.4 by Vincent Ladeuil
Use self.get_transport instead of transport.get_transport where possible.
1154
        trans = self.get_transport()
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1155
        size = trans.put_file('index', stream)
1156
        index = btree_index.BTreeGraphIndex(trans, 'index', size)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1157
        self.assertEqual(500, index.key_count())
1158
        # We have an internal node
1159
        self.assertEqual(2, len(index._row_lengths))
1160
        # We have at least 2 leaf nodes
1161
        self.assertTrue(index._row_lengths[-1] >= 2)
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1162
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1163
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1164
                         index._leaf_node_cache._max_cache)
1165
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1166
        self.assertEqual(100, index._internal_node_cache._max_cache)
1167
        # No change if unlimited_cache=False is passed
1168
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1169
                                            unlimited_cache=False)
1170
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1171
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1172
                         index._leaf_node_cache._max_cache)
1173
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1174
        self.assertEqual(100, index._internal_node_cache._max_cache)
1175
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1176
                                            unlimited_cache=True)
1177
        self.assertIsInstance(index._leaf_node_cache, dict)
1178
        self.assertIs(type(index._internal_node_cache), dict)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1179
        # Exercise the lookup code
1180
        entries = set(index.iter_entries([n[0] for n in nodes]))
1181
        self.assertEqual(500, len(entries))
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1182
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1183
1184
class TestBTreeNodes(BTreeTestCase):
1185
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
1186
    scenarios = btreeparser_scenarios()
1187
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1188
    def setUp(self):
1189
        BTreeTestCase.setUp(self)
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
1190
        self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1191
1192
    def test_LeafNode_1_0(self):
1193
        node_bytes = ("type=leaf\n"
1194
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
1195
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
1196
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
1197
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
1198
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
1199
        node = btree_index._LeafNode(node_bytes, 1, 0)
1200
        # We do direct access, or don't care about order, to leaf nodes most of
1201
        # the time, so a dict is useful:
1202
        self.assertEqual({
1203
            ("0000000000000000000000000000000000000000",): ("value:0", ()),
1204
            ("1111111111111111111111111111111111111111",): ("value:1", ()),
1205
            ("2222222222222222222222222222222222222222",): ("value:2", ()),
1206
            ("3333333333333333333333333333333333333333",): ("value:3", ()),
1207
            ("4444444444444444444444444444444444444444",): ("value:4", ()),
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1208
            }, dict(node.all_items()))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1209
1210
    def test_LeafNode_2_2(self):
1211
        node_bytes = ("type=leaf\n"
1212
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
1213
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1214
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1215
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
1216
            ""
1217
            )
1218
        node = btree_index._LeafNode(node_bytes, 2, 2)
1219
        # We do direct access, or don't care about order, to leaf nodes most of
1220
        # the time, so a dict is useful:
1221
        self.assertEqual({
1222
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1223
            ('00', '11'): ('value:1',
1224
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1225
            ('11', '33'): ('value:3',
1226
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1227
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1228
            }, dict(node.all_items()))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1229
1230
    def test_InternalNode_1(self):
1231
        node_bytes = ("type=internal\n"
1232
            "offset=1\n"
1233
            "0000000000000000000000000000000000000000\n"
1234
            "1111111111111111111111111111111111111111\n"
1235
            "2222222222222222222222222222222222222222\n"
1236
            "3333333333333333333333333333333333333333\n"
1237
            "4444444444444444444444444444444444444444\n"
1238
            )
1239
        node = btree_index._InternalNode(node_bytes)
1240
        # We want to bisect to find the right children from this node, so a
1241
        # vector is most useful.
1242
        self.assertEqual([
1243
            ("0000000000000000000000000000000000000000",),
1244
            ("1111111111111111111111111111111111111111",),
1245
            ("2222222222222222222222222222222222222222",),
1246
            ("3333333333333333333333333333333333333333",),
1247
            ("4444444444444444444444444444444444444444",),
1248
            ], node.keys)
1249
        self.assertEqual(1, node.offset)
1250
1251
    def test_LeafNode_2_2(self):
1252
        node_bytes = ("type=leaf\n"
1253
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
1254
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1255
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1256
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
1257
            ""
1258
            )
1259
        node = btree_index._LeafNode(node_bytes, 2, 2)
1260
        # We do direct access, or don't care about order, to leaf nodes most of
1261
        # the time, so a dict is useful:
1262
        self.assertEqual({
1263
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1264
            ('00', '11'): ('value:1',
1265
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1266
            ('11', '33'): ('value:3',
1267
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1268
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1269
            }, dict(node.all_items()))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1270
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1271
    def assertFlattened(self, expected, key, value, refs):
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1272
        flat_key, flat_line = self.parse_btree._flatten_node(
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1273
            (None, key, value, refs), bool(refs))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1274
        self.assertEqual('\x00'.join(key), flat_key)
1275
        self.assertEqual(expected, flat_line)
1276
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1277
    def test__flatten_node(self):
1278
        self.assertFlattened('key\0\0value\n', ('key',), 'value', [])
1279
        self.assertFlattened('key\0tuple\0\0value str\n',
1280
                             ('key', 'tuple'), 'value str', [])
1281
        self.assertFlattened('key\0tuple\0triple\0\0value str\n',
1282
                             ('key', 'tuple', 'triple'), 'value str', [])
1283
        self.assertFlattened('k\0t\0s\0ref\0value str\n',
1284
                             ('k', 't', 's'), 'value str', [[('ref',)]])
1285
        self.assertFlattened('key\0tuple\0ref\0key\0value str\n',
1286
                             ('key', 'tuple'), 'value str', [[('ref', 'key')]])
1287
        self.assertFlattened("00\x0000\x00\t00\x00ref00\x00value:0\n",
1288
            ('00', '00'), 'value:0', ((), (('00', 'ref00'),)))
1289
        self.assertFlattened(
1290
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
1291
            ('00', '11'), 'value:1',
1292
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01'))))
1293
        self.assertFlattened(
1294
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
1295
            ('11', '33'), 'value:3',
1296
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22'))))
1297
        self.assertFlattened(
1298
            "11\x0044\x00\t11\x00ref00\x00value:4\n",
1299
            ('11', '44'), 'value:4', ((), (('11', 'ref00'),)))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1300
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1301
1302
class TestCompiledBtree(tests.TestCase):
1303
1304
    def test_exists(self):
1305
        # This is just to let the user know if they don't have the feature
1306
        # available
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
1307
        self.requireFeature(compiled_btreeparser_feature)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1308
1309
1310
class TestMultiBisectRight(tests.TestCase):
1311
1312
    def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
1313
        self.assertEqual(offsets,
1314
                         btree_index.BTreeGraphIndex._multi_bisect_right(
1315
                            search_keys, fixed_keys))
1316
1317
    def test_after(self):
1318
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
1319
        self.assertMultiBisectRight([(3, ['e', 'f', 'g'])],
1320
                                    ['e', 'f', 'g'], ['a', 'b', 'c'])
1321
1322
    def test_before(self):
1323
        self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
1324
        self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])],
1325
                                    ['a', 'b', 'c', 'd'], ['e', 'f', 'g'])
1326
1327
    def test_exact(self):
1328
        self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
1329
        self.assertMultiBisectRight([(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b'])
1330
        self.assertMultiBisectRight([(1, ['a']), (3, ['c'])],
1331
                                    ['a', 'c'], ['a', 'b', 'c'])
1332
1333
    def test_inbetween(self):
1334
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
1335
        self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
1336
                                    ['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h'])
1337
1338
    def test_mixed(self):
1339
        self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']),
1340
                                     (4, ['g', 'h'])],
1341
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
1342
                                    ['c', 'd', 'f', 'g'])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1343
1344
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1345
class TestExpandOffsets(tests.TestCase):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1346
1347
    def make_index(self, size, recommended_pages=None):
1348
        """Make an index with a generic size.
1349
1350
        This doesn't actually create anything on disk, it just primes a
1351
        BTreeGraphIndex with the recommended information.
1352
        """
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
1353
        index = btree_index.BTreeGraphIndex(
1354
            transport.get_transport('memory:///'), 'test-index', size=size)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1355
        if recommended_pages is not None:
1356
            index._recommended_pages = recommended_pages
1357
        return index
1358
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1359
    def set_cached_offsets(self, index, cached_offsets):
1360
        """Monkeypatch to give a canned answer for _get_offsets_for...()."""
1361
        def _get_offsets_to_cached_pages():
1362
            cached = set(cached_offsets)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1363
            return cached
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1364
        index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1365
1366
    def prepare_index(self, index, node_ref_lists, key_length, key_count,
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1367
                      row_lengths, cached_offsets):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1368
        """Setup the BTreeGraphIndex with some pre-canned information."""
1369
        index.node_ref_lists = node_ref_lists
1370
        index._key_length = key_length
1371
        index._key_count = key_count
1372
        index._row_lengths = row_lengths
1373
        index._compute_row_offsets()
1374
        index._root_node = btree_index._InternalNode('internal\noffset=0\n')
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1375
        self.set_cached_offsets(index, cached_offsets)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1376
1377
    def make_100_node_index(self):
1378
        index = self.make_index(4096*100, 6)
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1379
        # Consider we've already made a single request at the middle
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1380
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1381
                           key_count=1000, row_lengths=[1, 99],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1382
                           cached_offsets=[0, 50])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1383
        return index
1384
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1385
    def make_1000_node_index(self):
1386
        index = self.make_index(4096*1000, 6)
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1387
        # Pretend we've already made a single request in the middle
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1388
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1389
                           key_count=90000, row_lengths=[1, 9, 990],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1390
                           cached_offsets=[0, 5, 500])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1391
        return index
1392
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1393
    def assertNumPages(self, expected_pages, index, size):
1394
        index._size = size
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1395
        self.assertEqual(expected_pages, index._compute_total_pages_in_index())
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1396
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1397
    def assertExpandOffsets(self, expected, index, offsets):
1398
        self.assertEqual(expected, index._expand_offsets(offsets),
1399
                         'We did not get the expected value after expanding'
1400
                         ' %s' % (offsets,))
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1401
1402
    def test_default_recommended_pages(self):
1403
        index = self.make_index(None)
1404
        # local transport recommends 4096 byte reads, which is 1 page
1405
        self.assertEqual(1, index._recommended_pages)
1406
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1407
    def test__compute_total_pages_in_index(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1408
        index = self.make_index(None)
1409
        self.assertNumPages(1, index, 1024)
1410
        self.assertNumPages(1, index, 4095)
1411
        self.assertNumPages(1, index, 4096)
1412
        self.assertNumPages(2, index, 4097)
1413
        self.assertNumPages(2, index, 8192)
1414
        self.assertNumPages(76, index, 4096*75 + 10)
1415
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1416
    def test__find_layer_start_and_stop(self):
1417
        index = self.make_1000_node_index()
1418
        self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1419
        self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1420
        self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1421
        self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1422
        self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1423
        self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1424
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1425
    def test_unknown_size(self):
1426
        # We should not expand if we don't know the file size
1427
        index = self.make_index(None, 10)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1428
        self.assertExpandOffsets([0], index, [0])
1429
        self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1430
1431
    def test_more_than_recommended(self):
1432
        index = self.make_index(4096*100, 2)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1433
        self.assertExpandOffsets([1, 10], index, [1, 10])
1434
        self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1435
1436
    def test_read_all_from_root(self):
1437
        index = self.make_index(4096*10, 20)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1438
        self.assertExpandOffsets(range(10), index, [0])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1439
1440
    def test_read_all_when_cached(self):
1441
        # We've read enough that we can grab all the rest in a single request
1442
        index = self.make_index(4096*10, 5)
1443
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1444
                           key_count=1000, row_lengths=[1, 9],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1445
                           cached_offsets=[0, 1, 2, 5, 6])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1446
        # It should fill the remaining nodes, regardless of the one requested
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1447
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1448
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1449
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1450
1451
    def test_no_root_node(self):
1452
        index = self.make_index(4096*10, 5)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1453
        self.assertExpandOffsets([0], index, [0])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1454
1455
    def test_include_neighbors(self):
1456
        index = self.make_100_node_index()
1457
        # We expand in both directions, until we have at least 'recommended'
1458
        # pages
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1459
        self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1460
        self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1461
        # If we hit an 'edge' we continue in the other direction
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1462
        self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1463
        self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1464
1465
        # Requesting many nodes will expand all locations equally
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1466
        self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1467
        self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1468
                               [2, 10, 81])
1469
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1470
    def test_stop_at_cached(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1471
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1472
        self.set_cached_offsets(index, [0, 10, 19])
1473
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1474
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1475
        self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1476
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1477
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1478
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1479
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1480
    def test_cannot_fully_expand(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1481
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1482
        self.set_cached_offsets(index, [0, 10, 12])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1483
        # We don't go into an endless loop if we are bound by cached nodes
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1484
        self.assertExpandOffsets([11], index, [11])
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1485
1486
    def test_overlap(self):
1487
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1488
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1489
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1490
1491
    def test_stay_within_layer(self):
1492
        index = self.make_1000_node_index()
1493
        # When expanding a request, we won't read nodes from the next layer
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1494
        self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1495
        self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1496
        self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1497
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1498
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1499
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1500
        self.set_cached_offsets(index, [0, 4, 12])
1501
        self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1502
        self.assertExpandOffsets([10, 11], index, [11])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1503
1504
    def test_small_requests_unexpanded(self):
1505
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1506
        self.set_cached_offsets(index, [0])
1507
        self.assertExpandOffsets([1], index, [1])
1508
        self.assertExpandOffsets([50], index, [50])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1509
        # If we request more than one node, then we'll expand
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1510
        self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1511
1512
        # The first pass does not expand
1513
        index = self.make_1000_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1514
        self.set_cached_offsets(index, [0])
1515
        self.assertExpandOffsets([1], index, [1])
1516
        self.set_cached_offsets(index, [0, 1])
1517
        self.assertExpandOffsets([100], index, [100])
1518
        self.set_cached_offsets(index, [0, 1, 100])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1519
        # But after the first depth, we will expand
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1520
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1521
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1522
        self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1523
        self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,
1524
                                 [105])