~bzr-pqm/bzr/bzr.dev

4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
2
#
3
# This program is free software; you can redistribute it and/or modify
3641.3.29 by John Arbash Meinel
Cleanup the copyright headers
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
16
#
17
18
"""Tests for btree indices."""
19
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
20
import pprint
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
21
import zlib
22
23
from bzrlib import (
24
    btree_index,
25
    errors,
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
26
    fifo_cache,
27
    lru_cache,
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
28
    osutils,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
29
    tests,
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
30
    transport,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
31
    )
32
from bzrlib.tests import (
33
    TestCaseWithTransport,
34
    condition_isinstance,
4084.5.1 by Robert Collins
Bulk update all test adaptation into a single approach, using multiply_tests rather than test adapters.
35
    multiply_tests,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
36
    split_suite_by_condition,
37
    )
38
39
40
def load_tests(standard_tests, module, loader):
41
    # parameterise the TestBTreeNodes tests
42
    node_tests, others = split_suite_by_condition(standard_tests,
43
        condition_isinstance(TestBTreeNodes))
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
44
    import bzrlib._btree_serializer_py as py_module
4084.5.1 by Robert Collins
Bulk update all test adaptation into a single approach, using multiply_tests rather than test adapters.
45
    scenarios = [('python', {'parse_btree': py_module})]
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
46
    if compiled_btreeparser_feature.available():
47
        scenarios.append(('C', {'parse_btree':
48
                                compiled_btreeparser_feature.module}))
4084.5.1 by Robert Collins
Bulk update all test adaptation into a single approach, using multiply_tests rather than test adapters.
49
    return multiply_tests(node_tests, scenarios, others)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
50
51
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
52
compiled_btreeparser_feature = tests.ModuleAvailableFeature(
53
                                'bzrlib._btree_serializer_pyx')
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
54
55
56
class BTreeTestCase(TestCaseWithTransport):
57
    # test names here are suffixed by the key length and reference list count
58
    # that they test.
59
60
    def setUp(self):
61
        TestCaseWithTransport.setUp(self)
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
62
        self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
63
64
    def make_nodes(self, count, key_elements, reference_lists):
65
        """Generate count*key_elements sample nodes."""
66
        keys = []
67
        for prefix_pos in range(key_elements):
68
            if key_elements - 1:
69
                prefix = (str(prefix_pos) * 40,)
70
            else:
71
                prefix = ()
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
72
            for pos in xrange(count):
73
                # TODO: This creates odd keys. When count == 100,000, it
74
                #       creates a 240 byte key
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
75
                key = prefix + (str(pos) * 40,)
76
                value = "value:%s" % pos
77
                if reference_lists:
78
                    # generate some references
79
                    refs = []
80
                    for list_pos in range(reference_lists):
81
                        # as many keys in each list as its index + the key depth
82
                        # mod 2 - this generates both 0 length lists and
83
                        # ones slightly longer than the number of lists.
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
84
                        # It also ensures we have non homogeneous lists.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
85
                        refs.append([])
86
                        for ref_pos in range(list_pos + pos % 2):
87
                            if pos % 2:
88
                                # refer to a nearby key
89
                                refs[-1].append(prefix + ("ref" + str(pos - 1) * 40,))
90
                            else:
91
                                # serial of this ref in the ref list
92
                                refs[-1].append(prefix + ("ref" + str(ref_pos) * 40,))
93
                        refs[-1] = tuple(refs[-1])
94
                    refs = tuple(refs)
95
                else:
96
                    refs = ()
97
                keys.append((key, value, refs))
98
        return keys
99
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
100
    def shrink_page_size(self):
101
        """Shrink the default page size so that less fits in a page."""
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
102
        self.overrideAttr(btree_index, '_PAGE_SIZE')
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
103
        btree_index._PAGE_SIZE = 2048
104
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
105
106
class TestBTreeBuilder(BTreeTestCase):
107
4744.2.7 by John Arbash Meinel
Add .clear_cache() members to GraphIndexBuilder and BTreeBuilder.
108
    def test_clear_cache(self):
109
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
110
        # This is a no-op, but we need the api to be consistent with other
111
        # BTreeGraphIndex apis.
112
        builder.clear_cache()
113
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
114
    def test_empty_1_0(self):
115
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
116
        # NamedTemporaryFile dies on builder.finish().read(). weird.
117
        temp_file = builder.finish()
118
        content = temp_file.read()
119
        del temp_file
120
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
121
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
122
            "row_lengths=\n",
123
            content)
124
125
    def test_empty_2_1(self):
126
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
127
        # NamedTemporaryFile dies on builder.finish().read(). weird.
128
        temp_file = builder.finish()
129
        content = temp_file.read()
130
        del temp_file
131
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
132
            "B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
133
            "row_lengths=\n",
134
            content)
135
136
    def test_root_leaf_1_0(self):
137
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
138
        nodes = self.make_nodes(5, 1, 0)
139
        for node in nodes:
140
            builder.add_node(*node)
141
        # NamedTemporaryFile dies on builder.finish().read(). weird.
142
        temp_file = builder.finish()
143
        content = temp_file.read()
144
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
145
        self.assertEqual(131, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
146
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
147
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
148
            "row_lengths=1\n",
149
            content[:73])
150
        node_content = content[73:]
151
        node_bytes = zlib.decompress(node_content)
152
        expected_node = ("type=leaf\n"
153
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
154
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
155
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
156
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
157
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
158
        self.assertEqual(expected_node, node_bytes)
159
160
    def test_root_leaf_2_2(self):
161
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
162
        nodes = self.make_nodes(5, 2, 2)
163
        for node in nodes:
164
            builder.add_node(*node)
165
        # NamedTemporaryFile dies on builder.finish().read(). weird.
166
        temp_file = builder.finish()
167
        content = temp_file.read()
168
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
169
        self.assertEqual(238, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
170
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
171
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
172
            "row_lengths=1\n",
173
            content[:74])
174
        node_content = content[74:]
175
        node_bytes = zlib.decompress(node_content)
176
        expected_node = (
177
            "type=leaf\n"
178
            "0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
179
            "0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
180
            "0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
181
            "0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
182
            "0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
183
            "1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
184
            "1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
185
            "1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
186
            "1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
187
            "1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
188
            ""
189
            )
190
        self.assertEqual(expected_node, node_bytes)
191
192
    def test_2_leaves_1_0(self):
193
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
194
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
195
        for node in nodes:
196
            builder.add_node(*node)
197
        # NamedTemporaryFile dies on builder.finish().read(). weird.
198
        temp_file = builder.finish()
199
        content = temp_file.read()
200
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
201
        self.assertEqual(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
202
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
203
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
204
            "row_lengths=1,2\n",
205
            content[:77])
206
        root = content[77:4096]
207
        leaf1 = content[4096:8192]
208
        leaf2 = content[8192:]
209
        root_bytes = zlib.decompress(root)
210
        expected_root = (
211
            "type=internal\n"
212
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
213
            ) + ("307" * 40) + "\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
214
        self.assertEqual(expected_root, root_bytes)
215
        # We already know serialisation works for leaves, check key selection:
216
        leaf1_bytes = zlib.decompress(leaf1)
217
        sorted_node_keys = sorted(node[0] for node in nodes)
218
        node = btree_index._LeafNode(leaf1_bytes, 1, 0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
219
        self.assertEqual(231, len(node.keys))
220
        self.assertEqual(sorted_node_keys[:231], sorted(node.keys))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
221
        leaf2_bytes = zlib.decompress(leaf2)
222
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
223
        self.assertEqual(400 - 231, len(node.keys))
224
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
225
226
    def test_last_page_rounded_1_layer(self):
227
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
228
        nodes = self.make_nodes(10, 1, 0)
229
        for node in nodes:
230
            builder.add_node(*node)
231
        # NamedTemporaryFile dies on builder.finish().read(). weird.
232
        temp_file = builder.finish()
233
        content = temp_file.read()
234
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
235
        self.assertEqual(155, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
236
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
237
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
238
            "row_lengths=1\n",
239
            content[:74])
240
        # Check thelast page is well formed
241
        leaf2 = content[74:]
242
        leaf2_bytes = zlib.decompress(leaf2)
243
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
244
        self.assertEqual(10, len(node.keys))
245
        sorted_node_keys = sorted(node[0] for node in nodes)
246
        self.assertEqual(sorted_node_keys, sorted(node.keys))
247
248
    def test_last_page_not_rounded_2_layer(self):
249
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
250
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
251
        for node in nodes:
252
            builder.add_node(*node)
253
        # NamedTemporaryFile dies on builder.finish().read(). weird.
254
        temp_file = builder.finish()
255
        content = temp_file.read()
256
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
257
        self.assertEqual(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
258
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
259
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
260
            "row_lengths=1,2\n",
261
            content[:77])
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
262
        # Check the last page is well formed
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
263
        leaf2 = content[8192:]
264
        leaf2_bytes = zlib.decompress(leaf2)
265
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
266
        self.assertEqual(400 - 231, len(node.keys))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
267
        sorted_node_keys = sorted(node[0] for node in nodes)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
268
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
269
270
    def test_three_level_tree_details(self):
271
        # The left most pointer in the second internal node in a row should
272
        # pointer to the second node that the internal node is for, _not_
273
        # the first, otherwise the first node overlaps with the last node of
274
        # the prior internal node on that row.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
275
        self.shrink_page_size()
276
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
277
        # 40K nodes is enough to create a two internal nodes on the second
278
        # level, with a 2K page size
279
        nodes = self.make_nodes(20000, 2, 2)
3641.3.5 by John Arbash Meinel
For iter_all and three_level tests adjust spill-at.
280
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
281
        for node in nodes:
282
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
283
        t = transport.get_transport('trace+' + self.get_url(''))
284
        size = t.put_file('index', self.time(builder.finish))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
285
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
286
        index = btree_index.BTreeGraphIndex(t, 'index', size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
287
        # Seed the metadata, we're using internal calls now.
288
        index.key_count()
289
        self.assertEqual(3, len(index._row_lengths),
290
            "Not enough rows: %r" % index._row_lengths)
291
        self.assertEqual(4, len(index._row_offsets))
292
        self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
293
        internal_nodes = index._get_internal_nodes([0, 1, 2])
294
        root_node = internal_nodes[0]
295
        internal_node1 = internal_nodes[1]
296
        internal_node2 = internal_nodes[2]
297
        # The left most node node2 points at should be one after the right most
298
        # node pointed at by node1.
299
        self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
300
        # The left most key of the second node pointed at by internal_node2
301
        # should be its first key. We can check this by looking for its first key
302
        # in the second node it points at
303
        pos = index._row_offsets[2] + internal_node2.offset + 1
304
        leaf = index._get_leaf_nodes([pos])[pos]
305
        self.assertTrue(internal_node2.keys[0] in leaf.keys)
306
307
    def test_2_leaves_2_2(self):
308
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
309
        nodes = self.make_nodes(100, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
310
        for node in nodes:
311
            builder.add_node(*node)
312
        # NamedTemporaryFile dies on builder.finish().read(). weird.
313
        temp_file = builder.finish()
314
        content = temp_file.read()
315
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
316
        self.assertEqual(12643, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
317
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
318
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
319
            "row_lengths=1,3\n",
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
320
            content[:77])
321
        root = content[77:4096]
322
        leaf1 = content[4096:8192]
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
323
        leaf2 = content[8192:12288]
324
        leaf3 = content[12288:]
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
325
        root_bytes = zlib.decompress(root)
326
        expected_root = (
327
            "type=internal\n"
328
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
329
            + ("0" * 40) + "\x00" + ("91" * 40) + "\n"
330
            + ("1" * 40) + "\x00" + ("81" * 40) + "\n"
331
            )
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
332
        self.assertEqual(expected_root, root_bytes)
333
        # We assume the other leaf nodes have been written correctly - layering
334
        # FTW.
335
336
    def test_spill_index_stress_1_1(self):
337
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
338
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
339
        builder.add_node(*nodes[0])
340
        # Test the parts of the index that take up memory are doing so
341
        # predictably.
342
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
343
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
344
        builder.add_node(*nodes[1])
345
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
346
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
347
        self.assertEqual(1, len(builder._backing_indices))
348
        self.assertEqual(2, builder._backing_indices[0].key_count())
349
        # now back to memory
350
        builder.add_node(*nodes[2])
351
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
352
        self.assertIs(None, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
353
        # And spills to a second backing index combing all
354
        builder.add_node(*nodes[3])
355
        self.assertEqual(0, len(builder._nodes))
356
        self.assertIs(None, builder._nodes_by_key)
357
        self.assertEqual(2, len(builder._backing_indices))
358
        self.assertEqual(None, builder._backing_indices[0])
359
        self.assertEqual(4, builder._backing_indices[1].key_count())
360
        # The next spills to the 2-len slot
361
        builder.add_node(*nodes[4])
362
        builder.add_node(*nodes[5])
363
        self.assertEqual(0, len(builder._nodes))
364
        self.assertIs(None, builder._nodes_by_key)
365
        self.assertEqual(2, len(builder._backing_indices))
366
        self.assertEqual(2, builder._backing_indices[0].key_count())
367
        self.assertEqual(4, builder._backing_indices[1].key_count())
368
        # Next spill combines
369
        builder.add_node(*nodes[6])
370
        builder.add_node(*nodes[7])
371
        self.assertEqual(3, len(builder._backing_indices))
372
        self.assertEqual(None, builder._backing_indices[0])
373
        self.assertEqual(None, builder._backing_indices[1])
374
        self.assertEqual(8, builder._backing_indices[2].key_count())
375
        # And so forth - counting up in binary.
376
        builder.add_node(*nodes[8])
377
        builder.add_node(*nodes[9])
378
        self.assertEqual(3, len(builder._backing_indices))
379
        self.assertEqual(2, builder._backing_indices[0].key_count())
380
        self.assertEqual(None, builder._backing_indices[1])
381
        self.assertEqual(8, builder._backing_indices[2].key_count())
382
        builder.add_node(*nodes[10])
383
        builder.add_node(*nodes[11])
384
        self.assertEqual(3, len(builder._backing_indices))
385
        self.assertEqual(None, builder._backing_indices[0])
386
        self.assertEqual(4, builder._backing_indices[1].key_count())
387
        self.assertEqual(8, builder._backing_indices[2].key_count())
388
        builder.add_node(*nodes[12])
389
        # Test that memory and disk are both used for query methods; and that
390
        # None is skipped over happily.
391
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
392
            list(builder.iter_all_entries()))
393
        # Two nodes - one memory one disk
394
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
395
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
396
        self.assertEqual(13, builder.key_count())
397
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
398
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
399
        builder.add_node(*nodes[13])
400
        self.assertEqual(3, len(builder._backing_indices))
401
        self.assertEqual(2, builder._backing_indices[0].key_count())
402
        self.assertEqual(4, builder._backing_indices[1].key_count())
403
        self.assertEqual(8, builder._backing_indices[2].key_count())
404
        builder.add_node(*nodes[14])
405
        builder.add_node(*nodes[15])
406
        self.assertEqual(4, len(builder._backing_indices))
407
        self.assertEqual(None, builder._backing_indices[0])
408
        self.assertEqual(None, builder._backing_indices[1])
409
        self.assertEqual(None, builder._backing_indices[2])
410
        self.assertEqual(16, builder._backing_indices[3].key_count())
411
        # Now finish, and check we got a correctly ordered tree
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
412
        t = self.get_transport('')
413
        size = t.put_file('index', builder.finish())
414
        index = btree_index.BTreeGraphIndex(t, 'index', size)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
415
        nodes = list(index.iter_all_entries())
416
        self.assertEqual(sorted(nodes), nodes)
417
        self.assertEqual(16, len(nodes))
418
419
    def test_spill_index_stress_1_1_no_combine(self):
420
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
4168.3.6 by John Arbash Meinel
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
421
        builder.set_optimize(for_size=False, combine_backing_indices=False)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
422
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
423
        builder.add_node(*nodes[0])
424
        # Test the parts of the index that take up memory are doing so
425
        # predictably.
426
        self.assertEqual(1, len(builder._nodes))
427
        self.assertIs(None, builder._nodes_by_key)
428
        builder.add_node(*nodes[1])
429
        self.assertEqual(0, len(builder._nodes))
430
        self.assertIs(None, builder._nodes_by_key)
431
        self.assertEqual(1, len(builder._backing_indices))
432
        self.assertEqual(2, builder._backing_indices[0].key_count())
433
        # now back to memory
434
        builder.add_node(*nodes[2])
435
        self.assertEqual(1, len(builder._nodes))
436
        self.assertIs(None, builder._nodes_by_key)
437
        # And spills to a second backing index but doesn't combine
438
        builder.add_node(*nodes[3])
439
        self.assertEqual(0, len(builder._nodes))
440
        self.assertIs(None, builder._nodes_by_key)
441
        self.assertEqual(2, len(builder._backing_indices))
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
442
        for backing_index in builder._backing_indices:
443
            self.assertEqual(2, backing_index.key_count())
444
        # The next spills to the 3rd slot
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
445
        builder.add_node(*nodes[4])
446
        builder.add_node(*nodes[5])
447
        self.assertEqual(0, len(builder._nodes))
448
        self.assertIs(None, builder._nodes_by_key)
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
449
        self.assertEqual(3, len(builder._backing_indices))
450
        for backing_index in builder._backing_indices:
451
            self.assertEqual(2, backing_index.key_count())
452
        # Now spill a few more, and check that we don't combine
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
453
        builder.add_node(*nodes[6])
454
        builder.add_node(*nodes[7])
455
        builder.add_node(*nodes[8])
456
        builder.add_node(*nodes[9])
457
        builder.add_node(*nodes[10])
458
        builder.add_node(*nodes[11])
459
        builder.add_node(*nodes[12])
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
460
        self.assertEqual(6, len(builder._backing_indices))
461
        for backing_index in builder._backing_indices:
462
            self.assertEqual(2, backing_index.key_count())
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
463
        # Test that memory and disk are both used for query methods; and that
464
        # None is skipped over happily.
465
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
466
            list(builder.iter_all_entries()))
467
        # Two nodes - one memory one disk
468
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
469
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
470
        self.assertEqual(13, builder.key_count())
471
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
472
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
473
        builder.add_node(*nodes[13])
474
        builder.add_node(*nodes[14])
475
        builder.add_node(*nodes[15])
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
476
        self.assertEqual(8, len(builder._backing_indices))
477
        for backing_index in builder._backing_indices:
478
            self.assertEqual(2, backing_index.key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
479
        # Now finish, and check we got a correctly ordered tree
480
        transport = self.get_transport('')
481
        size = transport.put_file('index', builder.finish())
482
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
483
        nodes = list(index.iter_all_entries())
484
        self.assertEqual(sorted(nodes), nodes)
485
        self.assertEqual(16, len(nodes))
486
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
487
    def test_set_optimize(self):
488
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
489
        builder.set_optimize(for_size=True)
490
        self.assertTrue(builder._optimize_for_size)
491
        builder.set_optimize(for_size=False)
492
        self.assertFalse(builder._optimize_for_size)
4168.3.6 by John Arbash Meinel
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
493
        # test that we can set combine_backing_indices without effecting
494
        # _optimize_for_size
495
        obj = object()
496
        builder._optimize_for_size = obj
497
        builder.set_optimize(combine_backing_indices=False)
498
        self.assertFalse(builder._combine_backing_indices)
499
        self.assertIs(obj, builder._optimize_for_size)
500
        builder.set_optimize(combine_backing_indices=True)
501
        self.assertTrue(builder._combine_backing_indices)
502
        self.assertIs(obj, builder._optimize_for_size)
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
503
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
504
    def test_spill_index_stress_2_2(self):
505
        # test that references and longer keys don't confuse things.
506
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
507
            spill_at=2)
508
        nodes = self.make_nodes(16, 2, 2)
509
        builder.add_node(*nodes[0])
510
        # Test the parts of the index that take up memory are doing so
511
        # predictably.
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
512
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
513
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
514
        builder.add_node(*nodes[1])
515
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
516
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
517
        self.assertEqual(1, len(builder._backing_indices))
518
        self.assertEqual(2, builder._backing_indices[0].key_count())
519
        # now back to memory
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
520
        old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
521
        builder.add_node(*nodes[2])
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
522
        self.assertEqual(1, len(builder._nodes))
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
523
        self.assertIsNot(None, builder._nodes_by_key)
524
        self.assertNotEqual({}, builder._nodes_by_key)
525
        # We should have a new entry
526
        self.assertNotEqual(old, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
527
        # And spills to a second backing index combing all
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
528
        builder.add_node(*nodes[3])
529
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
530
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
531
        self.assertEqual(2, len(builder._backing_indices))
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
532
        self.assertEqual(None, builder._backing_indices[0])
533
        self.assertEqual(4, builder._backing_indices[1].key_count())
534
        # The next spills to the 2-len slot
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
535
        builder.add_node(*nodes[4])
536
        builder.add_node(*nodes[5])
537
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
538
        self.assertIs(None, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
539
        self.assertEqual(2, len(builder._backing_indices))
540
        self.assertEqual(2, builder._backing_indices[0].key_count())
541
        self.assertEqual(4, builder._backing_indices[1].key_count())
542
        # Next spill combines
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
543
        builder.add_node(*nodes[6])
544
        builder.add_node(*nodes[7])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
545
        self.assertEqual(3, len(builder._backing_indices))
546
        self.assertEqual(None, builder._backing_indices[0])
547
        self.assertEqual(None, builder._backing_indices[1])
548
        self.assertEqual(8, builder._backing_indices[2].key_count())
549
        # And so forth - counting up in binary.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
550
        builder.add_node(*nodes[8])
551
        builder.add_node(*nodes[9])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
552
        self.assertEqual(3, len(builder._backing_indices))
553
        self.assertEqual(2, builder._backing_indices[0].key_count())
554
        self.assertEqual(None, builder._backing_indices[1])
555
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
556
        builder.add_node(*nodes[10])
557
        builder.add_node(*nodes[11])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
558
        self.assertEqual(3, len(builder._backing_indices))
559
        self.assertEqual(None, builder._backing_indices[0])
560
        self.assertEqual(4, builder._backing_indices[1].key_count())
561
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
562
        builder.add_node(*nodes[12])
563
        # Test that memory and disk are both used for query methods; and that
564
        # None is skipped over happily.
565
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
566
            list(builder.iter_all_entries()))
567
        # Two nodes - one memory one disk
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
568
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
569
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
570
        self.assertEqual(13, builder.key_count())
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
571
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
572
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
573
        builder.add_node(*nodes[13])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
574
        self.assertEqual(3, len(builder._backing_indices))
575
        self.assertEqual(2, builder._backing_indices[0].key_count())
576
        self.assertEqual(4, builder._backing_indices[1].key_count())
577
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
578
        builder.add_node(*nodes[14])
579
        builder.add_node(*nodes[15])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
580
        self.assertEqual(4, len(builder._backing_indices))
581
        self.assertEqual(None, builder._backing_indices[0])
582
        self.assertEqual(None, builder._backing_indices[1])
583
        self.assertEqual(None, builder._backing_indices[2])
584
        self.assertEqual(16, builder._backing_indices[3].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
585
        # Now finish, and check we got a correctly ordered tree
586
        transport = self.get_transport('')
587
        size = transport.put_file('index', builder.finish())
588
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
589
        nodes = list(index.iter_all_entries())
590
        self.assertEqual(sorted(nodes), nodes)
591
        self.assertEqual(16, len(nodes))
592
593
    def test_spill_index_duplicate_key_caught_on_finish(self):
594
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
595
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
596
        builder.add_node(*nodes[0])
597
        builder.add_node(*nodes[1])
598
        builder.add_node(*nodes[0])
599
        self.assertRaises(errors.BadIndexDuplicateKey, builder.finish)
600
601
602
class TestBTreeIndex(BTreeTestCase):
603
604
    def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
605
        builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
606
            key_elements=key_elements)
607
        for key, value, references in nodes:
608
            builder.add_node(key, value, references)
609
        stream = builder.finish()
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
610
        trans = transport.get_transport('trace+' + self.get_url())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
611
        size = trans.put_file('index', stream)
612
        return btree_index.BTreeGraphIndex(trans, 'index', size)
613
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
614
    def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
615
                               offset=0):
616
        builder = btree_index.BTreeBuilder(key_elements=key_elements,
617
                                           reference_lists=ref_lists)
618
        builder.add_nodes(nodes)
619
        transport = self.get_transport('')
5074.4.7 by John Arbash Meinel
Workaround a strange bug about the file closing before it is read.
620
        # NamedTemporaryFile dies on builder.finish().read(). weird.
621
        temp_file = builder.finish()
622
        content = temp_file.read()
623
        del temp_file
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
624
        size = len(content)
625
        transport.put_bytes('index', (' '*offset)+content)
626
        return btree_index.BTreeGraphIndex(transport, 'index', size=size,
627
                                           offset=offset)
628
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
629
    def test_clear_cache(self):
630
        nodes = self.make_nodes(160, 2, 2)
631
        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
632
        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
633
        self.assertEqual([1, 4], index._row_lengths)
634
        self.assertIsNot(None, index._root_node)
4744.2.9 by John Arbash Meinel
Move the note and assertions.
635
        internal_node_pre_clear = index._internal_node_cache.keys()
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
636
        self.assertTrue(len(index._leaf_node_cache) > 0)
637
        index.clear_cache()
638
        # We don't touch _root_node or _internal_node_cache, both should be
639
        # small, and can save a round trip or two
640
        self.assertIsNot(None, index._root_node)
4744.2.9 by John Arbash Meinel
Move the note and assertions.
641
        # NOTE: We don't want to affect the _internal_node_cache, as we expect
642
        #       it will be small, and if we ever do touch this index again, it
643
        #       will save round-trips.  This assertion isn't very strong,
644
        #       becuase without a 3-level index, we don't have any internal
645
        #       nodes cached.
646
        self.assertEqual(internal_node_pre_clear,
647
                         index._internal_node_cache.keys())
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
648
        self.assertEqual(0, len(index._leaf_node_cache))
649
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
650
    def test_trivial_constructor(self):
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
651
        t = transport.get_transport('trace+' + self.get_url(''))
652
        index = btree_index.BTreeGraphIndex(t, 'index', None)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
653
        # Checks the page size at load, but that isn't logged yet.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
654
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
655
656
    def test_with_size_constructor(self):
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
657
        t = transport.get_transport('trace+' + self.get_url(''))
658
        index = btree_index.BTreeGraphIndex(t, 'index', 1)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
659
        # Checks the page size at load, but that isn't logged yet.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
660
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
661
662
    def test_empty_key_count_no_size(self):
663
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
664
        t = transport.get_transport('trace+' + self.get_url(''))
665
        t.put_file('index', builder.finish())
666
        index = btree_index.BTreeGraphIndex(t, 'index', None)
667
        del t._activity[:]
668
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
669
        self.assertEqual(0, index.key_count())
670
        # The entire index should have been requested (as we generally have the
671
        # size available, and doing many small readvs is inappropriate).
672
        # We can't tell how much was actually read here, but - check the code.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
673
        self.assertEqual([('get', 'index')], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
674
675
    def test_empty_key_count(self):
676
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
677
        t = transport.get_transport('trace+' + self.get_url(''))
678
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
679
        self.assertEqual(72, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
680
        index = btree_index.BTreeGraphIndex(t, 'index', size)
681
        del t._activity[:]
682
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
683
        self.assertEqual(0, index.key_count())
684
        # The entire index should have been read, as 4K > size
685
        self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
686
                         t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
687
688
    def test_non_empty_key_count_2_2(self):
689
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
690
        nodes = self.make_nodes(35, 2, 2)
691
        for node in nodes:
692
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
693
        t = transport.get_transport('trace+' + self.get_url(''))
694
        size = t.put_file('index', builder.finish())
695
        index = btree_index.BTreeGraphIndex(t, 'index', size)
696
        del t._activity[:]
697
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
698
        self.assertEqual(70, index.key_count())
699
        # The entire index should have been read, as it is one page long.
700
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
701
            t._activity)
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
702
        self.assertEqual(1173, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
703
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
704
    def test_with_offset_no_size(self):
705
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
706
                                            offset=1234,
707
                                            nodes=self.make_nodes(200, 1, 1))
708
        index._size = None # throw away the size info
709
        self.assertEqual(200, index.key_count())
710
711
    def test_with_small_offset(self):
712
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
713
                                            offset=1234,
714
                                            nodes=self.make_nodes(200, 1, 1))
715
        self.assertEqual(200, index.key_count())
716
717
    def test_with_large_offset(self):
718
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
719
                                            offset=123456,
720
                                            nodes=self.make_nodes(200, 1, 1))
721
        self.assertEqual(200, index.key_count())
722
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
723
    def test__read_nodes_no_size_one_page_reads_once(self):
724
        self.make_index(nodes=[(('key',), 'value', ())])
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
725
        trans = transport.get_transport('trace+' + self.get_url())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
726
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
727
        del trans._activity[:]
728
        nodes = dict(index._read_nodes([0]))
729
        self.assertEqual([0], nodes.keys())
730
        node = nodes[0]
731
        self.assertEqual([('key',)], node.keys.keys())
732
        self.assertEqual([('get', 'index')], trans._activity)
733
734
    def test__read_nodes_no_size_multiple_pages(self):
735
        index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
736
        index.key_count()
737
        num_pages = index._row_offsets[-1]
738
        # Reopen with a traced transport and no size
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
739
        trans = transport.get_transport('trace+' + self.get_url())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
740
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
741
        del trans._activity[:]
742
        nodes = dict(index._read_nodes([0]))
743
        self.assertEqual(range(num_pages), nodes.keys())
744
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
745
    def test_2_levels_key_count_2_2(self):
746
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
747
        nodes = self.make_nodes(160, 2, 2)
748
        for node in nodes:
749
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
750
        t = transport.get_transport('trace+' + self.get_url(''))
751
        size = t.put_file('index', builder.finish())
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
752
        self.assertEqual(17692, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
753
        index = btree_index.BTreeGraphIndex(t, 'index', size)
754
        del t._activity[:]
755
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
756
        self.assertEqual(320, index.key_count())
757
        # The entire index should not have been read.
758
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
759
                         t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
760
761
    def test_validate_one_page(self):
762
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
763
        nodes = self.make_nodes(45, 2, 2)
764
        for node in nodes:
765
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
766
        t = transport.get_transport('trace+' + self.get_url(''))
767
        size = t.put_file('index', builder.finish())
768
        index = btree_index.BTreeGraphIndex(t, 'index', size)
769
        del t._activity[:]
770
        self.assertEqual([], t._activity)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
771
        index.validate()
772
        # The entire index should have been read linearly.
773
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
774
                         t._activity)
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
775
        self.assertEqual(1488, size)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
776
777
    def test_validate_two_pages(self):
778
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
779
        nodes = self.make_nodes(80, 2, 2)
780
        for node in nodes:
781
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
782
        t = transport.get_transport('trace+' + self.get_url(''))
783
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
784
        # Root page, 2 leaf pages
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
785
        self.assertEqual(9339, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
786
        index = btree_index.BTreeGraphIndex(t, 'index', size)
787
        del t._activity[:]
788
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
789
        index.validate()
790
        # The entire index should have been read linearly.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
791
        self.assertEqual(
792
            [('readv', 'index', [(0, 4096)], False, None),
793
             ('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
794
            t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
795
        # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
796
        # node and make validate find them.
797
798
    def test_eq_ne(self):
799
        # two indices are equal when constructed with the same parameters:
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
800
        t1 = transport.get_transport('trace+' + self.get_url(''))
801
        t2 = transport.get_transport(self.get_url(''))
802
        self.assertTrue(
803
            btree_index.BTreeGraphIndex(t1, 'index', None) ==
804
            btree_index.BTreeGraphIndex(t1, 'index', None))
805
        self.assertTrue(
806
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
807
            btree_index.BTreeGraphIndex(t1, 'index', 20))
808
        self.assertFalse(
809
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
810
            btree_index.BTreeGraphIndex(t2, 'index', 20))
811
        self.assertFalse(
812
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) ==
813
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
814
        self.assertFalse(
815
            btree_index.BTreeGraphIndex(t1, 'index', 10) ==
816
            btree_index.BTreeGraphIndex(t1, 'index', 20))
817
        self.assertFalse(
818
            btree_index.BTreeGraphIndex(t1, 'index', None) !=
819
            btree_index.BTreeGraphIndex(t1, 'index', None))
820
        self.assertFalse(
821
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
822
            btree_index.BTreeGraphIndex(t1, 'index', 20))
823
        self.assertTrue(
824
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
825
            btree_index.BTreeGraphIndex(t2, 'index', 20))
826
        self.assertTrue(
827
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) !=
828
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
829
        self.assertTrue(
830
            btree_index.BTreeGraphIndex(t1, 'index', 10) !=
831
            btree_index.BTreeGraphIndex(t1, 'index', 20))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
832
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
833
    def test_iter_all_only_root_no_size(self):
834
        self.make_index(nodes=[(('key',), 'value', ())])
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
835
        t = transport.get_transport('trace+' + self.get_url(''))
836
        index = btree_index.BTreeGraphIndex(t, 'index', None)
837
        del t._activity[:]
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
838
        self.assertEqual([(('key',), 'value')],
839
                         [x[1:] for x in index.iter_all_entries()])
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
840
        self.assertEqual([('get', 'index')], t._activity)
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
841
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
842
    def test_iter_all_entries_reads(self):
843
        # iterating all entries reads the header, then does a linear
844
        # read.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
845
        self.shrink_page_size()
3641.5.11 by John Arbash Meinel
Some small test cleanup
846
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
847
        # 20k nodes is enough to create a two internal nodes on the second
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
848
        # level, with a 2K page size
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
849
        nodes = self.make_nodes(10000, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
850
        for node in nodes:
851
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
852
        t = transport.get_transport('trace+' + self.get_url(''))
853
        size = t.put_file('index', builder.finish())
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
854
        self.assertEqual(1303220, size, 'number of expected bytes in the'
855
                                        ' output changed')
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
856
        page_size = btree_index._PAGE_SIZE
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
857
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
858
        index = btree_index.BTreeGraphIndex(t, 'index', size)
859
        del t._activity[:]
860
        self.assertEqual([], t._activity)
3641.5.8 by John Arbash Meinel
Fix up the test suite, now that we pack more efficiently.
861
        found_nodes = self.time(list, index.iter_all_entries())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
862
        bare_nodes = []
863
        for node in found_nodes:
864
            self.assertTrue(node[0] is index)
865
            bare_nodes.append(node[1:])
866
        self.assertEqual(3, len(index._row_lengths),
867
            "Not enough rows: %r" % index._row_lengths)
868
        # Should be as long as the nodes we supplied
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
869
        self.assertEqual(20000, len(found_nodes))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
870
        # Should have the same content
871
        self.assertEqual(set(nodes), set(bare_nodes))
872
        # Should have done linear scan IO up the index, ignoring
873
        # the internal nodes:
874
        # The entire index should have been read
875
        total_pages = sum(index._row_lengths)
876
        self.assertEqual(total_pages, index._row_offsets[-1])
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
877
        self.assertEqual(1303220, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
878
        # The start of the leaves
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
879
        first_byte = index._row_offsets[-2] * page_size
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
880
        readv_request = []
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
881
        for offset in range(first_byte, size, page_size):
882
            readv_request.append((offset, page_size))
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
883
        # The last page is truncated
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
884
        readv_request[-1] = (readv_request[-1][0], 1303220 % page_size)
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
885
        expected = [('readv', 'index', [(0, page_size)], False, None),
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
886
             ('readv',  'index', readv_request, False, None)]
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
887
        if expected != t._activity:
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
888
            self.assertEqualDiff(pprint.pformat(expected),
889
                                 pprint.pformat(transport._activity))
890
891
    def _test_iter_entries_references_resolved(self):
892
        index = self.make_index(1, nodes=[
893
            (('name', ), 'data', ([('ref', ), ('ref', )], )),
894
            (('ref', ), 'refdata', ([], ))])
895
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),('ref',)),)),
896
            (index, ('ref', ), 'refdata', ((), ))]),
897
            set(index.iter_entries([('name',), ('ref',)])))
898
899
    def test_iter_entries_references_2_refs_resolved(self):
900
        # iterating some entries reads just the pages needed. For now, to
901
        # get it working and start measuring, only 4K pages are read.
902
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
903
        # 80 nodes is enough to create a two-level index.
904
        nodes = self.make_nodes(160, 2, 2)
905
        for node in nodes:
906
            builder.add_node(*node)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
907
        t = transport.get_transport('trace+' + self.get_url(''))
908
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
909
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
910
        index = btree_index.BTreeGraphIndex(t, 'index', size)
911
        del t._activity[:]
912
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
913
        # search for one key
914
        found_nodes = list(index.iter_entries([nodes[30][0]]))
915
        bare_nodes = []
916
        for node in found_nodes:
917
            self.assertTrue(node[0] is index)
918
            bare_nodes.append(node[1:])
919
        # Should be as long as the nodes we supplied
920
        self.assertEqual(1, len(found_nodes))
921
        # Should have the same content
922
        self.assertEqual(nodes[30], bare_nodes[0])
923
        # Should have read the root node, then one leaf page:
924
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
925
             ('readv',  'index', [(8192, 4096), ], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
926
            t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
927
928
    def test_iter_key_prefix_1_element_key_None(self):
929
        index = self.make_index()
930
        self.assertRaises(errors.BadIndexKey, list,
931
            index.iter_entries_prefix([(None, )]))
932
933
    def test_iter_key_prefix_wrong_length(self):
934
        index = self.make_index()
935
        self.assertRaises(errors.BadIndexKey, list,
936
            index.iter_entries_prefix([('foo', None)]))
937
        index = self.make_index(key_elements=2)
938
        self.assertRaises(errors.BadIndexKey, list,
939
            index.iter_entries_prefix([('foo', )]))
940
        self.assertRaises(errors.BadIndexKey, list,
941
            index.iter_entries_prefix([('foo', None, None)]))
942
943
    def test_iter_key_prefix_1_key_element_no_refs(self):
944
        index = self.make_index( nodes=[
945
            (('name', ), 'data', ()),
946
            (('ref', ), 'refdata', ())])
947
        self.assertEqual(set([(index, ('name', ), 'data'),
948
            (index, ('ref', ), 'refdata')]),
949
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
950
951
    def test_iter_key_prefix_1_key_element_refs(self):
952
        index = self.make_index(1, nodes=[
953
            (('name', ), 'data', ([('ref', )], )),
954
            (('ref', ), 'refdata', ([], ))])
955
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),),)),
956
            (index, ('ref', ), 'refdata', ((), ))]),
957
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
958
959
    def test_iter_key_prefix_2_key_element_no_refs(self):
960
        index = self.make_index(key_elements=2, nodes=[
961
            (('name', 'fin1'), 'data', ()),
962
            (('name', 'fin2'), 'beta', ()),
963
            (('ref', 'erence'), 'refdata', ())])
964
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
965
            (index, ('ref', 'erence'), 'refdata')]),
966
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
967
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
968
            (index, ('name', 'fin2'), 'beta')]),
969
            set(index.iter_entries_prefix([('name', None)])))
970
971
    def test_iter_key_prefix_2_key_element_refs(self):
972
        index = self.make_index(1, key_elements=2, nodes=[
973
            (('name', 'fin1'), 'data', ([('ref', 'erence')], )),
974
            (('name', 'fin2'), 'beta', ([], )),
975
            (('ref', 'erence'), 'refdata', ([], ))])
976
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
977
            (index, ('ref', 'erence'), 'refdata', ((), ))]),
978
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
979
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
980
            (index, ('name', 'fin2'), 'beta', ((), ))]),
981
            set(index.iter_entries_prefix([('name', None)])))
982
4011.5.5 by Andrew Bennetts
Fix typo in comment.
983
    # XXX: external_references tests are duplicated in test_index.  We
4011.5.3 by Andrew Bennetts
Implement and test external_references on GraphIndex and BTreeGraphIndex.
984
    # probably should have per_graph_index tests...
985
    def test_external_references_no_refs(self):
986
        index = self.make_index(ref_lists=0, nodes=[])
987
        self.assertRaises(ValueError, index.external_references, 0)
988
989
    def test_external_references_no_results(self):
990
        index = self.make_index(ref_lists=1, nodes=[
991
            (('key',), 'value', ([],))])
992
        self.assertEqual(set(), index.external_references(0))
993
994
    def test_external_references_missing_ref(self):
995
        missing_key = ('missing',)
996
        index = self.make_index(ref_lists=1, nodes=[
997
            (('key',), 'value', ([missing_key],))])
998
        self.assertEqual(set([missing_key]), index.external_references(0))
999
1000
    def test_external_references_multiple_ref_lists(self):
1001
        missing_key = ('missing',)
1002
        index = self.make_index(ref_lists=2, nodes=[
1003
            (('key',), 'value', ([], [missing_key]))])
1004
        self.assertEqual(set([]), index.external_references(0))
1005
        self.assertEqual(set([missing_key]), index.external_references(1))
1006
1007
    def test_external_references_two_records(self):
1008
        index = self.make_index(ref_lists=1, nodes=[
1009
            (('key-1',), 'value', ([('key-2',)],)),
1010
            (('key-2',), 'value', ([],)),
1011
            ])
1012
        self.assertEqual(set([]), index.external_references(0))
1013
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1014
    def test__find_ancestors_one_page(self):
4593.4.5 by John Arbash Meinel
Start adding some tests.
1015
        key1 = ('key-1',)
1016
        key2 = ('key-2',)
1017
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1018
            (key1, 'value', ([key2],)),
1019
            (key2, 'value', ([],)),
1020
            ])
1021
        parent_map = {}
1022
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1023
        search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
4593.4.5 by John Arbash Meinel
Start adding some tests.
1024
        self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1025
        self.assertEqual(set(), missing_keys)
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1026
        self.assertEqual(set(), search_keys)
4593.4.5 by John Arbash Meinel
Start adding some tests.
1027
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1028
    def test__find_ancestors_one_page_w_missing(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1029
        key1 = ('key-1',)
1030
        key2 = ('key-2',)
1031
        key3 = ('key-3',)
1032
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1033
            (key1, 'value', ([key2],)),
1034
            (key2, 'value', ([],)),
1035
            ])
1036
        parent_map = {}
1037
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1038
        search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1039
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1040
        self.assertEqual({key2: ()}, parent_map)
1041
        # we know that key3 is missing because we read the page that it would
1042
        # otherwise be on
1043
        self.assertEqual(set([key3]), missing_keys)
1044
        self.assertEqual(set(), search_keys)
1045
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1046
    def test__find_ancestors_one_parent_missing(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1047
        key1 = ('key-1',)
1048
        key2 = ('key-2',)
1049
        key3 = ('key-3',)
1050
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1051
            (key1, 'value', ([key2],)),
1052
            (key2, 'value', ([key3],)),
1053
            ])
1054
        parent_map = {}
1055
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1056
        search_keys = index._find_ancestors([key1], 0, parent_map,
1057
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1058
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1059
        self.assertEqual(set(), missing_keys)
1060
        # all we know is that key3 wasn't present on the page we were reading
1061
        # but if you look, the last key is key2 which comes before key3, so we
1062
        # don't know whether key3 would land on this page or not.
1063
        self.assertEqual(set([key3]), search_keys)
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1064
        search_keys = index._find_ancestors(search_keys, 0, parent_map,
1065
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1066
        # passing it back in, we are sure it is 'missing'
1067
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1068
        self.assertEqual(set([key3]), missing_keys)
1069
        self.assertEqual(set([]), search_keys)
1070
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1071
    def test__find_ancestors_dont_search_known(self):
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1072
        key1 = ('key-1',)
1073
        key2 = ('key-2',)
1074
        key3 = ('key-3',)
1075
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1076
            (key1, 'value', ([key2],)),
1077
            (key2, 'value', ([key3],)),
1078
            (key3, 'value', ([],)),
1079
            ])
1080
        # We already know about key2, so we won't try to search for key3
1081
        parent_map = {key2: (key3,)}
1082
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1083
        search_keys = index._find_ancestors([key1], 0, parent_map,
1084
                                            missing_keys)
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1085
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1086
        self.assertEqual(set(), missing_keys)
1087
        self.assertEqual(set(), search_keys)
1088
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1089
    def test__find_ancestors_multiple_pages(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1090
        # We need to use enough keys that we actually cause a split
1091
        start_time = 1249671539
1092
        email = "joebob@example.com"
1093
        nodes = []
1094
        ref_lists = ((),)
1095
        rev_keys = []
1096
        for i in xrange(400):
1097
            rev_id = '%s-%s-%s' % (email,
1098
                                   osutils.compact_date(start_time + i),
1099
                                   osutils.rand_chars(16))
1100
            rev_key = (rev_id,)
1101
            nodes.append((rev_key, 'value', ref_lists))
1102
            # We have a ref 'list' of length 1, with a list of parents, with 1
1103
            # parent which is a key
1104
            ref_lists = ((rev_key,),)
1105
            rev_keys.append(rev_key)
1106
        index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1107
        self.assertEqual(400, index.key_count())
1108
        self.assertEqual(3, len(index._row_offsets))
1109
        nodes = dict(index._read_nodes([1, 2]))
1110
        l1 = nodes[1]
1111
        l2 = nodes[2]
1112
        min_l2_key = l2.min_key
1113
        max_l1_key = l1.max_key
1114
        self.assertTrue(max_l1_key < min_l2_key)
1115
        parents_min_l2_key = l2.keys[min_l2_key][1][0]
1116
        self.assertEqual((l1.max_key,), parents_min_l2_key)
1117
        # Now, whatever key we select that would fall on the second page,
1118
        # should give us all the parents until the page break
1119
        key_idx = rev_keys.index(min_l2_key)
1120
        next_key = rev_keys[key_idx+1]
1121
        # So now when we get the parent map, we should get the key we are
1122
        # looking for, min_l2_key, and then a reference to go look for the
1123
        # parent of that key
1124
        parent_map = {}
1125
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1126
        search_keys = index._find_ancestors([next_key], 0, parent_map,
1127
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1128
        self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1129
        self.assertEqual(set(), missing_keys)
1130
        self.assertEqual(set([max_l1_key]), search_keys)
1131
        parent_map = {}
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1132
        search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1133
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1134
        self.assertEqual(sorted(l1.keys), sorted(parent_map))
1135
        self.assertEqual(set(), missing_keys)
1136
        self.assertEqual(set(), search_keys)
1137
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1138
    def test__find_ancestors_empty_index(self):
4593.4.11 by John Arbash Meinel
Snapshot the work in progress.
1139
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1140
        parent_map = {}
1141
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1142
        search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1143
                                            missing_keys)
4593.4.11 by John Arbash Meinel
Snapshot the work in progress.
1144
        self.assertEqual(set(), search_keys)
1145
        self.assertEqual({}, parent_map)
1146
        self.assertEqual(set([('one',), ('two',)]), missing_keys)
1147
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1148
    def test_supports_unlimited_cache(self):
1149
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1150
        # We need enough nodes to cause a page split (so we have both an
1151
        # internal node and a couple leaf nodes. 500 seems to be enough.)
4634.71.2 by John Arbash Meinel
If we are going to sometimes use a dict, we have to conform to just the dict interface.
1152
        nodes = self.make_nodes(500, 1, 0)
1153
        for node in nodes:
1154
            builder.add_node(*node)
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1155
        stream = builder.finish()
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
1156
        trans = transport.get_transport(self.get_url())
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1157
        size = trans.put_file('index', stream)
1158
        index = btree_index.BTreeGraphIndex(trans, 'index', size)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1159
        self.assertEqual(500, index.key_count())
1160
        # We have an internal node
1161
        self.assertEqual(2, len(index._row_lengths))
1162
        # We have at least 2 leaf nodes
1163
        self.assertTrue(index._row_lengths[-1] >= 2)
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1164
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1165
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1166
                         index._leaf_node_cache._max_cache)
1167
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1168
        self.assertEqual(100, index._internal_node_cache._max_cache)
1169
        # No change if unlimited_cache=False is passed
1170
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1171
                                            unlimited_cache=False)
1172
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1173
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1174
                         index._leaf_node_cache._max_cache)
1175
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1176
        self.assertEqual(100, index._internal_node_cache._max_cache)
1177
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1178
                                            unlimited_cache=True)
1179
        self.assertIsInstance(index._leaf_node_cache, dict)
1180
        self.assertIs(type(index._internal_node_cache), dict)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1181
        # Exercise the lookup code
1182
        entries = set(index.iter_entries([n[0] for n in nodes]))
1183
        self.assertEqual(500, len(entries))
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1184
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1185
1186
class TestBTreeNodes(BTreeTestCase):
1187
1188
    def setUp(self):
1189
        BTreeTestCase.setUp(self)
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
1190
        self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1191
1192
    def test_LeafNode_1_0(self):
1193
        node_bytes = ("type=leaf\n"
1194
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
1195
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
1196
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
1197
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
1198
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
1199
        node = btree_index._LeafNode(node_bytes, 1, 0)
1200
        # We do direct access, or don't care about order, to leaf nodes most of
1201
        # the time, so a dict is useful:
1202
        self.assertEqual({
1203
            ("0000000000000000000000000000000000000000",): ("value:0", ()),
1204
            ("1111111111111111111111111111111111111111",): ("value:1", ()),
1205
            ("2222222222222222222222222222222222222222",): ("value:2", ()),
1206
            ("3333333333333333333333333333333333333333",): ("value:3", ()),
1207
            ("4444444444444444444444444444444444444444",): ("value:4", ()),
1208
            }, node.keys)
1209
1210
    def test_LeafNode_2_2(self):
1211
        node_bytes = ("type=leaf\n"
1212
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
1213
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1214
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1215
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
1216
            ""
1217
            )
1218
        node = btree_index._LeafNode(node_bytes, 2, 2)
1219
        # We do direct access, or don't care about order, to leaf nodes most of
1220
        # the time, so a dict is useful:
1221
        self.assertEqual({
1222
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1223
            ('00', '11'): ('value:1',
1224
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1225
            ('11', '33'): ('value:3',
1226
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1227
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
1228
            }, node.keys)
1229
1230
    def test_InternalNode_1(self):
1231
        node_bytes = ("type=internal\n"
1232
            "offset=1\n"
1233
            "0000000000000000000000000000000000000000\n"
1234
            "1111111111111111111111111111111111111111\n"
1235
            "2222222222222222222222222222222222222222\n"
1236
            "3333333333333333333333333333333333333333\n"
1237
            "4444444444444444444444444444444444444444\n"
1238
            )
1239
        node = btree_index._InternalNode(node_bytes)
1240
        # We want to bisect to find the right children from this node, so a
1241
        # vector is most useful.
1242
        self.assertEqual([
1243
            ("0000000000000000000000000000000000000000",),
1244
            ("1111111111111111111111111111111111111111",),
1245
            ("2222222222222222222222222222222222222222",),
1246
            ("3333333333333333333333333333333333333333",),
1247
            ("4444444444444444444444444444444444444444",),
1248
            ], node.keys)
1249
        self.assertEqual(1, node.offset)
1250
1251
    def test_LeafNode_2_2(self):
1252
        node_bytes = ("type=leaf\n"
1253
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
1254
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1255
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1256
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
1257
            ""
1258
            )
1259
        node = btree_index._LeafNode(node_bytes, 2, 2)
1260
        # We do direct access, or don't care about order, to leaf nodes most of
1261
        # the time, so a dict is useful:
1262
        self.assertEqual({
1263
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1264
            ('00', '11'): ('value:1',
1265
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1266
            ('11', '33'): ('value:3',
1267
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1268
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
1269
            }, node.keys)
1270
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1271
    def assertFlattened(self, expected, key, value, refs):
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1272
        flat_key, flat_line = self.parse_btree._flatten_node(
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1273
            (None, key, value, refs), bool(refs))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1274
        self.assertEqual('\x00'.join(key), flat_key)
1275
        self.assertEqual(expected, flat_line)
1276
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1277
    def test__flatten_node(self):
1278
        self.assertFlattened('key\0\0value\n', ('key',), 'value', [])
1279
        self.assertFlattened('key\0tuple\0\0value str\n',
1280
                             ('key', 'tuple'), 'value str', [])
1281
        self.assertFlattened('key\0tuple\0triple\0\0value str\n',
1282
                             ('key', 'tuple', 'triple'), 'value str', [])
1283
        self.assertFlattened('k\0t\0s\0ref\0value str\n',
1284
                             ('k', 't', 's'), 'value str', [[('ref',)]])
1285
        self.assertFlattened('key\0tuple\0ref\0key\0value str\n',
1286
                             ('key', 'tuple'), 'value str', [[('ref', 'key')]])
1287
        self.assertFlattened("00\x0000\x00\t00\x00ref00\x00value:0\n",
1288
            ('00', '00'), 'value:0', ((), (('00', 'ref00'),)))
1289
        self.assertFlattened(
1290
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
1291
            ('00', '11'), 'value:1',
1292
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01'))))
1293
        self.assertFlattened(
1294
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
1295
            ('11', '33'), 'value:3',
1296
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22'))))
1297
        self.assertFlattened(
1298
            "11\x0044\x00\t11\x00ref00\x00value:4\n",
1299
            ('11', '44'), 'value:4', ((), (('11', 'ref00'),)))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1300
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1301
1302
class TestCompiledBtree(tests.TestCase):
1303
1304
    def test_exists(self):
1305
        # This is just to let the user know if they don't have the feature
1306
        # available
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
1307
        self.requireFeature(compiled_btreeparser_feature)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1308
1309
1310
class TestMultiBisectRight(tests.TestCase):
1311
1312
    def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
1313
        self.assertEqual(offsets,
1314
                         btree_index.BTreeGraphIndex._multi_bisect_right(
1315
                            search_keys, fixed_keys))
1316
1317
    def test_after(self):
1318
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
1319
        self.assertMultiBisectRight([(3, ['e', 'f', 'g'])],
1320
                                    ['e', 'f', 'g'], ['a', 'b', 'c'])
1321
1322
    def test_before(self):
1323
        self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
1324
        self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])],
1325
                                    ['a', 'b', 'c', 'd'], ['e', 'f', 'g'])
1326
1327
    def test_exact(self):
1328
        self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
1329
        self.assertMultiBisectRight([(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b'])
1330
        self.assertMultiBisectRight([(1, ['a']), (3, ['c'])],
1331
                                    ['a', 'c'], ['a', 'b', 'c'])
1332
1333
    def test_inbetween(self):
1334
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
1335
        self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
1336
                                    ['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h'])
1337
1338
    def test_mixed(self):
1339
        self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']),
1340
                                     (4, ['g', 'h'])],
1341
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
1342
                                    ['c', 'd', 'f', 'g'])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1343
1344
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1345
class TestExpandOffsets(tests.TestCase):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1346
1347
    def make_index(self, size, recommended_pages=None):
1348
        """Make an index with a generic size.
1349
1350
        This doesn't actually create anything on disk, it just primes a
1351
        BTreeGraphIndex with the recommended information.
1352
        """
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
1353
        index = btree_index.BTreeGraphIndex(
1354
            transport.get_transport('memory:///'), 'test-index', size=size)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1355
        if recommended_pages is not None:
1356
            index._recommended_pages = recommended_pages
1357
        return index
1358
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1359
    def set_cached_offsets(self, index, cached_offsets):
1360
        """Monkeypatch to give a canned answer for _get_offsets_for...()."""
1361
        def _get_offsets_to_cached_pages():
1362
            cached = set(cached_offsets)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1363
            return cached
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1364
        index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1365
1366
    def prepare_index(self, index, node_ref_lists, key_length, key_count,
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1367
                      row_lengths, cached_offsets):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1368
        """Setup the BTreeGraphIndex with some pre-canned information."""
1369
        index.node_ref_lists = node_ref_lists
1370
        index._key_length = key_length
1371
        index._key_count = key_count
1372
        index._row_lengths = row_lengths
1373
        index._compute_row_offsets()
1374
        index._root_node = btree_index._InternalNode('internal\noffset=0\n')
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1375
        self.set_cached_offsets(index, cached_offsets)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1376
1377
    def make_100_node_index(self):
1378
        index = self.make_index(4096*100, 6)
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1379
        # Consider we've already made a single request at the middle
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1380
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1381
                           key_count=1000, row_lengths=[1, 99],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1382
                           cached_offsets=[0, 50])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1383
        return index
1384
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1385
    def make_1000_node_index(self):
1386
        index = self.make_index(4096*1000, 6)
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1387
        # Pretend we've already made a single request in the middle
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1388
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1389
                           key_count=90000, row_lengths=[1, 9, 990],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1390
                           cached_offsets=[0, 5, 500])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1391
        return index
1392
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1393
    def assertNumPages(self, expected_pages, index, size):
1394
        index._size = size
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1395
        self.assertEqual(expected_pages, index._compute_total_pages_in_index())
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1396
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1397
    def assertExpandOffsets(self, expected, index, offsets):
1398
        self.assertEqual(expected, index._expand_offsets(offsets),
1399
                         'We did not get the expected value after expanding'
1400
                         ' %s' % (offsets,))
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1401
1402
    def test_default_recommended_pages(self):
1403
        index = self.make_index(None)
1404
        # local transport recommends 4096 byte reads, which is 1 page
1405
        self.assertEqual(1, index._recommended_pages)
1406
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1407
    def test__compute_total_pages_in_index(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1408
        index = self.make_index(None)
1409
        self.assertNumPages(1, index, 1024)
1410
        self.assertNumPages(1, index, 4095)
1411
        self.assertNumPages(1, index, 4096)
1412
        self.assertNumPages(2, index, 4097)
1413
        self.assertNumPages(2, index, 8192)
1414
        self.assertNumPages(76, index, 4096*75 + 10)
1415
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1416
    def test__find_layer_start_and_stop(self):
1417
        index = self.make_1000_node_index()
1418
        self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1419
        self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1420
        self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1421
        self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1422
        self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1423
        self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1424
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1425
    def test_unknown_size(self):
1426
        # We should not expand if we don't know the file size
1427
        index = self.make_index(None, 10)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1428
        self.assertExpandOffsets([0], index, [0])
1429
        self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1430
1431
    def test_more_than_recommended(self):
1432
        index = self.make_index(4096*100, 2)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1433
        self.assertExpandOffsets([1, 10], index, [1, 10])
1434
        self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1435
1436
    def test_read_all_from_root(self):
1437
        index = self.make_index(4096*10, 20)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1438
        self.assertExpandOffsets(range(10), index, [0])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1439
1440
    def test_read_all_when_cached(self):
1441
        # We've read enough that we can grab all the rest in a single request
1442
        index = self.make_index(4096*10, 5)
1443
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1444
                           key_count=1000, row_lengths=[1, 9],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1445
                           cached_offsets=[0, 1, 2, 5, 6])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1446
        # It should fill the remaining nodes, regardless of the one requested
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1447
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1448
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1449
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1450
1451
    def test_no_root_node(self):
1452
        index = self.make_index(4096*10, 5)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1453
        self.assertExpandOffsets([0], index, [0])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1454
1455
    def test_include_neighbors(self):
1456
        index = self.make_100_node_index()
1457
        # We expand in both directions, until we have at least 'recommended'
1458
        # pages
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1459
        self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1460
        self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1461
        # If we hit an 'edge' we continue in the other direction
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1462
        self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1463
        self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1464
1465
        # Requesting many nodes will expand all locations equally
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1466
        self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1467
        self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1468
                               [2, 10, 81])
1469
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1470
    def test_stop_at_cached(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1471
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1472
        self.set_cached_offsets(index, [0, 10, 19])
1473
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1474
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1475
        self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1476
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1477
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1478
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1479
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1480
    def test_cannot_fully_expand(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1481
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1482
        self.set_cached_offsets(index, [0, 10, 12])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1483
        # We don't go into an endless loop if we are bound by cached nodes
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1484
        self.assertExpandOffsets([11], index, [11])
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1485
1486
    def test_overlap(self):
1487
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1488
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1489
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1490
1491
    def test_stay_within_layer(self):
1492
        index = self.make_1000_node_index()
1493
        # When expanding a request, we won't read nodes from the next layer
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1494
        self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1495
        self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1496
        self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1497
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1498
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1499
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1500
        self.set_cached_offsets(index, [0, 4, 12])
1501
        self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1502
        self.assertExpandOffsets([10, 11], index, [11])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1503
1504
    def test_small_requests_unexpanded(self):
1505
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1506
        self.set_cached_offsets(index, [0])
1507
        self.assertExpandOffsets([1], index, [1])
1508
        self.assertExpandOffsets([50], index, [50])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1509
        # If we request more than one node, then we'll expand
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1510
        self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1511
1512
        # The first pass does not expand
1513
        index = self.make_1000_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1514
        self.set_cached_offsets(index, [0])
1515
        self.assertExpandOffsets([1], index, [1])
1516
        self.set_cached_offsets(index, [0, 1])
1517
        self.assertExpandOffsets([100], index, [100])
1518
        self.set_cached_offsets(index, [0, 1, 100])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1519
        # But after the first depth, we will expand
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1520
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1521
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1522
        self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1523
        self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,
1524
                                 [105])