~bzr-pqm/bzr/bzr.dev

5557.1.7 by John Arbash Meinel
Merge in the bzr.dev 5582
1
# Copyright (C) 2008-2011 Canonical Ltd
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
2
#
3
# This program is free software; you can redistribute it and/or modify
3641.3.29 by John Arbash Meinel
Cleanup the copyright headers
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
16
#
17
18
"""Tests for btree indices."""
19
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
20
import pprint
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
21
import zlib
22
23
from bzrlib import (
24
    btree_index,
25
    errors,
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
26
    fifo_cache,
27
    lru_cache,
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
28
    osutils,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
29
    tests,
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
30
    transport,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
31
    )
32
from bzrlib.tests import (
33
    TestCaseWithTransport,
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
34
    scenarios,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
35
    )
5967.12.1 by Martin Pool
Move all test features into bzrlib.tests.features
36
from bzrlib.tests import (
37
    features,
38
    )
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
39
40
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
41
load_tests = scenarios.load_tests_apply_scenarios
42
43
44
def btreeparser_scenarios():
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
45
    import bzrlib._btree_serializer_py as py_module
4084.5.1 by Robert Collins
Bulk update all test adaptation into a single approach, using multiply_tests rather than test adapters.
46
    scenarios = [('python', {'parse_btree': py_module})]
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
47
    if compiled_btreeparser_feature.available():
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
48
        scenarios.append(('C', 
49
            {'parse_btree': compiled_btreeparser_feature.module}))
50
    return scenarios
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
51
52
5967.12.1 by Martin Pool
Move all test features into bzrlib.tests.features
53
compiled_btreeparser_feature = features.ModuleAvailableFeature(
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
54
    'bzrlib._btree_serializer_pyx')
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
55
56
57
class BTreeTestCase(TestCaseWithTransport):
58
    # test names here are suffixed by the key length and reference list count
59
    # that they test.
60
61
    def setUp(self):
6552.1.4 by Vincent Ladeuil
Remaining tests matching setup(self) that can be rewritten with super().
62
        super(BTreeTestCase, self).setUp()
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
63
        self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
64
65
    def make_nodes(self, count, key_elements, reference_lists):
66
        """Generate count*key_elements sample nodes."""
67
        keys = []
68
        for prefix_pos in range(key_elements):
69
            if key_elements - 1:
70
                prefix = (str(prefix_pos) * 40,)
71
            else:
72
                prefix = ()
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
73
            for pos in xrange(count):
74
                # TODO: This creates odd keys. When count == 100,000, it
75
                #       creates a 240 byte key
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
76
                key = prefix + (str(pos) * 40,)
77
                value = "value:%s" % pos
78
                if reference_lists:
79
                    # generate some references
80
                    refs = []
81
                    for list_pos in range(reference_lists):
82
                        # as many keys in each list as its index + the key depth
83
                        # mod 2 - this generates both 0 length lists and
84
                        # ones slightly longer than the number of lists.
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
85
                        # It also ensures we have non homogeneous lists.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
86
                        refs.append([])
87
                        for ref_pos in range(list_pos + pos % 2):
88
                            if pos % 2:
89
                                # refer to a nearby key
90
                                refs[-1].append(prefix + ("ref" + str(pos - 1) * 40,))
91
                            else:
92
                                # serial of this ref in the ref list
93
                                refs[-1].append(prefix + ("ref" + str(ref_pos) * 40,))
94
                        refs[-1] = tuple(refs[-1])
95
                    refs = tuple(refs)
96
                else:
97
                    refs = ()
98
                keys.append((key, value, refs))
99
        return keys
100
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
101
    def shrink_page_size(self):
102
        """Shrink the default page size so that less fits in a page."""
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
103
        self.overrideAttr(btree_index, '_PAGE_SIZE')
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
104
        btree_index._PAGE_SIZE = 2048
105
6015.54.1 by Martin Packman
Add assertEqualsApproxCompressed to bt.test_btree_index as newer zlib gives smaller output
106
    def assertEqualsApproxCompressed(self, expected, actual, slop=6):
107
        """Check a count of compressed bytes is approximately as expected
108
109
        Relying on compressed length being stable even with fixed inputs is
110
        slightly bogus, but zlib is stable enough that this mostly works.
111
        """
112
        if not expected - slop < actual < expected + slop:
113
            self.fail("Expected around %d bytes compressed but got %d" %
114
                (expected, actual))
115
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
116
117
class TestBTreeBuilder(BTreeTestCase):
118
4744.2.7 by John Arbash Meinel
Add .clear_cache() members to GraphIndexBuilder and BTreeBuilder.
119
    def test_clear_cache(self):
120
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
121
        # This is a no-op, but we need the api to be consistent with other
122
        # BTreeGraphIndex apis.
123
        builder.clear_cache()
124
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
125
    def test_empty_1_0(self):
126
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
127
        # NamedTemporaryFile dies on builder.finish().read(). weird.
128
        temp_file = builder.finish()
129
        content = temp_file.read()
130
        del temp_file
131
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
132
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
133
            "row_lengths=\n",
134
            content)
135
136
    def test_empty_2_1(self):
137
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
138
        # NamedTemporaryFile dies on builder.finish().read(). weird.
139
        temp_file = builder.finish()
140
        content = temp_file.read()
141
        del temp_file
142
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
143
            "B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
144
            "row_lengths=\n",
145
            content)
146
147
    def test_root_leaf_1_0(self):
148
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
149
        nodes = self.make_nodes(5, 1, 0)
150
        for node in nodes:
151
            builder.add_node(*node)
152
        # NamedTemporaryFile dies on builder.finish().read(). weird.
153
        temp_file = builder.finish()
154
        content = temp_file.read()
155
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
156
        self.assertEqual(131, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
157
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
158
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
159
            "row_lengths=1\n",
160
            content[:73])
161
        node_content = content[73:]
162
        node_bytes = zlib.decompress(node_content)
163
        expected_node = ("type=leaf\n"
164
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
165
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
166
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
167
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
168
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
169
        self.assertEqual(expected_node, node_bytes)
170
171
    def test_root_leaf_2_2(self):
172
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
173
        nodes = self.make_nodes(5, 2, 2)
174
        for node in nodes:
175
            builder.add_node(*node)
176
        # NamedTemporaryFile dies on builder.finish().read(). weird.
177
        temp_file = builder.finish()
178
        content = temp_file.read()
179
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
180
        self.assertEqual(238, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
181
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
182
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
183
            "row_lengths=1\n",
184
            content[:74])
185
        node_content = content[74:]
186
        node_bytes = zlib.decompress(node_content)
187
        expected_node = (
188
            "type=leaf\n"
189
            "0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
190
            "0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
191
            "0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
192
            "0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
193
            "0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
194
            "1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
195
            "1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
196
            "1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
197
            "1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
198
            "1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
199
            ""
200
            )
201
        self.assertEqual(expected_node, node_bytes)
202
203
    def test_2_leaves_1_0(self):
204
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
205
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
206
        for node in nodes:
207
            builder.add_node(*node)
208
        # NamedTemporaryFile dies on builder.finish().read(). weird.
209
        temp_file = builder.finish()
210
        content = temp_file.read()
211
        del temp_file
6015.54.1 by Martin Packman
Add assertEqualsApproxCompressed to bt.test_btree_index as newer zlib gives smaller output
212
        self.assertEqualsApproxCompressed(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
213
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
214
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
215
            "row_lengths=1,2\n",
216
            content[:77])
217
        root = content[77:4096]
218
        leaf1 = content[4096:8192]
219
        leaf2 = content[8192:]
220
        root_bytes = zlib.decompress(root)
221
        expected_root = (
222
            "type=internal\n"
223
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
224
            ) + ("307" * 40) + "\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
225
        self.assertEqual(expected_root, root_bytes)
226
        # We already know serialisation works for leaves, check key selection:
227
        leaf1_bytes = zlib.decompress(leaf1)
228
        sorted_node_keys = sorted(node[0] for node in nodes)
229
        node = btree_index._LeafNode(leaf1_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
230
        self.assertEqual(231, len(node))
231
        self.assertEqual(sorted_node_keys[:231], node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
232
        leaf2_bytes = zlib.decompress(leaf2)
233
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
234
        self.assertEqual(400 - 231, len(node))
235
        self.assertEqual(sorted_node_keys[231:], node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
236
237
    def test_last_page_rounded_1_layer(self):
238
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
239
        nodes = self.make_nodes(10, 1, 0)
240
        for node in nodes:
241
            builder.add_node(*node)
242
        # NamedTemporaryFile dies on builder.finish().read(). weird.
243
        temp_file = builder.finish()
244
        content = temp_file.read()
245
        del temp_file
6015.54.1 by Martin Packman
Add assertEqualsApproxCompressed to bt.test_btree_index as newer zlib gives smaller output
246
        self.assertEqualsApproxCompressed(155, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
247
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
248
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
249
            "row_lengths=1\n",
250
            content[:74])
251
        # Check thelast page is well formed
252
        leaf2 = content[74:]
253
        leaf2_bytes = zlib.decompress(leaf2)
254
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
255
        self.assertEqual(10, len(node))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
256
        sorted_node_keys = sorted(node[0] for node in nodes)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
257
        self.assertEqual(sorted_node_keys, node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
258
259
    def test_last_page_not_rounded_2_layer(self):
260
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
261
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
262
        for node in nodes:
263
            builder.add_node(*node)
264
        # NamedTemporaryFile dies on builder.finish().read(). weird.
265
        temp_file = builder.finish()
266
        content = temp_file.read()
267
        del temp_file
6015.54.1 by Martin Packman
Add assertEqualsApproxCompressed to bt.test_btree_index as newer zlib gives smaller output
268
        self.assertEqualsApproxCompressed(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
269
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
270
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
271
            "row_lengths=1,2\n",
272
            content[:77])
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
273
        # Check the last page is well formed
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
274
        leaf2 = content[8192:]
275
        leaf2_bytes = zlib.decompress(leaf2)
276
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
277
        self.assertEqual(400 - 231, len(node))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
278
        sorted_node_keys = sorted(node[0] for node in nodes)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
279
        self.assertEqual(sorted_node_keys[231:], node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
280
281
    def test_three_level_tree_details(self):
282
        # The left most pointer in the second internal node in a row should
283
        # pointer to the second node that the internal node is for, _not_
284
        # the first, otherwise the first node overlaps with the last node of
285
        # the prior internal node on that row.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
286
        self.shrink_page_size()
287
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
288
        # 40K nodes is enough to create a two internal nodes on the second
289
        # level, with a 2K page size
290
        nodes = self.make_nodes(20000, 2, 2)
3641.3.5 by John Arbash Meinel
For iter_all and three_level tests adjust spill-at.
291
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
292
        for node in nodes:
293
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
294
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
295
        size = t.put_file('index', self.time(builder.finish))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
296
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
297
        index = btree_index.BTreeGraphIndex(t, 'index', size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
298
        # Seed the metadata, we're using internal calls now.
299
        index.key_count()
300
        self.assertEqual(3, len(index._row_lengths),
301
            "Not enough rows: %r" % index._row_lengths)
302
        self.assertEqual(4, len(index._row_offsets))
303
        self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
304
        internal_nodes = index._get_internal_nodes([0, 1, 2])
305
        root_node = internal_nodes[0]
306
        internal_node1 = internal_nodes[1]
307
        internal_node2 = internal_nodes[2]
308
        # The left most node node2 points at should be one after the right most
309
        # node pointed at by node1.
310
        self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
311
        # The left most key of the second node pointed at by internal_node2
312
        # should be its first key. We can check this by looking for its first key
313
        # in the second node it points at
314
        pos = index._row_offsets[2] + internal_node2.offset + 1
315
        leaf = index._get_leaf_nodes([pos])[pos]
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
316
        self.assertTrue(internal_node2.keys[0] in leaf)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
317
318
    def test_2_leaves_2_2(self):
319
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
320
        nodes = self.make_nodes(100, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
321
        for node in nodes:
322
            builder.add_node(*node)
323
        # NamedTemporaryFile dies on builder.finish().read(). weird.
324
        temp_file = builder.finish()
325
        content = temp_file.read()
326
        del temp_file
6015.54.1 by Martin Packman
Add assertEqualsApproxCompressed to bt.test_btree_index as newer zlib gives smaller output
327
        self.assertEqualsApproxCompressed(12643, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
328
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
329
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
330
            "row_lengths=1,3\n",
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
331
            content[:77])
332
        root = content[77:4096]
333
        leaf1 = content[4096:8192]
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
334
        leaf2 = content[8192:12288]
335
        leaf3 = content[12288:]
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
336
        root_bytes = zlib.decompress(root)
337
        expected_root = (
338
            "type=internal\n"
339
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
340
            + ("0" * 40) + "\x00" + ("91" * 40) + "\n"
341
            + ("1" * 40) + "\x00" + ("81" * 40) + "\n"
342
            )
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
343
        self.assertEqual(expected_root, root_bytes)
344
        # We assume the other leaf nodes have been written correctly - layering
345
        # FTW.
346
347
    def test_spill_index_stress_1_1(self):
348
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
349
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
350
        builder.add_node(*nodes[0])
351
        # Test the parts of the index that take up memory are doing so
352
        # predictably.
353
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
354
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
355
        builder.add_node(*nodes[1])
356
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
357
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
358
        self.assertEqual(1, len(builder._backing_indices))
359
        self.assertEqual(2, builder._backing_indices[0].key_count())
360
        # now back to memory
361
        builder.add_node(*nodes[2])
362
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
363
        self.assertIs(None, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
364
        # And spills to a second backing index combing all
365
        builder.add_node(*nodes[3])
366
        self.assertEqual(0, len(builder._nodes))
367
        self.assertIs(None, builder._nodes_by_key)
368
        self.assertEqual(2, len(builder._backing_indices))
369
        self.assertEqual(None, builder._backing_indices[0])
370
        self.assertEqual(4, builder._backing_indices[1].key_count())
371
        # The next spills to the 2-len slot
372
        builder.add_node(*nodes[4])
373
        builder.add_node(*nodes[5])
374
        self.assertEqual(0, len(builder._nodes))
375
        self.assertIs(None, builder._nodes_by_key)
376
        self.assertEqual(2, len(builder._backing_indices))
377
        self.assertEqual(2, builder._backing_indices[0].key_count())
378
        self.assertEqual(4, builder._backing_indices[1].key_count())
379
        # Next spill combines
380
        builder.add_node(*nodes[6])
381
        builder.add_node(*nodes[7])
382
        self.assertEqual(3, len(builder._backing_indices))
383
        self.assertEqual(None, builder._backing_indices[0])
384
        self.assertEqual(None, builder._backing_indices[1])
385
        self.assertEqual(8, builder._backing_indices[2].key_count())
386
        # And so forth - counting up in binary.
387
        builder.add_node(*nodes[8])
388
        builder.add_node(*nodes[9])
389
        self.assertEqual(3, len(builder._backing_indices))
390
        self.assertEqual(2, builder._backing_indices[0].key_count())
391
        self.assertEqual(None, builder._backing_indices[1])
392
        self.assertEqual(8, builder._backing_indices[2].key_count())
393
        builder.add_node(*nodes[10])
394
        builder.add_node(*nodes[11])
395
        self.assertEqual(3, len(builder._backing_indices))
396
        self.assertEqual(None, builder._backing_indices[0])
397
        self.assertEqual(4, builder._backing_indices[1].key_count())
398
        self.assertEqual(8, builder._backing_indices[2].key_count())
399
        builder.add_node(*nodes[12])
400
        # Test that memory and disk are both used for query methods; and that
401
        # None is skipped over happily.
402
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
403
            list(builder.iter_all_entries()))
404
        # Two nodes - one memory one disk
405
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
406
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
407
        self.assertEqual(13, builder.key_count())
408
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
409
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
410
        builder.add_node(*nodes[13])
411
        self.assertEqual(3, len(builder._backing_indices))
412
        self.assertEqual(2, builder._backing_indices[0].key_count())
413
        self.assertEqual(4, builder._backing_indices[1].key_count())
414
        self.assertEqual(8, builder._backing_indices[2].key_count())
415
        builder.add_node(*nodes[14])
416
        builder.add_node(*nodes[15])
417
        self.assertEqual(4, len(builder._backing_indices))
418
        self.assertEqual(None, builder._backing_indices[0])
419
        self.assertEqual(None, builder._backing_indices[1])
420
        self.assertEqual(None, builder._backing_indices[2])
421
        self.assertEqual(16, builder._backing_indices[3].key_count())
422
        # Now finish, and check we got a correctly ordered tree
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
423
        t = self.get_transport('')
424
        size = t.put_file('index', builder.finish())
425
        index = btree_index.BTreeGraphIndex(t, 'index', size)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
426
        nodes = list(index.iter_all_entries())
427
        self.assertEqual(sorted(nodes), nodes)
428
        self.assertEqual(16, len(nodes))
429
430
    def test_spill_index_stress_1_1_no_combine(self):
431
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
4168.3.6 by John Arbash Meinel
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
432
        builder.set_optimize(for_size=False, combine_backing_indices=False)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
433
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
434
        builder.add_node(*nodes[0])
435
        # Test the parts of the index that take up memory are doing so
436
        # predictably.
437
        self.assertEqual(1, len(builder._nodes))
438
        self.assertIs(None, builder._nodes_by_key)
439
        builder.add_node(*nodes[1])
440
        self.assertEqual(0, len(builder._nodes))
441
        self.assertIs(None, builder._nodes_by_key)
442
        self.assertEqual(1, len(builder._backing_indices))
443
        self.assertEqual(2, builder._backing_indices[0].key_count())
444
        # now back to memory
445
        builder.add_node(*nodes[2])
446
        self.assertEqual(1, len(builder._nodes))
447
        self.assertIs(None, builder._nodes_by_key)
448
        # And spills to a second backing index but doesn't combine
449
        builder.add_node(*nodes[3])
450
        self.assertEqual(0, len(builder._nodes))
451
        self.assertIs(None, builder._nodes_by_key)
452
        self.assertEqual(2, len(builder._backing_indices))
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
453
        for backing_index in builder._backing_indices:
454
            self.assertEqual(2, backing_index.key_count())
455
        # The next spills to the 3rd slot
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
456
        builder.add_node(*nodes[4])
457
        builder.add_node(*nodes[5])
458
        self.assertEqual(0, len(builder._nodes))
459
        self.assertIs(None, builder._nodes_by_key)
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
460
        self.assertEqual(3, len(builder._backing_indices))
461
        for backing_index in builder._backing_indices:
462
            self.assertEqual(2, backing_index.key_count())
463
        # Now spill a few more, and check that we don't combine
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
464
        builder.add_node(*nodes[6])
465
        builder.add_node(*nodes[7])
466
        builder.add_node(*nodes[8])
467
        builder.add_node(*nodes[9])
468
        builder.add_node(*nodes[10])
469
        builder.add_node(*nodes[11])
470
        builder.add_node(*nodes[12])
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
471
        self.assertEqual(6, len(builder._backing_indices))
472
        for backing_index in builder._backing_indices:
473
            self.assertEqual(2, backing_index.key_count())
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
474
        # Test that memory and disk are both used for query methods; and that
475
        # None is skipped over happily.
476
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
477
            list(builder.iter_all_entries()))
478
        # Two nodes - one memory one disk
479
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
480
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
481
        self.assertEqual(13, builder.key_count())
482
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
483
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
484
        builder.add_node(*nodes[13])
485
        builder.add_node(*nodes[14])
486
        builder.add_node(*nodes[15])
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
487
        self.assertEqual(8, len(builder._backing_indices))
488
        for backing_index in builder._backing_indices:
489
            self.assertEqual(2, backing_index.key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
490
        # Now finish, and check we got a correctly ordered tree
491
        transport = self.get_transport('')
492
        size = transport.put_file('index', builder.finish())
493
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
494
        nodes = list(index.iter_all_entries())
495
        self.assertEqual(sorted(nodes), nodes)
496
        self.assertEqual(16, len(nodes))
497
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
498
    def test_set_optimize(self):
499
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
500
        builder.set_optimize(for_size=True)
501
        self.assertTrue(builder._optimize_for_size)
502
        builder.set_optimize(for_size=False)
503
        self.assertFalse(builder._optimize_for_size)
4168.3.6 by John Arbash Meinel
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
504
        # test that we can set combine_backing_indices without effecting
505
        # _optimize_for_size
506
        obj = object()
507
        builder._optimize_for_size = obj
508
        builder.set_optimize(combine_backing_indices=False)
509
        self.assertFalse(builder._combine_backing_indices)
510
        self.assertIs(obj, builder._optimize_for_size)
511
        builder.set_optimize(combine_backing_indices=True)
512
        self.assertTrue(builder._combine_backing_indices)
513
        self.assertIs(obj, builder._optimize_for_size)
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
514
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
515
    def test_spill_index_stress_2_2(self):
516
        # test that references and longer keys don't confuse things.
517
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
518
            spill_at=2)
519
        nodes = self.make_nodes(16, 2, 2)
520
        builder.add_node(*nodes[0])
521
        # Test the parts of the index that take up memory are doing so
522
        # predictably.
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
523
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
524
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
525
        builder.add_node(*nodes[1])
526
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
527
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
528
        self.assertEqual(1, len(builder._backing_indices))
529
        self.assertEqual(2, builder._backing_indices[0].key_count())
530
        # now back to memory
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
531
        old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
532
        builder.add_node(*nodes[2])
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
533
        self.assertEqual(1, len(builder._nodes))
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
534
        self.assertIsNot(None, builder._nodes_by_key)
535
        self.assertNotEqual({}, builder._nodes_by_key)
536
        # We should have a new entry
537
        self.assertNotEqual(old, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
538
        # And spills to a second backing index combing all
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
539
        builder.add_node(*nodes[3])
540
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
541
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
542
        self.assertEqual(2, len(builder._backing_indices))
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
543
        self.assertEqual(None, builder._backing_indices[0])
544
        self.assertEqual(4, builder._backing_indices[1].key_count())
545
        # The next spills to the 2-len slot
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
546
        builder.add_node(*nodes[4])
547
        builder.add_node(*nodes[5])
548
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
549
        self.assertIs(None, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
550
        self.assertEqual(2, len(builder._backing_indices))
551
        self.assertEqual(2, builder._backing_indices[0].key_count())
552
        self.assertEqual(4, builder._backing_indices[1].key_count())
553
        # Next spill combines
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
554
        builder.add_node(*nodes[6])
555
        builder.add_node(*nodes[7])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
556
        self.assertEqual(3, len(builder._backing_indices))
557
        self.assertEqual(None, builder._backing_indices[0])
558
        self.assertEqual(None, builder._backing_indices[1])
559
        self.assertEqual(8, builder._backing_indices[2].key_count())
560
        # And so forth - counting up in binary.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
561
        builder.add_node(*nodes[8])
562
        builder.add_node(*nodes[9])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
563
        self.assertEqual(3, len(builder._backing_indices))
564
        self.assertEqual(2, builder._backing_indices[0].key_count())
565
        self.assertEqual(None, builder._backing_indices[1])
566
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
567
        builder.add_node(*nodes[10])
568
        builder.add_node(*nodes[11])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
569
        self.assertEqual(3, len(builder._backing_indices))
570
        self.assertEqual(None, builder._backing_indices[0])
571
        self.assertEqual(4, builder._backing_indices[1].key_count())
572
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
573
        builder.add_node(*nodes[12])
574
        # Test that memory and disk are both used for query methods; and that
575
        # None is skipped over happily.
576
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
577
            list(builder.iter_all_entries()))
578
        # Two nodes - one memory one disk
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
579
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
580
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
581
        self.assertEqual(13, builder.key_count())
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
582
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
583
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
584
        builder.add_node(*nodes[13])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
585
        self.assertEqual(3, len(builder._backing_indices))
586
        self.assertEqual(2, builder._backing_indices[0].key_count())
587
        self.assertEqual(4, builder._backing_indices[1].key_count())
588
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
589
        builder.add_node(*nodes[14])
590
        builder.add_node(*nodes[15])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
591
        self.assertEqual(4, len(builder._backing_indices))
592
        self.assertEqual(None, builder._backing_indices[0])
593
        self.assertEqual(None, builder._backing_indices[1])
594
        self.assertEqual(None, builder._backing_indices[2])
595
        self.assertEqual(16, builder._backing_indices[3].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
596
        # Now finish, and check we got a correctly ordered tree
597
        transport = self.get_transport('')
598
        size = transport.put_file('index', builder.finish())
599
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
600
        nodes = list(index.iter_all_entries())
601
        self.assertEqual(sorted(nodes), nodes)
602
        self.assertEqual(16, len(nodes))
603
604
    def test_spill_index_duplicate_key_caught_on_finish(self):
605
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
606
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
607
        builder.add_node(*nodes[0])
608
        builder.add_node(*nodes[1])
609
        builder.add_node(*nodes[0])
610
        self.assertRaises(errors.BadIndexDuplicateKey, builder.finish)
611
612
613
class TestBTreeIndex(BTreeTestCase):
614
615
    def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
616
        builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
617
            key_elements=key_elements)
618
        for key, value, references in nodes:
619
            builder.add_node(key, value, references)
620
        stream = builder.finish()
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
621
        trans = transport.get_transport_from_url('trace+' + self.get_url())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
622
        size = trans.put_file('index', stream)
623
        return btree_index.BTreeGraphIndex(trans, 'index', size)
624
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
625
    def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
626
                               offset=0):
627
        builder = btree_index.BTreeBuilder(key_elements=key_elements,
628
                                           reference_lists=ref_lists)
629
        builder.add_nodes(nodes)
630
        transport = self.get_transport('')
5074.4.7 by John Arbash Meinel
Workaround a strange bug about the file closing before it is read.
631
        # NamedTemporaryFile dies on builder.finish().read(). weird.
632
        temp_file = builder.finish()
633
        content = temp_file.read()
634
        del temp_file
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
635
        size = len(content)
636
        transport.put_bytes('index', (' '*offset)+content)
637
        return btree_index.BTreeGraphIndex(transport, 'index', size=size,
638
                                           offset=offset)
639
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
640
    def test_clear_cache(self):
641
        nodes = self.make_nodes(160, 2, 2)
642
        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
643
        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
644
        self.assertEqual([1, 4], index._row_lengths)
645
        self.assertIsNot(None, index._root_node)
4744.2.9 by John Arbash Meinel
Move the note and assertions.
646
        internal_node_pre_clear = index._internal_node_cache.keys()
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
647
        self.assertTrue(len(index._leaf_node_cache) > 0)
648
        index.clear_cache()
649
        # We don't touch _root_node or _internal_node_cache, both should be
650
        # small, and can save a round trip or two
651
        self.assertIsNot(None, index._root_node)
4744.2.9 by John Arbash Meinel
Move the note and assertions.
652
        # NOTE: We don't want to affect the _internal_node_cache, as we expect
653
        #       it will be small, and if we ever do touch this index again, it
654
        #       will save round-trips.  This assertion isn't very strong,
655
        #       becuase without a 3-level index, we don't have any internal
656
        #       nodes cached.
657
        self.assertEqual(internal_node_pre_clear,
658
                         index._internal_node_cache.keys())
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
659
        self.assertEqual(0, len(index._leaf_node_cache))
660
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
661
    def test_trivial_constructor(self):
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
662
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
663
        index = btree_index.BTreeGraphIndex(t, 'index', None)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
664
        # Checks the page size at load, but that isn't logged yet.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
665
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
666
667
    def test_with_size_constructor(self):
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
668
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
669
        index = btree_index.BTreeGraphIndex(t, 'index', 1)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
670
        # Checks the page size at load, but that isn't logged yet.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
671
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
672
673
    def test_empty_key_count_no_size(self):
674
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
675
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
676
        t.put_file('index', builder.finish())
677
        index = btree_index.BTreeGraphIndex(t, 'index', None)
678
        del t._activity[:]
679
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
680
        self.assertEqual(0, index.key_count())
681
        # The entire index should have been requested (as we generally have the
682
        # size available, and doing many small readvs is inappropriate).
683
        # We can't tell how much was actually read here, but - check the code.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
684
        self.assertEqual([('get', 'index')], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
685
686
    def test_empty_key_count(self):
687
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
688
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
689
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
690
        self.assertEqual(72, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
691
        index = btree_index.BTreeGraphIndex(t, 'index', size)
692
        del t._activity[:]
693
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
694
        self.assertEqual(0, index.key_count())
695
        # The entire index should have been read, as 4K > size
696
        self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
697
                         t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
698
699
    def test_non_empty_key_count_2_2(self):
700
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
701
        nodes = self.make_nodes(35, 2, 2)
702
        for node in nodes:
703
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
704
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
705
        size = t.put_file('index', builder.finish())
706
        index = btree_index.BTreeGraphIndex(t, 'index', size)
707
        del t._activity[:]
708
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
709
        self.assertEqual(70, index.key_count())
710
        # The entire index should have been read, as it is one page long.
711
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
712
            t._activity)
6015.54.1 by Martin Packman
Add assertEqualsApproxCompressed to bt.test_btree_index as newer zlib gives smaller output
713
        self.assertEqualsApproxCompressed(1173, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
714
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
715
    def test_with_offset_no_size(self):
716
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
717
                                            offset=1234,
718
                                            nodes=self.make_nodes(200, 1, 1))
719
        index._size = None # throw away the size info
720
        self.assertEqual(200, index.key_count())
721
722
    def test_with_small_offset(self):
723
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
724
                                            offset=1234,
725
                                            nodes=self.make_nodes(200, 1, 1))
726
        self.assertEqual(200, index.key_count())
727
728
    def test_with_large_offset(self):
729
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
730
                                            offset=123456,
731
                                            nodes=self.make_nodes(200, 1, 1))
732
        self.assertEqual(200, index.key_count())
733
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
734
    def test__read_nodes_no_size_one_page_reads_once(self):
735
        self.make_index(nodes=[(('key',), 'value', ())])
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
736
        trans = transport.get_transport_from_url('trace+' + self.get_url())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
737
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
738
        del trans._activity[:]
739
        nodes = dict(index._read_nodes([0]))
740
        self.assertEqual([0], nodes.keys())
741
        node = nodes[0]
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
742
        self.assertEqual([('key',)], node.all_keys())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
743
        self.assertEqual([('get', 'index')], trans._activity)
744
745
    def test__read_nodes_no_size_multiple_pages(self):
746
        index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
747
        index.key_count()
748
        num_pages = index._row_offsets[-1]
749
        # Reopen with a traced transport and no size
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
750
        trans = transport.get_transport_from_url('trace+' + self.get_url())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
751
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
752
        del trans._activity[:]
753
        nodes = dict(index._read_nodes([0]))
754
        self.assertEqual(range(num_pages), nodes.keys())
755
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
756
    def test_2_levels_key_count_2_2(self):
757
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
758
        nodes = self.make_nodes(160, 2, 2)
759
        for node in nodes:
760
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
761
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
762
        size = t.put_file('index', builder.finish())
6015.54.1 by Martin Packman
Add assertEqualsApproxCompressed to bt.test_btree_index as newer zlib gives smaller output
763
        self.assertEqualsApproxCompressed(17692, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
764
        index = btree_index.BTreeGraphIndex(t, 'index', size)
765
        del t._activity[:]
766
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
767
        self.assertEqual(320, index.key_count())
768
        # The entire index should not have been read.
769
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
770
                         t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
771
772
    def test_validate_one_page(self):
773
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
774
        nodes = self.make_nodes(45, 2, 2)
775
        for node in nodes:
776
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
777
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
778
        size = t.put_file('index', builder.finish())
779
        index = btree_index.BTreeGraphIndex(t, 'index', size)
780
        del t._activity[:]
781
        self.assertEqual([], t._activity)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
782
        index.validate()
783
        # The entire index should have been read linearly.
784
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
785
                         t._activity)
6015.54.1 by Martin Packman
Add assertEqualsApproxCompressed to bt.test_btree_index as newer zlib gives smaller output
786
        self.assertEqualsApproxCompressed(1488, size)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
787
788
    def test_validate_two_pages(self):
789
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
790
        nodes = self.make_nodes(80, 2, 2)
791
        for node in nodes:
792
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
793
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
794
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
795
        # Root page, 2 leaf pages
6015.54.1 by Martin Packman
Add assertEqualsApproxCompressed to bt.test_btree_index as newer zlib gives smaller output
796
        self.assertEqualsApproxCompressed(9339, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
797
        index = btree_index.BTreeGraphIndex(t, 'index', size)
798
        del t._activity[:]
799
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
800
        index.validate()
6015.54.1 by Martin Packman
Add assertEqualsApproxCompressed to bt.test_btree_index as newer zlib gives smaller output
801
        rem = size - 8192 # Number of remaining bytes after second block
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
802
        # The entire index should have been read linearly.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
803
        self.assertEqual(
804
            [('readv', 'index', [(0, 4096)], False, None),
6015.54.1 by Martin Packman
Add assertEqualsApproxCompressed to bt.test_btree_index as newer zlib gives smaller output
805
             ('readv', 'index', [(4096, 4096), (8192, rem)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
806
            t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
807
        # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
808
        # node and make validate find them.
809
810
    def test_eq_ne(self):
811
        # two indices are equal when constructed with the same parameters:
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
812
        t1 = transport.get_transport_from_url('trace+' + self.get_url(''))
5609.9.4 by Vincent Ladeuil
Use self.get_transport instead of transport.get_transport where possible.
813
        t2 = self.get_transport()
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
814
        self.assertTrue(
815
            btree_index.BTreeGraphIndex(t1, 'index', None) ==
816
            btree_index.BTreeGraphIndex(t1, 'index', None))
817
        self.assertTrue(
818
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
819
            btree_index.BTreeGraphIndex(t1, 'index', 20))
820
        self.assertFalse(
821
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
822
            btree_index.BTreeGraphIndex(t2, 'index', 20))
823
        self.assertFalse(
824
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) ==
825
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
826
        self.assertFalse(
827
            btree_index.BTreeGraphIndex(t1, 'index', 10) ==
828
            btree_index.BTreeGraphIndex(t1, 'index', 20))
829
        self.assertFalse(
830
            btree_index.BTreeGraphIndex(t1, 'index', None) !=
831
            btree_index.BTreeGraphIndex(t1, 'index', None))
832
        self.assertFalse(
833
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
834
            btree_index.BTreeGraphIndex(t1, 'index', 20))
835
        self.assertTrue(
836
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
837
            btree_index.BTreeGraphIndex(t2, 'index', 20))
838
        self.assertTrue(
839
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) !=
840
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
841
        self.assertTrue(
842
            btree_index.BTreeGraphIndex(t1, 'index', 10) !=
843
            btree_index.BTreeGraphIndex(t1, 'index', 20))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
844
6178.2.9 by Shannon Weyrick
A version of the patch, based on suggestions from John Meinel, which detects an empty page differently to avoid false positives.
845
    def test_key_too_big(self):
846
        # the size that matters here is the _compressed_ size of the key, so we can't
847
        # do a simple character repeat.
848
        bigKey = ''.join(map(repr, xrange(btree_index._PAGE_SIZE)))
849
        self.assertRaises(errors.BadIndexKey,
850
                          self.make_index,
851
                          nodes=[((bigKey,), 'value', ())])
852
        
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
853
    def test_iter_all_only_root_no_size(self):
854
        self.make_index(nodes=[(('key',), 'value', ())])
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
855
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
856
        index = btree_index.BTreeGraphIndex(t, 'index', None)
857
        del t._activity[:]
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
858
        self.assertEqual([(('key',), 'value')],
859
                         [x[1:] for x in index.iter_all_entries()])
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
860
        self.assertEqual([('get', 'index')], t._activity)
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
861
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
862
    def test_iter_all_entries_reads(self):
863
        # iterating all entries reads the header, then does a linear
864
        # read.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
865
        self.shrink_page_size()
3641.5.11 by John Arbash Meinel
Some small test cleanup
866
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
867
        # 20k nodes is enough to create a two internal nodes on the second
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
868
        # level, with a 2K page size
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
869
        nodes = self.make_nodes(10000, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
870
        for node in nodes:
871
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
872
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
873
        size = t.put_file('index', builder.finish())
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
874
        page_size = btree_index._PAGE_SIZE
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
875
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
876
        index = btree_index.BTreeGraphIndex(t, 'index', size)
877
        del t._activity[:]
878
        self.assertEqual([], t._activity)
3641.5.8 by John Arbash Meinel
Fix up the test suite, now that we pack more efficiently.
879
        found_nodes = self.time(list, index.iter_all_entries())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
880
        bare_nodes = []
881
        for node in found_nodes:
882
            self.assertTrue(node[0] is index)
883
            bare_nodes.append(node[1:])
884
        self.assertEqual(3, len(index._row_lengths),
885
            "Not enough rows: %r" % index._row_lengths)
886
        # Should be as long as the nodes we supplied
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
887
        self.assertEqual(20000, len(found_nodes))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
888
        # Should have the same content
889
        self.assertEqual(set(nodes), set(bare_nodes))
890
        # Should have done linear scan IO up the index, ignoring
891
        # the internal nodes:
892
        # The entire index should have been read
893
        total_pages = sum(index._row_lengths)
894
        self.assertEqual(total_pages, index._row_offsets[-1])
6015.54.1 by Martin Packman
Add assertEqualsApproxCompressed to bt.test_btree_index as newer zlib gives smaller output
895
        self.assertEqualsApproxCompressed(1303220, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
896
        # The start of the leaves
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
897
        first_byte = index._row_offsets[-2] * page_size
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
898
        readv_request = []
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
899
        for offset in range(first_byte, size, page_size):
900
            readv_request.append((offset, page_size))
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
901
        # The last page is truncated
6015.54.3 by Martin Packman
Fix test_iter_all_entries_reads for real by using observed size
902
        readv_request[-1] = (readv_request[-1][0], size % page_size)
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
903
        expected = [('readv', 'index', [(0, page_size)], False, None),
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
904
             ('readv',  'index', readv_request, False, None)]
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
905
        if expected != t._activity:
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
906
            self.assertEqualDiff(pprint.pformat(expected),
6015.54.3 by Martin Packman
Fix test_iter_all_entries_reads for real by using observed size
907
                                 pprint.pformat(t._activity))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
908
909
    def _test_iter_entries_references_resolved(self):
910
        index = self.make_index(1, nodes=[
911
            (('name', ), 'data', ([('ref', ), ('ref', )], )),
912
            (('ref', ), 'refdata', ([], ))])
913
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),('ref',)),)),
914
            (index, ('ref', ), 'refdata', ((), ))]),
915
            set(index.iter_entries([('name',), ('ref',)])))
916
917
    def test_iter_entries_references_2_refs_resolved(self):
918
        # iterating some entries reads just the pages needed. For now, to
919
        # get it working and start measuring, only 4K pages are read.
920
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
921
        # 80 nodes is enough to create a two-level index.
922
        nodes = self.make_nodes(160, 2, 2)
923
        for node in nodes:
924
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
925
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
926
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
927
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
928
        index = btree_index.BTreeGraphIndex(t, 'index', size)
929
        del t._activity[:]
930
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
931
        # search for one key
932
        found_nodes = list(index.iter_entries([nodes[30][0]]))
933
        bare_nodes = []
934
        for node in found_nodes:
935
            self.assertTrue(node[0] is index)
936
            bare_nodes.append(node[1:])
937
        # Should be as long as the nodes we supplied
938
        self.assertEqual(1, len(found_nodes))
939
        # Should have the same content
940
        self.assertEqual(nodes[30], bare_nodes[0])
941
        # Should have read the root node, then one leaf page:
942
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
943
             ('readv',  'index', [(8192, 4096), ], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
944
            t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
945
946
    def test_iter_key_prefix_1_element_key_None(self):
947
        index = self.make_index()
948
        self.assertRaises(errors.BadIndexKey, list,
949
            index.iter_entries_prefix([(None, )]))
950
951
    def test_iter_key_prefix_wrong_length(self):
952
        index = self.make_index()
953
        self.assertRaises(errors.BadIndexKey, list,
954
            index.iter_entries_prefix([('foo', None)]))
955
        index = self.make_index(key_elements=2)
956
        self.assertRaises(errors.BadIndexKey, list,
957
            index.iter_entries_prefix([('foo', )]))
958
        self.assertRaises(errors.BadIndexKey, list,
959
            index.iter_entries_prefix([('foo', None, None)]))
960
961
    def test_iter_key_prefix_1_key_element_no_refs(self):
962
        index = self.make_index( nodes=[
963
            (('name', ), 'data', ()),
964
            (('ref', ), 'refdata', ())])
965
        self.assertEqual(set([(index, ('name', ), 'data'),
966
            (index, ('ref', ), 'refdata')]),
967
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
968
969
    def test_iter_key_prefix_1_key_element_refs(self):
970
        index = self.make_index(1, nodes=[
971
            (('name', ), 'data', ([('ref', )], )),
972
            (('ref', ), 'refdata', ([], ))])
973
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),),)),
974
            (index, ('ref', ), 'refdata', ((), ))]),
975
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
976
977
    def test_iter_key_prefix_2_key_element_no_refs(self):
978
        index = self.make_index(key_elements=2, nodes=[
979
            (('name', 'fin1'), 'data', ()),
980
            (('name', 'fin2'), 'beta', ()),
981
            (('ref', 'erence'), 'refdata', ())])
982
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
983
            (index, ('ref', 'erence'), 'refdata')]),
984
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
985
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
986
            (index, ('name', 'fin2'), 'beta')]),
987
            set(index.iter_entries_prefix([('name', None)])))
988
989
    def test_iter_key_prefix_2_key_element_refs(self):
990
        index = self.make_index(1, key_elements=2, nodes=[
991
            (('name', 'fin1'), 'data', ([('ref', 'erence')], )),
992
            (('name', 'fin2'), 'beta', ([], )),
993
            (('ref', 'erence'), 'refdata', ([], ))])
994
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
995
            (index, ('ref', 'erence'), 'refdata', ((), ))]),
996
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
997
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
998
            (index, ('name', 'fin2'), 'beta', ((), ))]),
999
            set(index.iter_entries_prefix([('name', None)])))
1000
4011.5.5 by Andrew Bennetts
Fix typo in comment.
1001
    # XXX: external_references tests are duplicated in test_index.  We
4011.5.3 by Andrew Bennetts
Implement and test external_references on GraphIndex and BTreeGraphIndex.
1002
    # probably should have per_graph_index tests...
1003
    def test_external_references_no_refs(self):
1004
        index = self.make_index(ref_lists=0, nodes=[])
1005
        self.assertRaises(ValueError, index.external_references, 0)
1006
1007
    def test_external_references_no_results(self):
1008
        index = self.make_index(ref_lists=1, nodes=[
1009
            (('key',), 'value', ([],))])
1010
        self.assertEqual(set(), index.external_references(0))
1011
1012
    def test_external_references_missing_ref(self):
1013
        missing_key = ('missing',)
1014
        index = self.make_index(ref_lists=1, nodes=[
1015
            (('key',), 'value', ([missing_key],))])
1016
        self.assertEqual(set([missing_key]), index.external_references(0))
1017
1018
    def test_external_references_multiple_ref_lists(self):
1019
        missing_key = ('missing',)
1020
        index = self.make_index(ref_lists=2, nodes=[
1021
            (('key',), 'value', ([], [missing_key]))])
1022
        self.assertEqual(set([]), index.external_references(0))
1023
        self.assertEqual(set([missing_key]), index.external_references(1))
1024
1025
    def test_external_references_two_records(self):
1026
        index = self.make_index(ref_lists=1, nodes=[
1027
            (('key-1',), 'value', ([('key-2',)],)),
1028
            (('key-2',), 'value', ([],)),
1029
            ])
1030
        self.assertEqual(set([]), index.external_references(0))
1031
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1032
    def test__find_ancestors_one_page(self):
4593.4.5 by John Arbash Meinel
Start adding some tests.
1033
        key1 = ('key-1',)
1034
        key2 = ('key-2',)
1035
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1036
            (key1, 'value', ([key2],)),
1037
            (key2, 'value', ([],)),
1038
            ])
1039
        parent_map = {}
1040
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1041
        search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
4593.4.5 by John Arbash Meinel
Start adding some tests.
1042
        self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1043
        self.assertEqual(set(), missing_keys)
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1044
        self.assertEqual(set(), search_keys)
4593.4.5 by John Arbash Meinel
Start adding some tests.
1045
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1046
    def test__find_ancestors_one_page_w_missing(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1047
        key1 = ('key-1',)
1048
        key2 = ('key-2',)
1049
        key3 = ('key-3',)
1050
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1051
            (key1, 'value', ([key2],)),
1052
            (key2, 'value', ([],)),
1053
            ])
1054
        parent_map = {}
1055
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1056
        search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1057
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1058
        self.assertEqual({key2: ()}, parent_map)
1059
        # we know that key3 is missing because we read the page that it would
1060
        # otherwise be on
1061
        self.assertEqual(set([key3]), missing_keys)
1062
        self.assertEqual(set(), search_keys)
1063
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1064
    def test__find_ancestors_one_parent_missing(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1065
        key1 = ('key-1',)
1066
        key2 = ('key-2',)
1067
        key3 = ('key-3',)
1068
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1069
            (key1, 'value', ([key2],)),
1070
            (key2, 'value', ([key3],)),
1071
            ])
1072
        parent_map = {}
1073
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1074
        search_keys = index._find_ancestors([key1], 0, parent_map,
1075
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1076
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1077
        self.assertEqual(set(), missing_keys)
1078
        # all we know is that key3 wasn't present on the page we were reading
1079
        # but if you look, the last key is key2 which comes before key3, so we
1080
        # don't know whether key3 would land on this page or not.
1081
        self.assertEqual(set([key3]), search_keys)
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1082
        search_keys = index._find_ancestors(search_keys, 0, parent_map,
1083
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1084
        # passing it back in, we are sure it is 'missing'
1085
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1086
        self.assertEqual(set([key3]), missing_keys)
1087
        self.assertEqual(set([]), search_keys)
1088
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1089
    def test__find_ancestors_dont_search_known(self):
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1090
        key1 = ('key-1',)
1091
        key2 = ('key-2',)
1092
        key3 = ('key-3',)
1093
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1094
            (key1, 'value', ([key2],)),
1095
            (key2, 'value', ([key3],)),
1096
            (key3, 'value', ([],)),
1097
            ])
1098
        # We already know about key2, so we won't try to search for key3
1099
        parent_map = {key2: (key3,)}
1100
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1101
        search_keys = index._find_ancestors([key1], 0, parent_map,
1102
                                            missing_keys)
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1103
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1104
        self.assertEqual(set(), missing_keys)
1105
        self.assertEqual(set(), search_keys)
1106
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1107
    def test__find_ancestors_multiple_pages(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1108
        # We need to use enough keys that we actually cause a split
1109
        start_time = 1249671539
1110
        email = "joebob@example.com"
1111
        nodes = []
1112
        ref_lists = ((),)
1113
        rev_keys = []
1114
        for i in xrange(400):
1115
            rev_id = '%s-%s-%s' % (email,
1116
                                   osutils.compact_date(start_time + i),
1117
                                   osutils.rand_chars(16))
1118
            rev_key = (rev_id,)
1119
            nodes.append((rev_key, 'value', ref_lists))
1120
            # We have a ref 'list' of length 1, with a list of parents, with 1
1121
            # parent which is a key
1122
            ref_lists = ((rev_key,),)
1123
            rev_keys.append(rev_key)
1124
        index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1125
        self.assertEqual(400, index.key_count())
1126
        self.assertEqual(3, len(index._row_offsets))
1127
        nodes = dict(index._read_nodes([1, 2]))
1128
        l1 = nodes[1]
1129
        l2 = nodes[2]
1130
        min_l2_key = l2.min_key
1131
        max_l1_key = l1.max_key
1132
        self.assertTrue(max_l1_key < min_l2_key)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1133
        parents_min_l2_key = l2[min_l2_key][1][0]
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1134
        self.assertEqual((l1.max_key,), parents_min_l2_key)
1135
        # Now, whatever key we select that would fall on the second page,
1136
        # should give us all the parents until the page break
1137
        key_idx = rev_keys.index(min_l2_key)
1138
        next_key = rev_keys[key_idx+1]
1139
        # So now when we get the parent map, we should get the key we are
1140
        # looking for, min_l2_key, and then a reference to go look for the
1141
        # parent of that key
1142
        parent_map = {}
1143
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1144
        search_keys = index._find_ancestors([next_key], 0, parent_map,
1145
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1146
        self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1147
        self.assertEqual(set(), missing_keys)
1148
        self.assertEqual(set([max_l1_key]), search_keys)
1149
        parent_map = {}
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1150
        search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1151
                                            missing_keys)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1152
        self.assertEqual(l1.all_keys(), sorted(parent_map))
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1153
        self.assertEqual(set(), missing_keys)
1154
        self.assertEqual(set(), search_keys)
1155
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1156
    def test__find_ancestors_empty_index(self):
4593.4.11 by John Arbash Meinel
Snapshot the work in progress.
1157
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1158
        parent_map = {}
1159
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1160
        search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1161
                                            missing_keys)
4593.4.11 by John Arbash Meinel
Snapshot the work in progress.
1162
        self.assertEqual(set(), search_keys)
1163
        self.assertEqual({}, parent_map)
1164
        self.assertEqual(set([('one',), ('two',)]), missing_keys)
1165
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1166
    def test_supports_unlimited_cache(self):
1167
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1168
        # We need enough nodes to cause a page split (so we have both an
1169
        # internal node and a couple leaf nodes. 500 seems to be enough.)
4634.71.2 by John Arbash Meinel
If we are going to sometimes use a dict, we have to conform to just the dict interface.
1170
        nodes = self.make_nodes(500, 1, 0)
1171
        for node in nodes:
1172
            builder.add_node(*node)
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1173
        stream = builder.finish()
5609.9.4 by Vincent Ladeuil
Use self.get_transport instead of transport.get_transport where possible.
1174
        trans = self.get_transport()
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1175
        size = trans.put_file('index', stream)
1176
        index = btree_index.BTreeGraphIndex(trans, 'index', size)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1177
        self.assertEqual(500, index.key_count())
1178
        # We have an internal node
1179
        self.assertEqual(2, len(index._row_lengths))
1180
        # We have at least 2 leaf nodes
1181
        self.assertTrue(index._row_lengths[-1] >= 2)
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1182
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1183
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1184
                         index._leaf_node_cache._max_cache)
1185
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1186
        self.assertEqual(100, index._internal_node_cache._max_cache)
1187
        # No change if unlimited_cache=False is passed
1188
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1189
                                            unlimited_cache=False)
1190
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1191
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1192
                         index._leaf_node_cache._max_cache)
1193
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1194
        self.assertEqual(100, index._internal_node_cache._max_cache)
1195
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1196
                                            unlimited_cache=True)
1197
        self.assertIsInstance(index._leaf_node_cache, dict)
1198
        self.assertIs(type(index._internal_node_cache), dict)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1199
        # Exercise the lookup code
1200
        entries = set(index.iter_entries([n[0] for n in nodes]))
1201
        self.assertEqual(500, len(entries))
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1202
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1203
1204
class TestBTreeNodes(BTreeTestCase):
1205
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
1206
    scenarios = btreeparser_scenarios()
1207
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1208
    def setUp(self):
6552.1.4 by Vincent Ladeuil
Remaining tests matching setup(self) that can be rewritten with super().
1209
        super(TestBTreeNodes, self).setUp()
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
1210
        self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1211
1212
    def test_LeafNode_1_0(self):
1213
        node_bytes = ("type=leaf\n"
1214
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
1215
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
1216
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
1217
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
1218
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
1219
        node = btree_index._LeafNode(node_bytes, 1, 0)
1220
        # We do direct access, or don't care about order, to leaf nodes most of
1221
        # the time, so a dict is useful:
1222
        self.assertEqual({
1223
            ("0000000000000000000000000000000000000000",): ("value:0", ()),
1224
            ("1111111111111111111111111111111111111111",): ("value:1", ()),
1225
            ("2222222222222222222222222222222222222222",): ("value:2", ()),
1226
            ("3333333333333333333333333333333333333333",): ("value:3", ()),
1227
            ("4444444444444444444444444444444444444444",): ("value:4", ()),
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1228
            }, dict(node.all_items()))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1229
1230
    def test_LeafNode_2_2(self):
1231
        node_bytes = ("type=leaf\n"
1232
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
1233
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1234
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1235
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
1236
            ""
1237
            )
1238
        node = btree_index._LeafNode(node_bytes, 2, 2)
1239
        # We do direct access, or don't care about order, to leaf nodes most of
1240
        # the time, so a dict is useful:
1241
        self.assertEqual({
1242
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1243
            ('00', '11'): ('value:1',
1244
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1245
            ('11', '33'): ('value:3',
1246
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1247
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1248
            }, dict(node.all_items()))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1249
1250
    def test_InternalNode_1(self):
1251
        node_bytes = ("type=internal\n"
1252
            "offset=1\n"
1253
            "0000000000000000000000000000000000000000\n"
1254
            "1111111111111111111111111111111111111111\n"
1255
            "2222222222222222222222222222222222222222\n"
1256
            "3333333333333333333333333333333333333333\n"
1257
            "4444444444444444444444444444444444444444\n"
1258
            )
1259
        node = btree_index._InternalNode(node_bytes)
1260
        # We want to bisect to find the right children from this node, so a
1261
        # vector is most useful.
1262
        self.assertEqual([
1263
            ("0000000000000000000000000000000000000000",),
1264
            ("1111111111111111111111111111111111111111",),
1265
            ("2222222222222222222222222222222222222222",),
1266
            ("3333333333333333333333333333333333333333",),
1267
            ("4444444444444444444444444444444444444444",),
1268
            ], node.keys)
1269
        self.assertEqual(1, node.offset)
1270
1271
    def test_LeafNode_2_2(self):
1272
        node_bytes = ("type=leaf\n"
1273
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
1274
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1275
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1276
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
1277
            ""
1278
            )
1279
        node = btree_index._LeafNode(node_bytes, 2, 2)
1280
        # We do direct access, or don't care about order, to leaf nodes most of
1281
        # the time, so a dict is useful:
1282
        self.assertEqual({
1283
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1284
            ('00', '11'): ('value:1',
1285
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1286
            ('11', '33'): ('value:3',
1287
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1288
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1289
            }, dict(node.all_items()))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1290
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1291
    def assertFlattened(self, expected, key, value, refs):
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1292
        flat_key, flat_line = self.parse_btree._flatten_node(
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1293
            (None, key, value, refs), bool(refs))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1294
        self.assertEqual('\x00'.join(key), flat_key)
1295
        self.assertEqual(expected, flat_line)
1296
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1297
    def test__flatten_node(self):
1298
        self.assertFlattened('key\0\0value\n', ('key',), 'value', [])
1299
        self.assertFlattened('key\0tuple\0\0value str\n',
1300
                             ('key', 'tuple'), 'value str', [])
1301
        self.assertFlattened('key\0tuple\0triple\0\0value str\n',
1302
                             ('key', 'tuple', 'triple'), 'value str', [])
1303
        self.assertFlattened('k\0t\0s\0ref\0value str\n',
1304
                             ('k', 't', 's'), 'value str', [[('ref',)]])
1305
        self.assertFlattened('key\0tuple\0ref\0key\0value str\n',
1306
                             ('key', 'tuple'), 'value str', [[('ref', 'key')]])
1307
        self.assertFlattened("00\x0000\x00\t00\x00ref00\x00value:0\n",
1308
            ('00', '00'), 'value:0', ((), (('00', 'ref00'),)))
1309
        self.assertFlattened(
1310
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
1311
            ('00', '11'), 'value:1',
1312
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01'))))
1313
        self.assertFlattened(
1314
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
1315
            ('11', '33'), 'value:3',
1316
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22'))))
1317
        self.assertFlattened(
1318
            "11\x0044\x00\t11\x00ref00\x00value:4\n",
1319
            ('11', '44'), 'value:4', ((), (('11', 'ref00'),)))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1320
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1321
1322
class TestCompiledBtree(tests.TestCase):
1323
1324
    def test_exists(self):
1325
        # This is just to let the user know if they don't have the feature
1326
        # available
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
1327
        self.requireFeature(compiled_btreeparser_feature)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1328
1329
1330
class TestMultiBisectRight(tests.TestCase):
1331
1332
    def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
1333
        self.assertEqual(offsets,
1334
                         btree_index.BTreeGraphIndex._multi_bisect_right(
1335
                            search_keys, fixed_keys))
1336
1337
    def test_after(self):
1338
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
1339
        self.assertMultiBisectRight([(3, ['e', 'f', 'g'])],
1340
                                    ['e', 'f', 'g'], ['a', 'b', 'c'])
1341
1342
    def test_before(self):
1343
        self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
1344
        self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])],
1345
                                    ['a', 'b', 'c', 'd'], ['e', 'f', 'g'])
1346
1347
    def test_exact(self):
1348
        self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
1349
        self.assertMultiBisectRight([(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b'])
1350
        self.assertMultiBisectRight([(1, ['a']), (3, ['c'])],
1351
                                    ['a', 'c'], ['a', 'b', 'c'])
1352
1353
    def test_inbetween(self):
1354
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
1355
        self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
1356
                                    ['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h'])
1357
1358
    def test_mixed(self):
1359
        self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']),
1360
                                     (4, ['g', 'h'])],
1361
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
1362
                                    ['c', 'd', 'f', 'g'])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1363
1364
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1365
class TestExpandOffsets(tests.TestCase):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1366
1367
    def make_index(self, size, recommended_pages=None):
1368
        """Make an index with a generic size.
1369
1370
        This doesn't actually create anything on disk, it just primes a
1371
        BTreeGraphIndex with the recommended information.
1372
        """
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
1373
        index = btree_index.BTreeGraphIndex(
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
1374
            transport.get_transport_from_url('memory:///'),
1375
            'test-index', size=size)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1376
        if recommended_pages is not None:
1377
            index._recommended_pages = recommended_pages
1378
        return index
1379
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1380
    def set_cached_offsets(self, index, cached_offsets):
1381
        """Monkeypatch to give a canned answer for _get_offsets_for...()."""
1382
        def _get_offsets_to_cached_pages():
1383
            cached = set(cached_offsets)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1384
            return cached
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1385
        index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1386
1387
    def prepare_index(self, index, node_ref_lists, key_length, key_count,
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1388
                      row_lengths, cached_offsets):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1389
        """Setup the BTreeGraphIndex with some pre-canned information."""
1390
        index.node_ref_lists = node_ref_lists
1391
        index._key_length = key_length
1392
        index._key_count = key_count
1393
        index._row_lengths = row_lengths
1394
        index._compute_row_offsets()
1395
        index._root_node = btree_index._InternalNode('internal\noffset=0\n')
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1396
        self.set_cached_offsets(index, cached_offsets)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1397
1398
    def make_100_node_index(self):
1399
        index = self.make_index(4096*100, 6)
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1400
        # Consider we've already made a single request at the middle
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1401
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1402
                           key_count=1000, row_lengths=[1, 99],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1403
                           cached_offsets=[0, 50])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1404
        return index
1405
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1406
    def make_1000_node_index(self):
1407
        index = self.make_index(4096*1000, 6)
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1408
        # Pretend we've already made a single request in the middle
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1409
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1410
                           key_count=90000, row_lengths=[1, 9, 990],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1411
                           cached_offsets=[0, 5, 500])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1412
        return index
1413
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1414
    def assertNumPages(self, expected_pages, index, size):
1415
        index._size = size
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1416
        self.assertEqual(expected_pages, index._compute_total_pages_in_index())
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1417
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1418
    def assertExpandOffsets(self, expected, index, offsets):
1419
        self.assertEqual(expected, index._expand_offsets(offsets),
1420
                         'We did not get the expected value after expanding'
1421
                         ' %s' % (offsets,))
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1422
1423
    def test_default_recommended_pages(self):
1424
        index = self.make_index(None)
1425
        # local transport recommends 4096 byte reads, which is 1 page
1426
        self.assertEqual(1, index._recommended_pages)
1427
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1428
    def test__compute_total_pages_in_index(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1429
        index = self.make_index(None)
1430
        self.assertNumPages(1, index, 1024)
1431
        self.assertNumPages(1, index, 4095)
1432
        self.assertNumPages(1, index, 4096)
1433
        self.assertNumPages(2, index, 4097)
1434
        self.assertNumPages(2, index, 8192)
1435
        self.assertNumPages(76, index, 4096*75 + 10)
1436
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1437
    def test__find_layer_start_and_stop(self):
1438
        index = self.make_1000_node_index()
1439
        self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1440
        self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1441
        self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1442
        self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1443
        self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1444
        self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1445
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1446
    def test_unknown_size(self):
1447
        # We should not expand if we don't know the file size
1448
        index = self.make_index(None, 10)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1449
        self.assertExpandOffsets([0], index, [0])
1450
        self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1451
1452
    def test_more_than_recommended(self):
1453
        index = self.make_index(4096*100, 2)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1454
        self.assertExpandOffsets([1, 10], index, [1, 10])
1455
        self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1456
1457
    def test_read_all_from_root(self):
1458
        index = self.make_index(4096*10, 20)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1459
        self.assertExpandOffsets(range(10), index, [0])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1460
1461
    def test_read_all_when_cached(self):
1462
        # We've read enough that we can grab all the rest in a single request
1463
        index = self.make_index(4096*10, 5)
1464
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1465
                           key_count=1000, row_lengths=[1, 9],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1466
                           cached_offsets=[0, 1, 2, 5, 6])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1467
        # It should fill the remaining nodes, regardless of the one requested
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1468
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1469
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1470
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1471
1472
    def test_no_root_node(self):
1473
        index = self.make_index(4096*10, 5)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1474
        self.assertExpandOffsets([0], index, [0])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1475
1476
    def test_include_neighbors(self):
1477
        index = self.make_100_node_index()
1478
        # We expand in both directions, until we have at least 'recommended'
1479
        # pages
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1480
        self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1481
        self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1482
        # If we hit an 'edge' we continue in the other direction
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1483
        self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1484
        self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1485
1486
        # Requesting many nodes will expand all locations equally
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1487
        self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1488
        self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1489
                               [2, 10, 81])
1490
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1491
    def test_stop_at_cached(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1492
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1493
        self.set_cached_offsets(index, [0, 10, 19])
1494
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1495
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1496
        self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1497
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1498
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1499
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1500
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1501
    def test_cannot_fully_expand(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1502
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1503
        self.set_cached_offsets(index, [0, 10, 12])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1504
        # We don't go into an endless loop if we are bound by cached nodes
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1505
        self.assertExpandOffsets([11], index, [11])
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1506
1507
    def test_overlap(self):
1508
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1509
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1510
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1511
1512
    def test_stay_within_layer(self):
1513
        index = self.make_1000_node_index()
1514
        # When expanding a request, we won't read nodes from the next layer
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1515
        self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1516
        self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1517
        self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1518
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1519
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1520
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1521
        self.set_cached_offsets(index, [0, 4, 12])
1522
        self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1523
        self.assertExpandOffsets([10, 11], index, [11])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1524
1525
    def test_small_requests_unexpanded(self):
1526
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1527
        self.set_cached_offsets(index, [0])
1528
        self.assertExpandOffsets([1], index, [1])
1529
        self.assertExpandOffsets([50], index, [50])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1530
        # If we request more than one node, then we'll expand
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1531
        self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1532
1533
        # The first pass does not expand
1534
        index = self.make_1000_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1535
        self.set_cached_offsets(index, [0])
1536
        self.assertExpandOffsets([1], index, [1])
1537
        self.set_cached_offsets(index, [0, 1])
1538
        self.assertExpandOffsets([100], index, [100])
1539
        self.set_cached_offsets(index, [0, 1, 100])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1540
        # But after the first depth, we will expand
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1541
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1542
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1543
        self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1544
        self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,
1545
                                 [105])