~bzr-pqm/bzr/bzr.dev

5557.1.7 by John Arbash Meinel
Merge in the bzr.dev 5582
1
# Copyright (C) 2008-2011 Canonical Ltd
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
2
#
3
# This program is free software; you can redistribute it and/or modify
3641.3.29 by John Arbash Meinel
Cleanup the copyright headers
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
16
#
17
18
"""Tests for btree indices."""
19
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
20
import pprint
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
21
import zlib
22
23
from bzrlib import (
24
    btree_index,
25
    errors,
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
26
    fifo_cache,
27
    lru_cache,
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
28
    osutils,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
29
    tests,
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
30
    transport,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
31
    )
32
from bzrlib.tests import (
33
    TestCaseWithTransport,
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
34
    scenarios,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
35
    )
5967.12.1 by Martin Pool
Move all test features into bzrlib.tests.features
36
from bzrlib.tests import (
37
    features,
38
    )
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
39
40
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
41
load_tests = scenarios.load_tests_apply_scenarios
42
43
44
def btreeparser_scenarios():
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
45
    import bzrlib._btree_serializer_py as py_module
4084.5.1 by Robert Collins
Bulk update all test adaptation into a single approach, using multiply_tests rather than test adapters.
46
    scenarios = [('python', {'parse_btree': py_module})]
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
47
    if compiled_btreeparser_feature.available():
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
48
        scenarios.append(('C', 
49
            {'parse_btree': compiled_btreeparser_feature.module}))
50
    return scenarios
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
51
52
5967.12.1 by Martin Pool
Move all test features into bzrlib.tests.features
53
compiled_btreeparser_feature = features.ModuleAvailableFeature(
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
54
    'bzrlib._btree_serializer_pyx')
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
55
56
57
class BTreeTestCase(TestCaseWithTransport):
58
    # test names here are suffixed by the key length and reference list count
59
    # that they test.
60
61
    def setUp(self):
62
        TestCaseWithTransport.setUp(self)
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
63
        self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
64
65
    def make_nodes(self, count, key_elements, reference_lists):
66
        """Generate count*key_elements sample nodes."""
67
        keys = []
68
        for prefix_pos in range(key_elements):
69
            if key_elements - 1:
70
                prefix = (str(prefix_pos) * 40,)
71
            else:
72
                prefix = ()
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
73
            for pos in xrange(count):
74
                # TODO: This creates odd keys. When count == 100,000, it
75
                #       creates a 240 byte key
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
76
                key = prefix + (str(pos) * 40,)
77
                value = "value:%s" % pos
78
                if reference_lists:
79
                    # generate some references
80
                    refs = []
81
                    for list_pos in range(reference_lists):
82
                        # as many keys in each list as its index + the key depth
83
                        # mod 2 - this generates both 0 length lists and
84
                        # ones slightly longer than the number of lists.
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
85
                        # It also ensures we have non homogeneous lists.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
86
                        refs.append([])
87
                        for ref_pos in range(list_pos + pos % 2):
88
                            if pos % 2:
89
                                # refer to a nearby key
90
                                refs[-1].append(prefix + ("ref" + str(pos - 1) * 40,))
91
                            else:
92
                                # serial of this ref in the ref list
93
                                refs[-1].append(prefix + ("ref" + str(ref_pos) * 40,))
94
                        refs[-1] = tuple(refs[-1])
95
                    refs = tuple(refs)
96
                else:
97
                    refs = ()
98
                keys.append((key, value, refs))
99
        return keys
100
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
101
    def shrink_page_size(self):
102
        """Shrink the default page size so that less fits in a page."""
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
103
        self.overrideAttr(btree_index, '_PAGE_SIZE')
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
104
        btree_index._PAGE_SIZE = 2048
105
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
106
107
class TestBTreeBuilder(BTreeTestCase):
108
4744.2.7 by John Arbash Meinel
Add .clear_cache() members to GraphIndexBuilder and BTreeBuilder.
109
    def test_clear_cache(self):
110
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
111
        # This is a no-op, but we need the api to be consistent with other
112
        # BTreeGraphIndex apis.
113
        builder.clear_cache()
114
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
115
    def test_empty_1_0(self):
116
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
117
        # NamedTemporaryFile dies on builder.finish().read(). weird.
118
        temp_file = builder.finish()
119
        content = temp_file.read()
120
        del temp_file
121
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
122
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
123
            "row_lengths=\n",
124
            content)
125
126
    def test_empty_2_1(self):
127
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
128
        # NamedTemporaryFile dies on builder.finish().read(). weird.
129
        temp_file = builder.finish()
130
        content = temp_file.read()
131
        del temp_file
132
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
133
            "B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
134
            "row_lengths=\n",
135
            content)
136
137
    def test_root_leaf_1_0(self):
138
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
139
        nodes = self.make_nodes(5, 1, 0)
140
        for node in nodes:
141
            builder.add_node(*node)
142
        # NamedTemporaryFile dies on builder.finish().read(). weird.
143
        temp_file = builder.finish()
144
        content = temp_file.read()
145
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
146
        self.assertEqual(131, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
147
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
148
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
149
            "row_lengths=1\n",
150
            content[:73])
151
        node_content = content[73:]
152
        node_bytes = zlib.decompress(node_content)
153
        expected_node = ("type=leaf\n"
154
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
155
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
156
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
157
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
158
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
159
        self.assertEqual(expected_node, node_bytes)
160
161
    def test_root_leaf_2_2(self):
162
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
163
        nodes = self.make_nodes(5, 2, 2)
164
        for node in nodes:
165
            builder.add_node(*node)
166
        # NamedTemporaryFile dies on builder.finish().read(). weird.
167
        temp_file = builder.finish()
168
        content = temp_file.read()
169
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
170
        self.assertEqual(238, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
171
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
172
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
173
            "row_lengths=1\n",
174
            content[:74])
175
        node_content = content[74:]
176
        node_bytes = zlib.decompress(node_content)
177
        expected_node = (
178
            "type=leaf\n"
179
            "0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
180
            "0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
181
            "0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
182
            "0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
183
            "0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
184
            "1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
185
            "1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
186
            "1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
187
            "1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
188
            "1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
189
            ""
190
            )
191
        self.assertEqual(expected_node, node_bytes)
192
193
    def test_2_leaves_1_0(self):
194
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
195
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
196
        for node in nodes:
197
            builder.add_node(*node)
198
        # NamedTemporaryFile dies on builder.finish().read(). weird.
199
        temp_file = builder.finish()
200
        content = temp_file.read()
201
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
202
        self.assertEqual(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
203
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
204
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
205
            "row_lengths=1,2\n",
206
            content[:77])
207
        root = content[77:4096]
208
        leaf1 = content[4096:8192]
209
        leaf2 = content[8192:]
210
        root_bytes = zlib.decompress(root)
211
        expected_root = (
212
            "type=internal\n"
213
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
214
            ) + ("307" * 40) + "\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
215
        self.assertEqual(expected_root, root_bytes)
216
        # We already know serialisation works for leaves, check key selection:
217
        leaf1_bytes = zlib.decompress(leaf1)
218
        sorted_node_keys = sorted(node[0] for node in nodes)
219
        node = btree_index._LeafNode(leaf1_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
220
        self.assertEqual(231, len(node))
221
        self.assertEqual(sorted_node_keys[:231], node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
222
        leaf2_bytes = zlib.decompress(leaf2)
223
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
224
        self.assertEqual(400 - 231, len(node))
225
        self.assertEqual(sorted_node_keys[231:], node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
226
227
    def test_last_page_rounded_1_layer(self):
228
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
229
        nodes = self.make_nodes(10, 1, 0)
230
        for node in nodes:
231
            builder.add_node(*node)
232
        # NamedTemporaryFile dies on builder.finish().read(). weird.
233
        temp_file = builder.finish()
234
        content = temp_file.read()
235
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
236
        self.assertEqual(155, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
237
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
238
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
239
            "row_lengths=1\n",
240
            content[:74])
241
        # Check thelast page is well formed
242
        leaf2 = content[74:]
243
        leaf2_bytes = zlib.decompress(leaf2)
244
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
245
        self.assertEqual(10, len(node))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
246
        sorted_node_keys = sorted(node[0] for node in nodes)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
247
        self.assertEqual(sorted_node_keys, node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
248
249
    def test_last_page_not_rounded_2_layer(self):
250
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
251
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
252
        for node in nodes:
253
            builder.add_node(*node)
254
        # NamedTemporaryFile dies on builder.finish().read(). weird.
255
        temp_file = builder.finish()
256
        content = temp_file.read()
257
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
258
        self.assertEqual(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
259
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
260
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
261
            "row_lengths=1,2\n",
262
            content[:77])
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
263
        # Check the last page is well formed
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
264
        leaf2 = content[8192:]
265
        leaf2_bytes = zlib.decompress(leaf2)
266
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
267
        self.assertEqual(400 - 231, len(node))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
268
        sorted_node_keys = sorted(node[0] for node in nodes)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
269
        self.assertEqual(sorted_node_keys[231:], node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
270
271
    def test_three_level_tree_details(self):
272
        # The left most pointer in the second internal node in a row should
273
        # pointer to the second node that the internal node is for, _not_
274
        # the first, otherwise the first node overlaps with the last node of
275
        # the prior internal node on that row.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
276
        self.shrink_page_size()
277
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
278
        # 40K nodes is enough to create a two internal nodes on the second
279
        # level, with a 2K page size
280
        nodes = self.make_nodes(20000, 2, 2)
3641.3.5 by John Arbash Meinel
For iter_all and three_level tests adjust spill-at.
281
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
282
        for node in nodes:
283
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
284
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
285
        size = t.put_file('index', self.time(builder.finish))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
286
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
287
        index = btree_index.BTreeGraphIndex(t, 'index', size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
288
        # Seed the metadata, we're using internal calls now.
289
        index.key_count()
290
        self.assertEqual(3, len(index._row_lengths),
291
            "Not enough rows: %r" % index._row_lengths)
292
        self.assertEqual(4, len(index._row_offsets))
293
        self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
294
        internal_nodes = index._get_internal_nodes([0, 1, 2])
295
        root_node = internal_nodes[0]
296
        internal_node1 = internal_nodes[1]
297
        internal_node2 = internal_nodes[2]
298
        # The left most node node2 points at should be one after the right most
299
        # node pointed at by node1.
300
        self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
301
        # The left most key of the second node pointed at by internal_node2
302
        # should be its first key. We can check this by looking for its first key
303
        # in the second node it points at
304
        pos = index._row_offsets[2] + internal_node2.offset + 1
305
        leaf = index._get_leaf_nodes([pos])[pos]
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
306
        self.assertTrue(internal_node2.keys[0] in leaf)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
307
308
    def test_2_leaves_2_2(self):
309
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
310
        nodes = self.make_nodes(100, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
311
        for node in nodes:
312
            builder.add_node(*node)
313
        # NamedTemporaryFile dies on builder.finish().read(). weird.
314
        temp_file = builder.finish()
315
        content = temp_file.read()
316
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
317
        self.assertEqual(12643, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
318
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
319
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
320
            "row_lengths=1,3\n",
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
321
            content[:77])
322
        root = content[77:4096]
323
        leaf1 = content[4096:8192]
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
324
        leaf2 = content[8192:12288]
325
        leaf3 = content[12288:]
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
326
        root_bytes = zlib.decompress(root)
327
        expected_root = (
328
            "type=internal\n"
329
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
330
            + ("0" * 40) + "\x00" + ("91" * 40) + "\n"
331
            + ("1" * 40) + "\x00" + ("81" * 40) + "\n"
332
            )
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
333
        self.assertEqual(expected_root, root_bytes)
334
        # We assume the other leaf nodes have been written correctly - layering
335
        # FTW.
336
337
    def test_spill_index_stress_1_1(self):
338
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
339
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
340
        builder.add_node(*nodes[0])
341
        # Test the parts of the index that take up memory are doing so
342
        # predictably.
343
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
344
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
345
        builder.add_node(*nodes[1])
346
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
347
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
348
        self.assertEqual(1, len(builder._backing_indices))
349
        self.assertEqual(2, builder._backing_indices[0].key_count())
350
        # now back to memory
351
        builder.add_node(*nodes[2])
352
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
353
        self.assertIs(None, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
354
        # And spills to a second backing index combing all
355
        builder.add_node(*nodes[3])
356
        self.assertEqual(0, len(builder._nodes))
357
        self.assertIs(None, builder._nodes_by_key)
358
        self.assertEqual(2, len(builder._backing_indices))
359
        self.assertEqual(None, builder._backing_indices[0])
360
        self.assertEqual(4, builder._backing_indices[1].key_count())
361
        # The next spills to the 2-len slot
362
        builder.add_node(*nodes[4])
363
        builder.add_node(*nodes[5])
364
        self.assertEqual(0, len(builder._nodes))
365
        self.assertIs(None, builder._nodes_by_key)
366
        self.assertEqual(2, len(builder._backing_indices))
367
        self.assertEqual(2, builder._backing_indices[0].key_count())
368
        self.assertEqual(4, builder._backing_indices[1].key_count())
369
        # Next spill combines
370
        builder.add_node(*nodes[6])
371
        builder.add_node(*nodes[7])
372
        self.assertEqual(3, len(builder._backing_indices))
373
        self.assertEqual(None, builder._backing_indices[0])
374
        self.assertEqual(None, builder._backing_indices[1])
375
        self.assertEqual(8, builder._backing_indices[2].key_count())
376
        # And so forth - counting up in binary.
377
        builder.add_node(*nodes[8])
378
        builder.add_node(*nodes[9])
379
        self.assertEqual(3, len(builder._backing_indices))
380
        self.assertEqual(2, builder._backing_indices[0].key_count())
381
        self.assertEqual(None, builder._backing_indices[1])
382
        self.assertEqual(8, builder._backing_indices[2].key_count())
383
        builder.add_node(*nodes[10])
384
        builder.add_node(*nodes[11])
385
        self.assertEqual(3, len(builder._backing_indices))
386
        self.assertEqual(None, builder._backing_indices[0])
387
        self.assertEqual(4, builder._backing_indices[1].key_count())
388
        self.assertEqual(8, builder._backing_indices[2].key_count())
389
        builder.add_node(*nodes[12])
390
        # Test that memory and disk are both used for query methods; and that
391
        # None is skipped over happily.
392
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
393
            list(builder.iter_all_entries()))
394
        # Two nodes - one memory one disk
395
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
396
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
397
        self.assertEqual(13, builder.key_count())
398
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
399
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
400
        builder.add_node(*nodes[13])
401
        self.assertEqual(3, len(builder._backing_indices))
402
        self.assertEqual(2, builder._backing_indices[0].key_count())
403
        self.assertEqual(4, builder._backing_indices[1].key_count())
404
        self.assertEqual(8, builder._backing_indices[2].key_count())
405
        builder.add_node(*nodes[14])
406
        builder.add_node(*nodes[15])
407
        self.assertEqual(4, len(builder._backing_indices))
408
        self.assertEqual(None, builder._backing_indices[0])
409
        self.assertEqual(None, builder._backing_indices[1])
410
        self.assertEqual(None, builder._backing_indices[2])
411
        self.assertEqual(16, builder._backing_indices[3].key_count())
412
        # Now finish, and check we got a correctly ordered tree
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
413
        t = self.get_transport('')
414
        size = t.put_file('index', builder.finish())
415
        index = btree_index.BTreeGraphIndex(t, 'index', size)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
416
        nodes = list(index.iter_all_entries())
417
        self.assertEqual(sorted(nodes), nodes)
418
        self.assertEqual(16, len(nodes))
419
420
    def test_spill_index_stress_1_1_no_combine(self):
421
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
4168.3.6 by John Arbash Meinel
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
422
        builder.set_optimize(for_size=False, combine_backing_indices=False)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
423
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
424
        builder.add_node(*nodes[0])
425
        # Test the parts of the index that take up memory are doing so
426
        # predictably.
427
        self.assertEqual(1, len(builder._nodes))
428
        self.assertIs(None, builder._nodes_by_key)
429
        builder.add_node(*nodes[1])
430
        self.assertEqual(0, len(builder._nodes))
431
        self.assertIs(None, builder._nodes_by_key)
432
        self.assertEqual(1, len(builder._backing_indices))
433
        self.assertEqual(2, builder._backing_indices[0].key_count())
434
        # now back to memory
435
        builder.add_node(*nodes[2])
436
        self.assertEqual(1, len(builder._nodes))
437
        self.assertIs(None, builder._nodes_by_key)
438
        # And spills to a second backing index but doesn't combine
439
        builder.add_node(*nodes[3])
440
        self.assertEqual(0, len(builder._nodes))
441
        self.assertIs(None, builder._nodes_by_key)
442
        self.assertEqual(2, len(builder._backing_indices))
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
443
        for backing_index in builder._backing_indices:
444
            self.assertEqual(2, backing_index.key_count())
445
        # The next spills to the 3rd slot
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
446
        builder.add_node(*nodes[4])
447
        builder.add_node(*nodes[5])
448
        self.assertEqual(0, len(builder._nodes))
449
        self.assertIs(None, builder._nodes_by_key)
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
450
        self.assertEqual(3, len(builder._backing_indices))
451
        for backing_index in builder._backing_indices:
452
            self.assertEqual(2, backing_index.key_count())
453
        # Now spill a few more, and check that we don't combine
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
454
        builder.add_node(*nodes[6])
455
        builder.add_node(*nodes[7])
456
        builder.add_node(*nodes[8])
457
        builder.add_node(*nodes[9])
458
        builder.add_node(*nodes[10])
459
        builder.add_node(*nodes[11])
460
        builder.add_node(*nodes[12])
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
461
        self.assertEqual(6, len(builder._backing_indices))
462
        for backing_index in builder._backing_indices:
463
            self.assertEqual(2, backing_index.key_count())
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
464
        # Test that memory and disk are both used for query methods; and that
465
        # None is skipped over happily.
466
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
467
            list(builder.iter_all_entries()))
468
        # Two nodes - one memory one disk
469
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
470
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
471
        self.assertEqual(13, builder.key_count())
472
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
473
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
474
        builder.add_node(*nodes[13])
475
        builder.add_node(*nodes[14])
476
        builder.add_node(*nodes[15])
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
477
        self.assertEqual(8, len(builder._backing_indices))
478
        for backing_index in builder._backing_indices:
479
            self.assertEqual(2, backing_index.key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
480
        # Now finish, and check we got a correctly ordered tree
481
        transport = self.get_transport('')
482
        size = transport.put_file('index', builder.finish())
483
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
484
        nodes = list(index.iter_all_entries())
485
        self.assertEqual(sorted(nodes), nodes)
486
        self.assertEqual(16, len(nodes))
487
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
488
    def test_set_optimize(self):
489
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
490
        builder.set_optimize(for_size=True)
491
        self.assertTrue(builder._optimize_for_size)
492
        builder.set_optimize(for_size=False)
493
        self.assertFalse(builder._optimize_for_size)
4168.3.6 by John Arbash Meinel
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
494
        # test that we can set combine_backing_indices without effecting
495
        # _optimize_for_size
496
        obj = object()
497
        builder._optimize_for_size = obj
498
        builder.set_optimize(combine_backing_indices=False)
499
        self.assertFalse(builder._combine_backing_indices)
500
        self.assertIs(obj, builder._optimize_for_size)
501
        builder.set_optimize(combine_backing_indices=True)
502
        self.assertTrue(builder._combine_backing_indices)
503
        self.assertIs(obj, builder._optimize_for_size)
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
504
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
505
    def test_spill_index_stress_2_2(self):
506
        # test that references and longer keys don't confuse things.
507
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
508
            spill_at=2)
509
        nodes = self.make_nodes(16, 2, 2)
510
        builder.add_node(*nodes[0])
511
        # Test the parts of the index that take up memory are doing so
512
        # predictably.
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
513
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
514
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
515
        builder.add_node(*nodes[1])
516
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
517
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
518
        self.assertEqual(1, len(builder._backing_indices))
519
        self.assertEqual(2, builder._backing_indices[0].key_count())
520
        # now back to memory
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
521
        old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
522
        builder.add_node(*nodes[2])
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
523
        self.assertEqual(1, len(builder._nodes))
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
524
        self.assertIsNot(None, builder._nodes_by_key)
525
        self.assertNotEqual({}, builder._nodes_by_key)
526
        # We should have a new entry
527
        self.assertNotEqual(old, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
528
        # And spills to a second backing index combing all
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
529
        builder.add_node(*nodes[3])
530
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
531
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
532
        self.assertEqual(2, len(builder._backing_indices))
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
533
        self.assertEqual(None, builder._backing_indices[0])
534
        self.assertEqual(4, builder._backing_indices[1].key_count())
535
        # The next spills to the 2-len slot
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
536
        builder.add_node(*nodes[4])
537
        builder.add_node(*nodes[5])
538
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
539
        self.assertIs(None, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
540
        self.assertEqual(2, len(builder._backing_indices))
541
        self.assertEqual(2, builder._backing_indices[0].key_count())
542
        self.assertEqual(4, builder._backing_indices[1].key_count())
543
        # Next spill combines
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
544
        builder.add_node(*nodes[6])
545
        builder.add_node(*nodes[7])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
546
        self.assertEqual(3, len(builder._backing_indices))
547
        self.assertEqual(None, builder._backing_indices[0])
548
        self.assertEqual(None, builder._backing_indices[1])
549
        self.assertEqual(8, builder._backing_indices[2].key_count())
550
        # And so forth - counting up in binary.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
551
        builder.add_node(*nodes[8])
552
        builder.add_node(*nodes[9])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
553
        self.assertEqual(3, len(builder._backing_indices))
554
        self.assertEqual(2, builder._backing_indices[0].key_count())
555
        self.assertEqual(None, builder._backing_indices[1])
556
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
557
        builder.add_node(*nodes[10])
558
        builder.add_node(*nodes[11])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
559
        self.assertEqual(3, len(builder._backing_indices))
560
        self.assertEqual(None, builder._backing_indices[0])
561
        self.assertEqual(4, builder._backing_indices[1].key_count())
562
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
563
        builder.add_node(*nodes[12])
564
        # Test that memory and disk are both used for query methods; and that
565
        # None is skipped over happily.
566
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
567
            list(builder.iter_all_entries()))
568
        # Two nodes - one memory one disk
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
569
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
570
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
571
        self.assertEqual(13, builder.key_count())
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
572
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
573
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
574
        builder.add_node(*nodes[13])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
575
        self.assertEqual(3, len(builder._backing_indices))
576
        self.assertEqual(2, builder._backing_indices[0].key_count())
577
        self.assertEqual(4, builder._backing_indices[1].key_count())
578
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
579
        builder.add_node(*nodes[14])
580
        builder.add_node(*nodes[15])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
581
        self.assertEqual(4, len(builder._backing_indices))
582
        self.assertEqual(None, builder._backing_indices[0])
583
        self.assertEqual(None, builder._backing_indices[1])
584
        self.assertEqual(None, builder._backing_indices[2])
585
        self.assertEqual(16, builder._backing_indices[3].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
586
        # Now finish, and check we got a correctly ordered tree
587
        transport = self.get_transport('')
588
        size = transport.put_file('index', builder.finish())
589
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
590
        nodes = list(index.iter_all_entries())
591
        self.assertEqual(sorted(nodes), nodes)
592
        self.assertEqual(16, len(nodes))
593
594
    def test_spill_index_duplicate_key_caught_on_finish(self):
595
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
596
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
597
        builder.add_node(*nodes[0])
598
        builder.add_node(*nodes[1])
599
        builder.add_node(*nodes[0])
600
        self.assertRaises(errors.BadIndexDuplicateKey, builder.finish)
601
602
603
class TestBTreeIndex(BTreeTestCase):
604
605
    def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
606
        builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
607
            key_elements=key_elements)
608
        for key, value, references in nodes:
609
            builder.add_node(key, value, references)
610
        stream = builder.finish()
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
611
        trans = transport.get_transport_from_url('trace+' + self.get_url())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
612
        size = trans.put_file('index', stream)
613
        return btree_index.BTreeGraphIndex(trans, 'index', size)
614
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
615
    def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
616
                               offset=0):
617
        builder = btree_index.BTreeBuilder(key_elements=key_elements,
618
                                           reference_lists=ref_lists)
619
        builder.add_nodes(nodes)
620
        transport = self.get_transport('')
5074.4.7 by John Arbash Meinel
Workaround a strange bug about the file closing before it is read.
621
        # NamedTemporaryFile dies on builder.finish().read(). weird.
622
        temp_file = builder.finish()
623
        content = temp_file.read()
624
        del temp_file
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
625
        size = len(content)
626
        transport.put_bytes('index', (' '*offset)+content)
627
        return btree_index.BTreeGraphIndex(transport, 'index', size=size,
628
                                           offset=offset)
629
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
630
    def test_clear_cache(self):
631
        nodes = self.make_nodes(160, 2, 2)
632
        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
633
        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
634
        self.assertEqual([1, 4], index._row_lengths)
635
        self.assertIsNot(None, index._root_node)
4744.2.9 by John Arbash Meinel
Move the note and assertions.
636
        internal_node_pre_clear = index._internal_node_cache.keys()
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
637
        self.assertTrue(len(index._leaf_node_cache) > 0)
638
        index.clear_cache()
639
        # We don't touch _root_node or _internal_node_cache, both should be
640
        # small, and can save a round trip or two
641
        self.assertIsNot(None, index._root_node)
4744.2.9 by John Arbash Meinel
Move the note and assertions.
642
        # NOTE: We don't want to affect the _internal_node_cache, as we expect
643
        #       it will be small, and if we ever do touch this index again, it
644
        #       will save round-trips.  This assertion isn't very strong,
645
        #       becuase without a 3-level index, we don't have any internal
646
        #       nodes cached.
647
        self.assertEqual(internal_node_pre_clear,
648
                         index._internal_node_cache.keys())
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
649
        self.assertEqual(0, len(index._leaf_node_cache))
650
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
651
    def test_trivial_constructor(self):
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
652
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
653
        index = btree_index.BTreeGraphIndex(t, 'index', None)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
654
        # Checks the page size at load, but that isn't logged yet.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
655
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
656
657
    def test_with_size_constructor(self):
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
658
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
659
        index = btree_index.BTreeGraphIndex(t, 'index', 1)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
660
        # Checks the page size at load, but that isn't logged yet.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
661
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
662
663
    def test_empty_key_count_no_size(self):
664
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
665
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
666
        t.put_file('index', builder.finish())
667
        index = btree_index.BTreeGraphIndex(t, 'index', None)
668
        del t._activity[:]
669
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
670
        self.assertEqual(0, index.key_count())
671
        # The entire index should have been requested (as we generally have the
672
        # size available, and doing many small readvs is inappropriate).
673
        # We can't tell how much was actually read here, but - check the code.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
674
        self.assertEqual([('get', 'index')], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
675
676
    def test_empty_key_count(self):
677
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
678
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
679
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
680
        self.assertEqual(72, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
681
        index = btree_index.BTreeGraphIndex(t, 'index', size)
682
        del t._activity[:]
683
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
684
        self.assertEqual(0, index.key_count())
685
        # The entire index should have been read, as 4K > size
686
        self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
687
                         t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
688
689
    def test_non_empty_key_count_2_2(self):
690
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
691
        nodes = self.make_nodes(35, 2, 2)
692
        for node in nodes:
693
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
694
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
695
        size = t.put_file('index', builder.finish())
696
        index = btree_index.BTreeGraphIndex(t, 'index', size)
697
        del t._activity[:]
698
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
699
        self.assertEqual(70, index.key_count())
700
        # The entire index should have been read, as it is one page long.
701
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
702
            t._activity)
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
703
        self.assertEqual(1173, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
704
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
705
    def test_with_offset_no_size(self):
706
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
707
                                            offset=1234,
708
                                            nodes=self.make_nodes(200, 1, 1))
709
        index._size = None # throw away the size info
710
        self.assertEqual(200, index.key_count())
711
712
    def test_with_small_offset(self):
713
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
714
                                            offset=1234,
715
                                            nodes=self.make_nodes(200, 1, 1))
716
        self.assertEqual(200, index.key_count())
717
718
    def test_with_large_offset(self):
719
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
720
                                            offset=123456,
721
                                            nodes=self.make_nodes(200, 1, 1))
722
        self.assertEqual(200, index.key_count())
723
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
724
    def test__read_nodes_no_size_one_page_reads_once(self):
725
        self.make_index(nodes=[(('key',), 'value', ())])
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
726
        trans = transport.get_transport_from_url('trace+' + self.get_url())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
727
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
728
        del trans._activity[:]
729
        nodes = dict(index._read_nodes([0]))
730
        self.assertEqual([0], nodes.keys())
731
        node = nodes[0]
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
732
        self.assertEqual([('key',)], node.all_keys())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
733
        self.assertEqual([('get', 'index')], trans._activity)
734
735
    def test__read_nodes_no_size_multiple_pages(self):
736
        index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
737
        index.key_count()
738
        num_pages = index._row_offsets[-1]
739
        # Reopen with a traced transport and no size
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
740
        trans = transport.get_transport_from_url('trace+' + self.get_url())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
741
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
742
        del trans._activity[:]
743
        nodes = dict(index._read_nodes([0]))
744
        self.assertEqual(range(num_pages), nodes.keys())
745
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
746
    def test_2_levels_key_count_2_2(self):
747
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
748
        nodes = self.make_nodes(160, 2, 2)
749
        for node in nodes:
750
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
751
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
752
        size = t.put_file('index', builder.finish())
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
753
        self.assertEqual(17692, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
754
        index = btree_index.BTreeGraphIndex(t, 'index', size)
755
        del t._activity[:]
756
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
757
        self.assertEqual(320, index.key_count())
758
        # The entire index should not have been read.
759
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
760
                         t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
761
762
    def test_validate_one_page(self):
763
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
764
        nodes = self.make_nodes(45, 2, 2)
765
        for node in nodes:
766
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
767
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
768
        size = t.put_file('index', builder.finish())
769
        index = btree_index.BTreeGraphIndex(t, 'index', size)
770
        del t._activity[:]
771
        self.assertEqual([], t._activity)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
772
        index.validate()
773
        # The entire index should have been read linearly.
774
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
775
                         t._activity)
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
776
        self.assertEqual(1488, size)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
777
778
    def test_validate_two_pages(self):
779
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
780
        nodes = self.make_nodes(80, 2, 2)
781
        for node in nodes:
782
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
783
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
784
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
785
        # Root page, 2 leaf pages
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
786
        self.assertEqual(9339, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
787
        index = btree_index.BTreeGraphIndex(t, 'index', size)
788
        del t._activity[:]
789
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
790
        index.validate()
791
        # The entire index should have been read linearly.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
792
        self.assertEqual(
793
            [('readv', 'index', [(0, 4096)], False, None),
794
             ('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
795
            t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
796
        # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
797
        # node and make validate find them.
798
799
    def test_eq_ne(self):
800
        # two indices are equal when constructed with the same parameters:
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
801
        t1 = transport.get_transport_from_url('trace+' + self.get_url(''))
5609.9.4 by Vincent Ladeuil
Use self.get_transport instead of transport.get_transport where possible.
802
        t2 = self.get_transport()
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
803
        self.assertTrue(
804
            btree_index.BTreeGraphIndex(t1, 'index', None) ==
805
            btree_index.BTreeGraphIndex(t1, 'index', None))
806
        self.assertTrue(
807
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
808
            btree_index.BTreeGraphIndex(t1, 'index', 20))
809
        self.assertFalse(
810
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
811
            btree_index.BTreeGraphIndex(t2, 'index', 20))
812
        self.assertFalse(
813
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) ==
814
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
815
        self.assertFalse(
816
            btree_index.BTreeGraphIndex(t1, 'index', 10) ==
817
            btree_index.BTreeGraphIndex(t1, 'index', 20))
818
        self.assertFalse(
819
            btree_index.BTreeGraphIndex(t1, 'index', None) !=
820
            btree_index.BTreeGraphIndex(t1, 'index', None))
821
        self.assertFalse(
822
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
823
            btree_index.BTreeGraphIndex(t1, 'index', 20))
824
        self.assertTrue(
825
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
826
            btree_index.BTreeGraphIndex(t2, 'index', 20))
827
        self.assertTrue(
828
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) !=
829
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
830
        self.assertTrue(
831
            btree_index.BTreeGraphIndex(t1, 'index', 10) !=
832
            btree_index.BTreeGraphIndex(t1, 'index', 20))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
833
6178.2.9 by Shannon Weyrick
A version of the patch, based on suggestions from John Meinel, which detects an empty page differently to avoid false positives.
834
    def test_key_too_big(self):
835
        # the size that matters here is the _compressed_ size of the key, so we can't
836
        # do a simple character repeat.
837
        bigKey = ''.join(map(repr, xrange(btree_index._PAGE_SIZE)))
838
        self.assertRaises(errors.BadIndexKey,
839
                          self.make_index,
840
                          nodes=[((bigKey,), 'value', ())])
841
        
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
842
    def test_iter_all_only_root_no_size(self):
843
        self.make_index(nodes=[(('key',), 'value', ())])
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
844
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
845
        index = btree_index.BTreeGraphIndex(t, 'index', None)
846
        del t._activity[:]
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
847
        self.assertEqual([(('key',), 'value')],
848
                         [x[1:] for x in index.iter_all_entries()])
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
849
        self.assertEqual([('get', 'index')], t._activity)
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
850
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
851
    def test_iter_all_entries_reads(self):
852
        # iterating all entries reads the header, then does a linear
853
        # read.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
854
        self.shrink_page_size()
3641.5.11 by John Arbash Meinel
Some small test cleanup
855
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
856
        # 20k nodes is enough to create a two internal nodes on the second
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
857
        # level, with a 2K page size
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
858
        nodes = self.make_nodes(10000, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
859
        for node in nodes:
860
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
861
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
862
        size = t.put_file('index', builder.finish())
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
863
        self.assertEqual(1303220, size, 'number of expected bytes in the'
864
                                        ' output changed')
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
865
        page_size = btree_index._PAGE_SIZE
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
866
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
867
        index = btree_index.BTreeGraphIndex(t, 'index', size)
868
        del t._activity[:]
869
        self.assertEqual([], t._activity)
3641.5.8 by John Arbash Meinel
Fix up the test suite, now that we pack more efficiently.
870
        found_nodes = self.time(list, index.iter_all_entries())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
871
        bare_nodes = []
872
        for node in found_nodes:
873
            self.assertTrue(node[0] is index)
874
            bare_nodes.append(node[1:])
875
        self.assertEqual(3, len(index._row_lengths),
876
            "Not enough rows: %r" % index._row_lengths)
877
        # Should be as long as the nodes we supplied
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
878
        self.assertEqual(20000, len(found_nodes))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
879
        # Should have the same content
880
        self.assertEqual(set(nodes), set(bare_nodes))
881
        # Should have done linear scan IO up the index, ignoring
882
        # the internal nodes:
883
        # The entire index should have been read
884
        total_pages = sum(index._row_lengths)
885
        self.assertEqual(total_pages, index._row_offsets[-1])
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
886
        self.assertEqual(1303220, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
887
        # The start of the leaves
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
888
        first_byte = index._row_offsets[-2] * page_size
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
889
        readv_request = []
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
890
        for offset in range(first_byte, size, page_size):
891
            readv_request.append((offset, page_size))
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
892
        # The last page is truncated
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
893
        readv_request[-1] = (readv_request[-1][0], 1303220 % page_size)
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
894
        expected = [('readv', 'index', [(0, page_size)], False, None),
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
895
             ('readv',  'index', readv_request, False, None)]
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
896
        if expected != t._activity:
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
897
            self.assertEqualDiff(pprint.pformat(expected),
898
                                 pprint.pformat(transport._activity))
899
900
    def _test_iter_entries_references_resolved(self):
901
        index = self.make_index(1, nodes=[
902
            (('name', ), 'data', ([('ref', ), ('ref', )], )),
903
            (('ref', ), 'refdata', ([], ))])
904
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),('ref',)),)),
905
            (index, ('ref', ), 'refdata', ((), ))]),
906
            set(index.iter_entries([('name',), ('ref',)])))
907
908
    def test_iter_entries_references_2_refs_resolved(self):
909
        # iterating some entries reads just the pages needed. For now, to
910
        # get it working and start measuring, only 4K pages are read.
911
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
912
        # 80 nodes is enough to create a two-level index.
913
        nodes = self.make_nodes(160, 2, 2)
914
        for node in nodes:
915
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
916
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
917
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
918
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
919
        index = btree_index.BTreeGraphIndex(t, 'index', size)
920
        del t._activity[:]
921
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
922
        # search for one key
923
        found_nodes = list(index.iter_entries([nodes[30][0]]))
924
        bare_nodes = []
925
        for node in found_nodes:
926
            self.assertTrue(node[0] is index)
927
            bare_nodes.append(node[1:])
928
        # Should be as long as the nodes we supplied
929
        self.assertEqual(1, len(found_nodes))
930
        # Should have the same content
931
        self.assertEqual(nodes[30], bare_nodes[0])
932
        # Should have read the root node, then one leaf page:
933
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
934
             ('readv',  'index', [(8192, 4096), ], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
935
            t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
936
937
    def test_iter_key_prefix_1_element_key_None(self):
938
        index = self.make_index()
939
        self.assertRaises(errors.BadIndexKey, list,
940
            index.iter_entries_prefix([(None, )]))
941
942
    def test_iter_key_prefix_wrong_length(self):
943
        index = self.make_index()
944
        self.assertRaises(errors.BadIndexKey, list,
945
            index.iter_entries_prefix([('foo', None)]))
946
        index = self.make_index(key_elements=2)
947
        self.assertRaises(errors.BadIndexKey, list,
948
            index.iter_entries_prefix([('foo', )]))
949
        self.assertRaises(errors.BadIndexKey, list,
950
            index.iter_entries_prefix([('foo', None, None)]))
951
952
    def test_iter_key_prefix_1_key_element_no_refs(self):
953
        index = self.make_index( nodes=[
954
            (('name', ), 'data', ()),
955
            (('ref', ), 'refdata', ())])
956
        self.assertEqual(set([(index, ('name', ), 'data'),
957
            (index, ('ref', ), 'refdata')]),
958
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
959
960
    def test_iter_key_prefix_1_key_element_refs(self):
961
        index = self.make_index(1, nodes=[
962
            (('name', ), 'data', ([('ref', )], )),
963
            (('ref', ), 'refdata', ([], ))])
964
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),),)),
965
            (index, ('ref', ), 'refdata', ((), ))]),
966
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
967
968
    def test_iter_key_prefix_2_key_element_no_refs(self):
969
        index = self.make_index(key_elements=2, nodes=[
970
            (('name', 'fin1'), 'data', ()),
971
            (('name', 'fin2'), 'beta', ()),
972
            (('ref', 'erence'), 'refdata', ())])
973
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
974
            (index, ('ref', 'erence'), 'refdata')]),
975
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
976
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
977
            (index, ('name', 'fin2'), 'beta')]),
978
            set(index.iter_entries_prefix([('name', None)])))
979
980
    def test_iter_key_prefix_2_key_element_refs(self):
981
        index = self.make_index(1, key_elements=2, nodes=[
982
            (('name', 'fin1'), 'data', ([('ref', 'erence')], )),
983
            (('name', 'fin2'), 'beta', ([], )),
984
            (('ref', 'erence'), 'refdata', ([], ))])
985
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
986
            (index, ('ref', 'erence'), 'refdata', ((), ))]),
987
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
988
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
989
            (index, ('name', 'fin2'), 'beta', ((), ))]),
990
            set(index.iter_entries_prefix([('name', None)])))
991
4011.5.5 by Andrew Bennetts
Fix typo in comment.
992
    # XXX: external_references tests are duplicated in test_index.  We
4011.5.3 by Andrew Bennetts
Implement and test external_references on GraphIndex and BTreeGraphIndex.
993
    # probably should have per_graph_index tests...
994
    def test_external_references_no_refs(self):
995
        index = self.make_index(ref_lists=0, nodes=[])
996
        self.assertRaises(ValueError, index.external_references, 0)
997
998
    def test_external_references_no_results(self):
999
        index = self.make_index(ref_lists=1, nodes=[
1000
            (('key',), 'value', ([],))])
1001
        self.assertEqual(set(), index.external_references(0))
1002
1003
    def test_external_references_missing_ref(self):
1004
        missing_key = ('missing',)
1005
        index = self.make_index(ref_lists=1, nodes=[
1006
            (('key',), 'value', ([missing_key],))])
1007
        self.assertEqual(set([missing_key]), index.external_references(0))
1008
1009
    def test_external_references_multiple_ref_lists(self):
1010
        missing_key = ('missing',)
1011
        index = self.make_index(ref_lists=2, nodes=[
1012
            (('key',), 'value', ([], [missing_key]))])
1013
        self.assertEqual(set([]), index.external_references(0))
1014
        self.assertEqual(set([missing_key]), index.external_references(1))
1015
1016
    def test_external_references_two_records(self):
1017
        index = self.make_index(ref_lists=1, nodes=[
1018
            (('key-1',), 'value', ([('key-2',)],)),
1019
            (('key-2',), 'value', ([],)),
1020
            ])
1021
        self.assertEqual(set([]), index.external_references(0))
1022
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1023
    def test__find_ancestors_one_page(self):
4593.4.5 by John Arbash Meinel
Start adding some tests.
1024
        key1 = ('key-1',)
1025
        key2 = ('key-2',)
1026
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1027
            (key1, 'value', ([key2],)),
1028
            (key2, 'value', ([],)),
1029
            ])
1030
        parent_map = {}
1031
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1032
        search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
4593.4.5 by John Arbash Meinel
Start adding some tests.
1033
        self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1034
        self.assertEqual(set(), missing_keys)
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1035
        self.assertEqual(set(), search_keys)
4593.4.5 by John Arbash Meinel
Start adding some tests.
1036
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1037
    def test__find_ancestors_one_page_w_missing(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1038
        key1 = ('key-1',)
1039
        key2 = ('key-2',)
1040
        key3 = ('key-3',)
1041
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1042
            (key1, 'value', ([key2],)),
1043
            (key2, 'value', ([],)),
1044
            ])
1045
        parent_map = {}
1046
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1047
        search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1048
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1049
        self.assertEqual({key2: ()}, parent_map)
1050
        # we know that key3 is missing because we read the page that it would
1051
        # otherwise be on
1052
        self.assertEqual(set([key3]), missing_keys)
1053
        self.assertEqual(set(), search_keys)
1054
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1055
    def test__find_ancestors_one_parent_missing(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1056
        key1 = ('key-1',)
1057
        key2 = ('key-2',)
1058
        key3 = ('key-3',)
1059
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1060
            (key1, 'value', ([key2],)),
1061
            (key2, 'value', ([key3],)),
1062
            ])
1063
        parent_map = {}
1064
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1065
        search_keys = index._find_ancestors([key1], 0, parent_map,
1066
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1067
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1068
        self.assertEqual(set(), missing_keys)
1069
        # all we know is that key3 wasn't present on the page we were reading
1070
        # but if you look, the last key is key2 which comes before key3, so we
1071
        # don't know whether key3 would land on this page or not.
1072
        self.assertEqual(set([key3]), search_keys)
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1073
        search_keys = index._find_ancestors(search_keys, 0, parent_map,
1074
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1075
        # passing it back in, we are sure it is 'missing'
1076
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1077
        self.assertEqual(set([key3]), missing_keys)
1078
        self.assertEqual(set([]), search_keys)
1079
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1080
    def test__find_ancestors_dont_search_known(self):
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1081
        key1 = ('key-1',)
1082
        key2 = ('key-2',)
1083
        key3 = ('key-3',)
1084
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1085
            (key1, 'value', ([key2],)),
1086
            (key2, 'value', ([key3],)),
1087
            (key3, 'value', ([],)),
1088
            ])
1089
        # We already know about key2, so we won't try to search for key3
1090
        parent_map = {key2: (key3,)}
1091
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1092
        search_keys = index._find_ancestors([key1], 0, parent_map,
1093
                                            missing_keys)
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1094
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1095
        self.assertEqual(set(), missing_keys)
1096
        self.assertEqual(set(), search_keys)
1097
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1098
    def test__find_ancestors_multiple_pages(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1099
        # We need to use enough keys that we actually cause a split
1100
        start_time = 1249671539
1101
        email = "joebob@example.com"
1102
        nodes = []
1103
        ref_lists = ((),)
1104
        rev_keys = []
1105
        for i in xrange(400):
1106
            rev_id = '%s-%s-%s' % (email,
1107
                                   osutils.compact_date(start_time + i),
1108
                                   osutils.rand_chars(16))
1109
            rev_key = (rev_id,)
1110
            nodes.append((rev_key, 'value', ref_lists))
1111
            # We have a ref 'list' of length 1, with a list of parents, with 1
1112
            # parent which is a key
1113
            ref_lists = ((rev_key,),)
1114
            rev_keys.append(rev_key)
1115
        index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1116
        self.assertEqual(400, index.key_count())
1117
        self.assertEqual(3, len(index._row_offsets))
1118
        nodes = dict(index._read_nodes([1, 2]))
1119
        l1 = nodes[1]
1120
        l2 = nodes[2]
1121
        min_l2_key = l2.min_key
1122
        max_l1_key = l1.max_key
1123
        self.assertTrue(max_l1_key < min_l2_key)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1124
        parents_min_l2_key = l2[min_l2_key][1][0]
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1125
        self.assertEqual((l1.max_key,), parents_min_l2_key)
1126
        # Now, whatever key we select that would fall on the second page,
1127
        # should give us all the parents until the page break
1128
        key_idx = rev_keys.index(min_l2_key)
1129
        next_key = rev_keys[key_idx+1]
1130
        # So now when we get the parent map, we should get the key we are
1131
        # looking for, min_l2_key, and then a reference to go look for the
1132
        # parent of that key
1133
        parent_map = {}
1134
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1135
        search_keys = index._find_ancestors([next_key], 0, parent_map,
1136
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1137
        self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1138
        self.assertEqual(set(), missing_keys)
1139
        self.assertEqual(set([max_l1_key]), search_keys)
1140
        parent_map = {}
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1141
        search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1142
                                            missing_keys)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1143
        self.assertEqual(l1.all_keys(), sorted(parent_map))
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1144
        self.assertEqual(set(), missing_keys)
1145
        self.assertEqual(set(), search_keys)
1146
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1147
    def test__find_ancestors_empty_index(self):
4593.4.11 by John Arbash Meinel
Snapshot the work in progress.
1148
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1149
        parent_map = {}
1150
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1151
        search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1152
                                            missing_keys)
4593.4.11 by John Arbash Meinel
Snapshot the work in progress.
1153
        self.assertEqual(set(), search_keys)
1154
        self.assertEqual({}, parent_map)
1155
        self.assertEqual(set([('one',), ('two',)]), missing_keys)
1156
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1157
    def test_supports_unlimited_cache(self):
1158
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1159
        # We need enough nodes to cause a page split (so we have both an
1160
        # internal node and a couple leaf nodes. 500 seems to be enough.)
4634.71.2 by John Arbash Meinel
If we are going to sometimes use a dict, we have to conform to just the dict interface.
1161
        nodes = self.make_nodes(500, 1, 0)
1162
        for node in nodes:
1163
            builder.add_node(*node)
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1164
        stream = builder.finish()
5609.9.4 by Vincent Ladeuil
Use self.get_transport instead of transport.get_transport where possible.
1165
        trans = self.get_transport()
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1166
        size = trans.put_file('index', stream)
1167
        index = btree_index.BTreeGraphIndex(trans, 'index', size)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1168
        self.assertEqual(500, index.key_count())
1169
        # We have an internal node
1170
        self.assertEqual(2, len(index._row_lengths))
1171
        # We have at least 2 leaf nodes
1172
        self.assertTrue(index._row_lengths[-1] >= 2)
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1173
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1174
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1175
                         index._leaf_node_cache._max_cache)
1176
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1177
        self.assertEqual(100, index._internal_node_cache._max_cache)
1178
        # No change if unlimited_cache=False is passed
1179
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1180
                                            unlimited_cache=False)
1181
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1182
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1183
                         index._leaf_node_cache._max_cache)
1184
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1185
        self.assertEqual(100, index._internal_node_cache._max_cache)
1186
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1187
                                            unlimited_cache=True)
1188
        self.assertIsInstance(index._leaf_node_cache, dict)
1189
        self.assertIs(type(index._internal_node_cache), dict)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1190
        # Exercise the lookup code
1191
        entries = set(index.iter_entries([n[0] for n in nodes]))
1192
        self.assertEqual(500, len(entries))
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1193
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1194
1195
class TestBTreeNodes(BTreeTestCase):
1196
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
1197
    scenarios = btreeparser_scenarios()
1198
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1199
    def setUp(self):
1200
        BTreeTestCase.setUp(self)
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
1201
        self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1202
1203
    def test_LeafNode_1_0(self):
1204
        node_bytes = ("type=leaf\n"
1205
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
1206
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
1207
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
1208
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
1209
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
1210
        node = btree_index._LeafNode(node_bytes, 1, 0)
1211
        # We do direct access, or don't care about order, to leaf nodes most of
1212
        # the time, so a dict is useful:
1213
        self.assertEqual({
1214
            ("0000000000000000000000000000000000000000",): ("value:0", ()),
1215
            ("1111111111111111111111111111111111111111",): ("value:1", ()),
1216
            ("2222222222222222222222222222222222222222",): ("value:2", ()),
1217
            ("3333333333333333333333333333333333333333",): ("value:3", ()),
1218
            ("4444444444444444444444444444444444444444",): ("value:4", ()),
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1219
            }, dict(node.all_items()))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1220
1221
    def test_LeafNode_2_2(self):
1222
        node_bytes = ("type=leaf\n"
1223
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
1224
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1225
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1226
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
1227
            ""
1228
            )
1229
        node = btree_index._LeafNode(node_bytes, 2, 2)
1230
        # We do direct access, or don't care about order, to leaf nodes most of
1231
        # the time, so a dict is useful:
1232
        self.assertEqual({
1233
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1234
            ('00', '11'): ('value:1',
1235
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1236
            ('11', '33'): ('value:3',
1237
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1238
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1239
            }, dict(node.all_items()))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1240
1241
    def test_InternalNode_1(self):
1242
        node_bytes = ("type=internal\n"
1243
            "offset=1\n"
1244
            "0000000000000000000000000000000000000000\n"
1245
            "1111111111111111111111111111111111111111\n"
1246
            "2222222222222222222222222222222222222222\n"
1247
            "3333333333333333333333333333333333333333\n"
1248
            "4444444444444444444444444444444444444444\n"
1249
            )
1250
        node = btree_index._InternalNode(node_bytes)
1251
        # We want to bisect to find the right children from this node, so a
1252
        # vector is most useful.
1253
        self.assertEqual([
1254
            ("0000000000000000000000000000000000000000",),
1255
            ("1111111111111111111111111111111111111111",),
1256
            ("2222222222222222222222222222222222222222",),
1257
            ("3333333333333333333333333333333333333333",),
1258
            ("4444444444444444444444444444444444444444",),
1259
            ], node.keys)
1260
        self.assertEqual(1, node.offset)
1261
1262
    def test_LeafNode_2_2(self):
1263
        node_bytes = ("type=leaf\n"
1264
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
1265
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1266
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1267
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
1268
            ""
1269
            )
1270
        node = btree_index._LeafNode(node_bytes, 2, 2)
1271
        # We do direct access, or don't care about order, to leaf nodes most of
1272
        # the time, so a dict is useful:
1273
        self.assertEqual({
1274
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1275
            ('00', '11'): ('value:1',
1276
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1277
            ('11', '33'): ('value:3',
1278
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1279
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1280
            }, dict(node.all_items()))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1281
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1282
    def assertFlattened(self, expected, key, value, refs):
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1283
        flat_key, flat_line = self.parse_btree._flatten_node(
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1284
            (None, key, value, refs), bool(refs))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1285
        self.assertEqual('\x00'.join(key), flat_key)
1286
        self.assertEqual(expected, flat_line)
1287
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1288
    def test__flatten_node(self):
1289
        self.assertFlattened('key\0\0value\n', ('key',), 'value', [])
1290
        self.assertFlattened('key\0tuple\0\0value str\n',
1291
                             ('key', 'tuple'), 'value str', [])
1292
        self.assertFlattened('key\0tuple\0triple\0\0value str\n',
1293
                             ('key', 'tuple', 'triple'), 'value str', [])
1294
        self.assertFlattened('k\0t\0s\0ref\0value str\n',
1295
                             ('k', 't', 's'), 'value str', [[('ref',)]])
1296
        self.assertFlattened('key\0tuple\0ref\0key\0value str\n',
1297
                             ('key', 'tuple'), 'value str', [[('ref', 'key')]])
1298
        self.assertFlattened("00\x0000\x00\t00\x00ref00\x00value:0\n",
1299
            ('00', '00'), 'value:0', ((), (('00', 'ref00'),)))
1300
        self.assertFlattened(
1301
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
1302
            ('00', '11'), 'value:1',
1303
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01'))))
1304
        self.assertFlattened(
1305
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
1306
            ('11', '33'), 'value:3',
1307
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22'))))
1308
        self.assertFlattened(
1309
            "11\x0044\x00\t11\x00ref00\x00value:4\n",
1310
            ('11', '44'), 'value:4', ((), (('11', 'ref00'),)))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1311
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1312
1313
class TestCompiledBtree(tests.TestCase):
1314
1315
    def test_exists(self):
1316
        # This is just to let the user know if they don't have the feature
1317
        # available
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
1318
        self.requireFeature(compiled_btreeparser_feature)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1319
1320
1321
class TestMultiBisectRight(tests.TestCase):
1322
1323
    def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
1324
        self.assertEqual(offsets,
1325
                         btree_index.BTreeGraphIndex._multi_bisect_right(
1326
                            search_keys, fixed_keys))
1327
1328
    def test_after(self):
1329
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
1330
        self.assertMultiBisectRight([(3, ['e', 'f', 'g'])],
1331
                                    ['e', 'f', 'g'], ['a', 'b', 'c'])
1332
1333
    def test_before(self):
1334
        self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
1335
        self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])],
1336
                                    ['a', 'b', 'c', 'd'], ['e', 'f', 'g'])
1337
1338
    def test_exact(self):
1339
        self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
1340
        self.assertMultiBisectRight([(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b'])
1341
        self.assertMultiBisectRight([(1, ['a']), (3, ['c'])],
1342
                                    ['a', 'c'], ['a', 'b', 'c'])
1343
1344
    def test_inbetween(self):
1345
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
1346
        self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
1347
                                    ['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h'])
1348
1349
    def test_mixed(self):
1350
        self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']),
1351
                                     (4, ['g', 'h'])],
1352
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
1353
                                    ['c', 'd', 'f', 'g'])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1354
1355
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1356
class TestExpandOffsets(tests.TestCase):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1357
1358
    def make_index(self, size, recommended_pages=None):
1359
        """Make an index with a generic size.
1360
1361
        This doesn't actually create anything on disk, it just primes a
1362
        BTreeGraphIndex with the recommended information.
1363
        """
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
1364
        index = btree_index.BTreeGraphIndex(
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
1365
            transport.get_transport_from_url('memory:///'),
1366
            'test-index', size=size)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1367
        if recommended_pages is not None:
1368
            index._recommended_pages = recommended_pages
1369
        return index
1370
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1371
    def set_cached_offsets(self, index, cached_offsets):
1372
        """Monkeypatch to give a canned answer for _get_offsets_for...()."""
1373
        def _get_offsets_to_cached_pages():
1374
            cached = set(cached_offsets)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1375
            return cached
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1376
        index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1377
1378
    def prepare_index(self, index, node_ref_lists, key_length, key_count,
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1379
                      row_lengths, cached_offsets):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1380
        """Setup the BTreeGraphIndex with some pre-canned information."""
1381
        index.node_ref_lists = node_ref_lists
1382
        index._key_length = key_length
1383
        index._key_count = key_count
1384
        index._row_lengths = row_lengths
1385
        index._compute_row_offsets()
1386
        index._root_node = btree_index._InternalNode('internal\noffset=0\n')
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1387
        self.set_cached_offsets(index, cached_offsets)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1388
1389
    def make_100_node_index(self):
1390
        index = self.make_index(4096*100, 6)
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1391
        # Consider we've already made a single request at the middle
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1392
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1393
                           key_count=1000, row_lengths=[1, 99],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1394
                           cached_offsets=[0, 50])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1395
        return index
1396
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1397
    def make_1000_node_index(self):
1398
        index = self.make_index(4096*1000, 6)
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1399
        # Pretend we've already made a single request in the middle
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1400
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1401
                           key_count=90000, row_lengths=[1, 9, 990],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1402
                           cached_offsets=[0, 5, 500])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1403
        return index
1404
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1405
    def assertNumPages(self, expected_pages, index, size):
1406
        index._size = size
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1407
        self.assertEqual(expected_pages, index._compute_total_pages_in_index())
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1408
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1409
    def assertExpandOffsets(self, expected, index, offsets):
1410
        self.assertEqual(expected, index._expand_offsets(offsets),
1411
                         'We did not get the expected value after expanding'
1412
                         ' %s' % (offsets,))
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1413
1414
    def test_default_recommended_pages(self):
1415
        index = self.make_index(None)
1416
        # local transport recommends 4096 byte reads, which is 1 page
1417
        self.assertEqual(1, index._recommended_pages)
1418
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1419
    def test__compute_total_pages_in_index(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1420
        index = self.make_index(None)
1421
        self.assertNumPages(1, index, 1024)
1422
        self.assertNumPages(1, index, 4095)
1423
        self.assertNumPages(1, index, 4096)
1424
        self.assertNumPages(2, index, 4097)
1425
        self.assertNumPages(2, index, 8192)
1426
        self.assertNumPages(76, index, 4096*75 + 10)
1427
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1428
    def test__find_layer_start_and_stop(self):
1429
        index = self.make_1000_node_index()
1430
        self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1431
        self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1432
        self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1433
        self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1434
        self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1435
        self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1436
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1437
    def test_unknown_size(self):
1438
        # We should not expand if we don't know the file size
1439
        index = self.make_index(None, 10)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1440
        self.assertExpandOffsets([0], index, [0])
1441
        self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1442
1443
    def test_more_than_recommended(self):
1444
        index = self.make_index(4096*100, 2)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1445
        self.assertExpandOffsets([1, 10], index, [1, 10])
1446
        self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1447
1448
    def test_read_all_from_root(self):
1449
        index = self.make_index(4096*10, 20)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1450
        self.assertExpandOffsets(range(10), index, [0])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1451
1452
    def test_read_all_when_cached(self):
1453
        # We've read enough that we can grab all the rest in a single request
1454
        index = self.make_index(4096*10, 5)
1455
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1456
                           key_count=1000, row_lengths=[1, 9],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1457
                           cached_offsets=[0, 1, 2, 5, 6])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1458
        # It should fill the remaining nodes, regardless of the one requested
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1459
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1460
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1461
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1462
1463
    def test_no_root_node(self):
1464
        index = self.make_index(4096*10, 5)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1465
        self.assertExpandOffsets([0], index, [0])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1466
1467
    def test_include_neighbors(self):
1468
        index = self.make_100_node_index()
1469
        # We expand in both directions, until we have at least 'recommended'
1470
        # pages
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1471
        self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1472
        self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1473
        # If we hit an 'edge' we continue in the other direction
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1474
        self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1475
        self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1476
1477
        # Requesting many nodes will expand all locations equally
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1478
        self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1479
        self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1480
                               [2, 10, 81])
1481
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1482
    def test_stop_at_cached(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1483
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1484
        self.set_cached_offsets(index, [0, 10, 19])
1485
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1486
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1487
        self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1488
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1489
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1490
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1491
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1492
    def test_cannot_fully_expand(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1493
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1494
        self.set_cached_offsets(index, [0, 10, 12])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1495
        # We don't go into an endless loop if we are bound by cached nodes
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1496
        self.assertExpandOffsets([11], index, [11])
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1497
1498
    def test_overlap(self):
1499
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1500
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1501
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1502
1503
    def test_stay_within_layer(self):
1504
        index = self.make_1000_node_index()
1505
        # When expanding a request, we won't read nodes from the next layer
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1506
        self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1507
        self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1508
        self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1509
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1510
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1511
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1512
        self.set_cached_offsets(index, [0, 4, 12])
1513
        self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1514
        self.assertExpandOffsets([10, 11], index, [11])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1515
1516
    def test_small_requests_unexpanded(self):
1517
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1518
        self.set_cached_offsets(index, [0])
1519
        self.assertExpandOffsets([1], index, [1])
1520
        self.assertExpandOffsets([50], index, [50])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1521
        # If we request more than one node, then we'll expand
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1522
        self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1523
1524
        # The first pass does not expand
1525
        index = self.make_1000_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1526
        self.set_cached_offsets(index, [0])
1527
        self.assertExpandOffsets([1], index, [1])
1528
        self.set_cached_offsets(index, [0, 1])
1529
        self.assertExpandOffsets([100], index, [100])
1530
        self.set_cached_offsets(index, [0, 1, 100])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1531
        # But after the first depth, we will expand
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1532
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1533
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1534
        self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1535
        self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,
1536
                                 [105])