~bzr-pqm/bzr/bzr.dev

5557.1.7 by John Arbash Meinel
Merge in the bzr.dev 5582
1
# Copyright (C) 2008-2011 Canonical Ltd
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
2
#
3
# This program is free software; you can redistribute it and/or modify
3641.3.29 by John Arbash Meinel
Cleanup the copyright headers
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
16
#
17
18
"""Tests for btree indices."""
19
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
20
import pprint
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
21
import zlib
22
23
from bzrlib import (
24
    btree_index,
25
    errors,
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
26
    fifo_cache,
27
    lru_cache,
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
28
    osutils,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
29
    tests,
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
30
    transport,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
31
    )
32
from bzrlib.tests import (
33
    TestCaseWithTransport,
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
34
    scenarios,
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
35
    )
5967.12.1 by Martin Pool
Move all test features into bzrlib.tests.features
36
from bzrlib.tests import (
37
    features,
38
    )
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
39
40
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
41
load_tests = scenarios.load_tests_apply_scenarios
42
43
44
def btreeparser_scenarios():
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
45
    import bzrlib._btree_serializer_py as py_module
4084.5.1 by Robert Collins
Bulk update all test adaptation into a single approach, using multiply_tests rather than test adapters.
46
    scenarios = [('python', {'parse_btree': py_module})]
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
47
    if compiled_btreeparser_feature.available():
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
48
        scenarios.append(('C', 
49
            {'parse_btree': compiled_btreeparser_feature.module}))
50
    return scenarios
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
51
52
5967.12.1 by Martin Pool
Move all test features into bzrlib.tests.features
53
compiled_btreeparser_feature = features.ModuleAvailableFeature(
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
54
    'bzrlib._btree_serializer_pyx')
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
55
56
57
class BTreeTestCase(TestCaseWithTransport):
58
    # test names here are suffixed by the key length and reference list count
59
    # that they test.
60
61
    def setUp(self):
62
        TestCaseWithTransport.setUp(self)
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
63
        self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
64
65
    def make_nodes(self, count, key_elements, reference_lists):
66
        """Generate count*key_elements sample nodes."""
67
        keys = []
68
        for prefix_pos in range(key_elements):
69
            if key_elements - 1:
70
                prefix = (str(prefix_pos) * 40,)
71
            else:
72
                prefix = ()
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
73
            for pos in xrange(count):
74
                # TODO: This creates odd keys. When count == 100,000, it
75
                #       creates a 240 byte key
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
76
                key = prefix + (str(pos) * 40,)
77
                value = "value:%s" % pos
78
                if reference_lists:
79
                    # generate some references
80
                    refs = []
81
                    for list_pos in range(reference_lists):
82
                        # as many keys in each list as its index + the key depth
83
                        # mod 2 - this generates both 0 length lists and
84
                        # ones slightly longer than the number of lists.
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
85
                        # It also ensures we have non homogeneous lists.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
86
                        refs.append([])
87
                        for ref_pos in range(list_pos + pos % 2):
88
                            if pos % 2:
89
                                # refer to a nearby key
90
                                refs[-1].append(prefix + ("ref" + str(pos - 1) * 40,))
91
                            else:
92
                                # serial of this ref in the ref list
93
                                refs[-1].append(prefix + ("ref" + str(ref_pos) * 40,))
94
                        refs[-1] = tuple(refs[-1])
95
                    refs = tuple(refs)
96
                else:
97
                    refs = ()
98
                keys.append((key, value, refs))
99
        return keys
100
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
101
    def shrink_page_size(self):
102
        """Shrink the default page size so that less fits in a page."""
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
103
        self.overrideAttr(btree_index, '_PAGE_SIZE')
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
104
        btree_index._PAGE_SIZE = 2048
105
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
106
107
class TestBTreeBuilder(BTreeTestCase):
108
4744.2.7 by John Arbash Meinel
Add .clear_cache() members to GraphIndexBuilder and BTreeBuilder.
109
    def test_clear_cache(self):
110
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
111
        # This is a no-op, but we need the api to be consistent with other
112
        # BTreeGraphIndex apis.
113
        builder.clear_cache()
114
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
115
    def test_empty_1_0(self):
116
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
117
        # NamedTemporaryFile dies on builder.finish().read(). weird.
118
        temp_file = builder.finish()
119
        content = temp_file.read()
120
        del temp_file
121
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
122
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
123
            "row_lengths=\n",
124
            content)
125
126
    def test_empty_2_1(self):
127
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
128
        # NamedTemporaryFile dies on builder.finish().read(). weird.
129
        temp_file = builder.finish()
130
        content = temp_file.read()
131
        del temp_file
132
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
133
            "B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
134
            "row_lengths=\n",
135
            content)
136
137
    def test_root_leaf_1_0(self):
138
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
139
        nodes = self.make_nodes(5, 1, 0)
140
        for node in nodes:
141
            builder.add_node(*node)
142
        # NamedTemporaryFile dies on builder.finish().read(). weird.
143
        temp_file = builder.finish()
144
        content = temp_file.read()
145
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
146
        self.assertEqual(131, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
147
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
148
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
149
            "row_lengths=1\n",
150
            content[:73])
151
        node_content = content[73:]
152
        node_bytes = zlib.decompress(node_content)
153
        expected_node = ("type=leaf\n"
154
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
155
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
156
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
157
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
158
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
159
        self.assertEqual(expected_node, node_bytes)
160
161
    def test_root_leaf_2_2(self):
162
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
163
        nodes = self.make_nodes(5, 2, 2)
164
        for node in nodes:
165
            builder.add_node(*node)
166
        # NamedTemporaryFile dies on builder.finish().read(). weird.
167
        temp_file = builder.finish()
168
        content = temp_file.read()
169
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
170
        self.assertEqual(238, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
171
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
172
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
173
            "row_lengths=1\n",
174
            content[:74])
175
        node_content = content[74:]
176
        node_bytes = zlib.decompress(node_content)
177
        expected_node = (
178
            "type=leaf\n"
179
            "0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
180
            "0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
181
            "0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
182
            "0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
183
            "0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
184
            "1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
185
            "1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
186
            "1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
187
            "1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
188
            "1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
189
            ""
190
            )
191
        self.assertEqual(expected_node, node_bytes)
192
193
    def test_2_leaves_1_0(self):
194
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
195
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
196
        for node in nodes:
197
            builder.add_node(*node)
198
        # NamedTemporaryFile dies on builder.finish().read(). weird.
199
        temp_file = builder.finish()
200
        content = temp_file.read()
201
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
202
        self.assertEqual(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
203
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
204
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
205
            "row_lengths=1,2\n",
206
            content[:77])
207
        root = content[77:4096]
208
        leaf1 = content[4096:8192]
209
        leaf2 = content[8192:]
210
        root_bytes = zlib.decompress(root)
211
        expected_root = (
212
            "type=internal\n"
213
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
214
            ) + ("307" * 40) + "\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
215
        self.assertEqual(expected_root, root_bytes)
216
        # We already know serialisation works for leaves, check key selection:
217
        leaf1_bytes = zlib.decompress(leaf1)
218
        sorted_node_keys = sorted(node[0] for node in nodes)
219
        node = btree_index._LeafNode(leaf1_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
220
        self.assertEqual(231, len(node))
221
        self.assertEqual(sorted_node_keys[:231], node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
222
        leaf2_bytes = zlib.decompress(leaf2)
223
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
224
        self.assertEqual(400 - 231, len(node))
225
        self.assertEqual(sorted_node_keys[231:], node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
226
227
    def test_last_page_rounded_1_layer(self):
228
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
229
        nodes = self.make_nodes(10, 1, 0)
230
        for node in nodes:
231
            builder.add_node(*node)
232
        # NamedTemporaryFile dies on builder.finish().read(). weird.
233
        temp_file = builder.finish()
234
        content = temp_file.read()
235
        del temp_file
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
236
        self.assertEqual(155, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
237
        self.assertEqual(
3641.3.3 by John Arbash Meinel
Change the header to indicate these indexes are
238
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
239
            "row_lengths=1\n",
240
            content[:74])
241
        # Check thelast page is well formed
242
        leaf2 = content[74:]
243
        leaf2_bytes = zlib.decompress(leaf2)
244
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
245
        self.assertEqual(10, len(node))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
246
        sorted_node_keys = sorted(node[0] for node in nodes)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
247
        self.assertEqual(sorted_node_keys, node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
248
249
    def test_last_page_not_rounded_2_layer(self):
250
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
251
        nodes = self.make_nodes(400, 1, 0)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
252
        for node in nodes:
253
            builder.add_node(*node)
254
        # NamedTemporaryFile dies on builder.finish().read(). weird.
255
        temp_file = builder.finish()
256
        content = temp_file.read()
257
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
258
        self.assertEqual(9283, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
259
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
260
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
261
            "row_lengths=1,2\n",
262
            content[:77])
3641.3.30 by John Arbash Meinel
Rename _parse_btree to _btree_serializer
263
        # Check the last page is well formed
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
264
        leaf2 = content[8192:]
265
        leaf2_bytes = zlib.decompress(leaf2)
266
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
267
        self.assertEqual(400 - 231, len(node))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
268
        sorted_node_keys = sorted(node[0] for node in nodes)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
269
        self.assertEqual(sorted_node_keys[231:], node.all_keys())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
270
271
    def test_three_level_tree_details(self):
272
        # The left most pointer in the second internal node in a row should
273
        # pointer to the second node that the internal node is for, _not_
274
        # the first, otherwise the first node overlaps with the last node of
275
        # the prior internal node on that row.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
276
        self.shrink_page_size()
277
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
278
        # 40K nodes is enough to create a two internal nodes on the second
279
        # level, with a 2K page size
280
        nodes = self.make_nodes(20000, 2, 2)
3641.3.5 by John Arbash Meinel
For iter_all and three_level tests adjust spill-at.
281
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
282
        for node in nodes:
283
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
284
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
285
        size = t.put_file('index', self.time(builder.finish))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
286
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
287
        index = btree_index.BTreeGraphIndex(t, 'index', size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
288
        # Seed the metadata, we're using internal calls now.
289
        index.key_count()
290
        self.assertEqual(3, len(index._row_lengths),
291
            "Not enough rows: %r" % index._row_lengths)
292
        self.assertEqual(4, len(index._row_offsets))
293
        self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
294
        internal_nodes = index._get_internal_nodes([0, 1, 2])
295
        root_node = internal_nodes[0]
296
        internal_node1 = internal_nodes[1]
297
        internal_node2 = internal_nodes[2]
298
        # The left most node node2 points at should be one after the right most
299
        # node pointed at by node1.
300
        self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
301
        # The left most key of the second node pointed at by internal_node2
302
        # should be its first key. We can check this by looking for its first key
303
        # in the second node it points at
304
        pos = index._row_offsets[2] + internal_node2.offset + 1
305
        leaf = index._get_leaf_nodes([pos])[pos]
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
306
        self.assertTrue(internal_node2.keys[0] in leaf)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
307
308
    def test_2_leaves_2_2(self):
309
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
310
        nodes = self.make_nodes(100, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
311
        for node in nodes:
312
            builder.add_node(*node)
313
        # NamedTemporaryFile dies on builder.finish().read(). weird.
314
        temp_file = builder.finish()
315
        content = temp_file.read()
316
        del temp_file
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
317
        self.assertEqual(12643, len(content))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
318
        self.assertEqual(
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
319
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
320
            "row_lengths=1,3\n",
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
321
            content[:77])
322
        root = content[77:4096]
323
        leaf1 = content[4096:8192]
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
324
        leaf2 = content[8192:12288]
325
        leaf3 = content[12288:]
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
326
        root_bytes = zlib.decompress(root)
327
        expected_root = (
328
            "type=internal\n"
329
            "offset=0\n"
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
330
            + ("0" * 40) + "\x00" + ("91" * 40) + "\n"
331
            + ("1" * 40) + "\x00" + ("81" * 40) + "\n"
332
            )
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
333
        self.assertEqual(expected_root, root_bytes)
334
        # We assume the other leaf nodes have been written correctly - layering
335
        # FTW.
336
337
    def test_spill_index_stress_1_1(self):
338
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
339
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
340
        builder.add_node(*nodes[0])
341
        # Test the parts of the index that take up memory are doing so
342
        # predictably.
343
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
344
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
345
        builder.add_node(*nodes[1])
346
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
347
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
348
        self.assertEqual(1, len(builder._backing_indices))
349
        self.assertEqual(2, builder._backing_indices[0].key_count())
350
        # now back to memory
351
        builder.add_node(*nodes[2])
352
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
353
        self.assertIs(None, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
354
        # And spills to a second backing index combing all
355
        builder.add_node(*nodes[3])
356
        self.assertEqual(0, len(builder._nodes))
357
        self.assertIs(None, builder._nodes_by_key)
358
        self.assertEqual(2, len(builder._backing_indices))
359
        self.assertEqual(None, builder._backing_indices[0])
360
        self.assertEqual(4, builder._backing_indices[1].key_count())
361
        # The next spills to the 2-len slot
362
        builder.add_node(*nodes[4])
363
        builder.add_node(*nodes[5])
364
        self.assertEqual(0, len(builder._nodes))
365
        self.assertIs(None, builder._nodes_by_key)
366
        self.assertEqual(2, len(builder._backing_indices))
367
        self.assertEqual(2, builder._backing_indices[0].key_count())
368
        self.assertEqual(4, builder._backing_indices[1].key_count())
369
        # Next spill combines
370
        builder.add_node(*nodes[6])
371
        builder.add_node(*nodes[7])
372
        self.assertEqual(3, len(builder._backing_indices))
373
        self.assertEqual(None, builder._backing_indices[0])
374
        self.assertEqual(None, builder._backing_indices[1])
375
        self.assertEqual(8, builder._backing_indices[2].key_count())
376
        # And so forth - counting up in binary.
377
        builder.add_node(*nodes[8])
378
        builder.add_node(*nodes[9])
379
        self.assertEqual(3, len(builder._backing_indices))
380
        self.assertEqual(2, builder._backing_indices[0].key_count())
381
        self.assertEqual(None, builder._backing_indices[1])
382
        self.assertEqual(8, builder._backing_indices[2].key_count())
383
        builder.add_node(*nodes[10])
384
        builder.add_node(*nodes[11])
385
        self.assertEqual(3, len(builder._backing_indices))
386
        self.assertEqual(None, builder._backing_indices[0])
387
        self.assertEqual(4, builder._backing_indices[1].key_count())
388
        self.assertEqual(8, builder._backing_indices[2].key_count())
389
        builder.add_node(*nodes[12])
390
        # Test that memory and disk are both used for query methods; and that
391
        # None is skipped over happily.
392
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
393
            list(builder.iter_all_entries()))
394
        # Two nodes - one memory one disk
395
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
396
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
397
        self.assertEqual(13, builder.key_count())
398
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
399
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
400
        builder.add_node(*nodes[13])
401
        self.assertEqual(3, len(builder._backing_indices))
402
        self.assertEqual(2, builder._backing_indices[0].key_count())
403
        self.assertEqual(4, builder._backing_indices[1].key_count())
404
        self.assertEqual(8, builder._backing_indices[2].key_count())
405
        builder.add_node(*nodes[14])
406
        builder.add_node(*nodes[15])
407
        self.assertEqual(4, len(builder._backing_indices))
408
        self.assertEqual(None, builder._backing_indices[0])
409
        self.assertEqual(None, builder._backing_indices[1])
410
        self.assertEqual(None, builder._backing_indices[2])
411
        self.assertEqual(16, builder._backing_indices[3].key_count())
412
        # Now finish, and check we got a correctly ordered tree
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
413
        t = self.get_transport('')
414
        size = t.put_file('index', builder.finish())
415
        index = btree_index.BTreeGraphIndex(t, 'index', size)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
416
        nodes = list(index.iter_all_entries())
417
        self.assertEqual(sorted(nodes), nodes)
418
        self.assertEqual(16, len(nodes))
419
420
    def test_spill_index_stress_1_1_no_combine(self):
421
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
4168.3.6 by John Arbash Meinel
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
422
        builder.set_optimize(for_size=False, combine_backing_indices=False)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
423
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
424
        builder.add_node(*nodes[0])
425
        # Test the parts of the index that take up memory are doing so
426
        # predictably.
427
        self.assertEqual(1, len(builder._nodes))
428
        self.assertIs(None, builder._nodes_by_key)
429
        builder.add_node(*nodes[1])
430
        self.assertEqual(0, len(builder._nodes))
431
        self.assertIs(None, builder._nodes_by_key)
432
        self.assertEqual(1, len(builder._backing_indices))
433
        self.assertEqual(2, builder._backing_indices[0].key_count())
434
        # now back to memory
435
        builder.add_node(*nodes[2])
436
        self.assertEqual(1, len(builder._nodes))
437
        self.assertIs(None, builder._nodes_by_key)
438
        # And spills to a second backing index but doesn't combine
439
        builder.add_node(*nodes[3])
440
        self.assertEqual(0, len(builder._nodes))
441
        self.assertIs(None, builder._nodes_by_key)
442
        self.assertEqual(2, len(builder._backing_indices))
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
443
        for backing_index in builder._backing_indices:
444
            self.assertEqual(2, backing_index.key_count())
445
        # The next spills to the 3rd slot
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
446
        builder.add_node(*nodes[4])
447
        builder.add_node(*nodes[5])
448
        self.assertEqual(0, len(builder._nodes))
449
        self.assertIs(None, builder._nodes_by_key)
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
450
        self.assertEqual(3, len(builder._backing_indices))
451
        for backing_index in builder._backing_indices:
452
            self.assertEqual(2, backing_index.key_count())
453
        # Now spill a few more, and check that we don't combine
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
454
        builder.add_node(*nodes[6])
455
        builder.add_node(*nodes[7])
456
        builder.add_node(*nodes[8])
457
        builder.add_node(*nodes[9])
458
        builder.add_node(*nodes[10])
459
        builder.add_node(*nodes[11])
460
        builder.add_node(*nodes[12])
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
461
        self.assertEqual(6, len(builder._backing_indices))
462
        for backing_index in builder._backing_indices:
463
            self.assertEqual(2, backing_index.key_count())
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
464
        # Test that memory and disk are both used for query methods; and that
465
        # None is skipped over happily.
466
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
467
            list(builder.iter_all_entries()))
468
        # Two nodes - one memory one disk
469
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
470
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
471
        self.assertEqual(13, builder.key_count())
472
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
473
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
474
        builder.add_node(*nodes[13])
475
        builder.add_node(*nodes[14])
476
        builder.add_node(*nodes[15])
4168.3.5 by John Arbash Meinel
Check that setting _combine_spilled_indices has the expected effect.
477
        self.assertEqual(8, len(builder._backing_indices))
478
        for backing_index in builder._backing_indices:
479
            self.assertEqual(2, backing_index.key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
480
        # Now finish, and check we got a correctly ordered tree
481
        transport = self.get_transport('')
482
        size = transport.put_file('index', builder.finish())
483
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
484
        nodes = list(index.iter_all_entries())
485
        self.assertEqual(sorted(nodes), nodes)
486
        self.assertEqual(16, len(nodes))
487
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
488
    def test_set_optimize(self):
489
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
490
        builder.set_optimize(for_size=True)
491
        self.assertTrue(builder._optimize_for_size)
492
        builder.set_optimize(for_size=False)
493
        self.assertFalse(builder._optimize_for_size)
4168.3.6 by John Arbash Meinel
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
494
        # test that we can set combine_backing_indices without effecting
495
        # _optimize_for_size
496
        obj = object()
497
        builder._optimize_for_size = obj
498
        builder.set_optimize(combine_backing_indices=False)
499
        self.assertFalse(builder._combine_backing_indices)
500
        self.assertIs(obj, builder._optimize_for_size)
501
        builder.set_optimize(combine_backing_indices=True)
502
        self.assertTrue(builder._combine_backing_indices)
503
        self.assertIs(obj, builder._optimize_for_size)
3777.5.3 by John Arbash Meinel
Add Builder.set_optimize(for_size=True) for GraphIndexBuilder and BTreeBuilder.
504
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
505
    def test_spill_index_stress_2_2(self):
506
        # test that references and longer keys don't confuse things.
507
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
508
            spill_at=2)
509
        nodes = self.make_nodes(16, 2, 2)
510
        builder.add_node(*nodes[0])
511
        # Test the parts of the index that take up memory are doing so
512
        # predictably.
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
513
        self.assertEqual(1, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
514
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
515
        builder.add_node(*nodes[1])
516
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
517
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
518
        self.assertEqual(1, len(builder._backing_indices))
519
        self.assertEqual(2, builder._backing_indices[0].key_count())
520
        # now back to memory
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
521
        old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
522
        builder.add_node(*nodes[2])
3644.2.1 by John Arbash Meinel
Change the IndexBuilders to not generate the nodes_by_key unless needed.
523
        self.assertEqual(1, len(builder._nodes))
3644.2.6 by John Arbash Meinel
Restore the exact old tests, only assert that
524
        self.assertIsNot(None, builder._nodes_by_key)
525
        self.assertNotEqual({}, builder._nodes_by_key)
526
        # We should have a new entry
527
        self.assertNotEqual(old, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
528
        # And spills to a second backing index combing all
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
529
        builder.add_node(*nodes[3])
530
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
531
        self.assertIs(None, builder._nodes_by_key)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
532
        self.assertEqual(2, len(builder._backing_indices))
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
533
        self.assertEqual(None, builder._backing_indices[0])
534
        self.assertEqual(4, builder._backing_indices[1].key_count())
535
        # The next spills to the 2-len slot
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
536
        builder.add_node(*nodes[4])
537
        builder.add_node(*nodes[5])
538
        self.assertEqual(0, len(builder._nodes))
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
539
        self.assertIs(None, builder._nodes_by_key)
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
540
        self.assertEqual(2, len(builder._backing_indices))
541
        self.assertEqual(2, builder._backing_indices[0].key_count())
542
        self.assertEqual(4, builder._backing_indices[1].key_count())
543
        # Next spill combines
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
544
        builder.add_node(*nodes[6])
545
        builder.add_node(*nodes[7])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
546
        self.assertEqual(3, len(builder._backing_indices))
547
        self.assertEqual(None, builder._backing_indices[0])
548
        self.assertEqual(None, builder._backing_indices[1])
549
        self.assertEqual(8, builder._backing_indices[2].key_count())
550
        # And so forth - counting up in binary.
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
551
        builder.add_node(*nodes[8])
552
        builder.add_node(*nodes[9])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
553
        self.assertEqual(3, len(builder._backing_indices))
554
        self.assertEqual(2, builder._backing_indices[0].key_count())
555
        self.assertEqual(None, builder._backing_indices[1])
556
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
557
        builder.add_node(*nodes[10])
558
        builder.add_node(*nodes[11])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
559
        self.assertEqual(3, len(builder._backing_indices))
560
        self.assertEqual(None, builder._backing_indices[0])
561
        self.assertEqual(4, builder._backing_indices[1].key_count())
562
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
563
        builder.add_node(*nodes[12])
564
        # Test that memory and disk are both used for query methods; and that
565
        # None is skipped over happily.
566
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
567
            list(builder.iter_all_entries()))
568
        # Two nodes - one memory one disk
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
569
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
570
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
571
        self.assertEqual(13, builder.key_count())
3644.2.5 by John Arbash Meinel
Restore the exact old tests, only assert that
572
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
573
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
574
        builder.add_node(*nodes[13])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
575
        self.assertEqual(3, len(builder._backing_indices))
576
        self.assertEqual(2, builder._backing_indices[0].key_count())
577
        self.assertEqual(4, builder._backing_indices[1].key_count())
578
        self.assertEqual(8, builder._backing_indices[2].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
579
        builder.add_node(*nodes[14])
580
        builder.add_node(*nodes[15])
4168.3.4 by John Arbash Meinel
Restore the ability to spill, but prepare a flag to disable it.
581
        self.assertEqual(4, len(builder._backing_indices))
582
        self.assertEqual(None, builder._backing_indices[0])
583
        self.assertEqual(None, builder._backing_indices[1])
584
        self.assertEqual(None, builder._backing_indices[2])
585
        self.assertEqual(16, builder._backing_indices[3].key_count())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
586
        # Now finish, and check we got a correctly ordered tree
587
        transport = self.get_transport('')
588
        size = transport.put_file('index', builder.finish())
589
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
590
        nodes = list(index.iter_all_entries())
591
        self.assertEqual(sorted(nodes), nodes)
592
        self.assertEqual(16, len(nodes))
593
594
    def test_spill_index_duplicate_key_caught_on_finish(self):
595
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
596
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
597
        builder.add_node(*nodes[0])
598
        builder.add_node(*nodes[1])
599
        builder.add_node(*nodes[0])
600
        self.assertRaises(errors.BadIndexDuplicateKey, builder.finish)
601
602
603
class TestBTreeIndex(BTreeTestCase):
604
605
    def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
606
        builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
607
            key_elements=key_elements)
608
        for key, value, references in nodes:
609
            builder.add_node(key, value, references)
610
        stream = builder.finish()
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
611
        trans = transport.get_transport_from_url('trace+' + self.get_url())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
612
        size = trans.put_file('index', stream)
613
        return btree_index.BTreeGraphIndex(trans, 'index', size)
614
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
615
    def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
616
                               offset=0):
617
        builder = btree_index.BTreeBuilder(key_elements=key_elements,
618
                                           reference_lists=ref_lists)
619
        builder.add_nodes(nodes)
620
        transport = self.get_transport('')
5074.4.7 by John Arbash Meinel
Workaround a strange bug about the file closing before it is read.
621
        # NamedTemporaryFile dies on builder.finish().read(). weird.
622
        temp_file = builder.finish()
623
        content = temp_file.read()
624
        del temp_file
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
625
        size = len(content)
626
        transport.put_bytes('index', (' '*offset)+content)
627
        return btree_index.BTreeGraphIndex(transport, 'index', size=size,
628
                                           offset=offset)
629
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
630
    def test_clear_cache(self):
631
        nodes = self.make_nodes(160, 2, 2)
632
        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
633
        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
634
        self.assertEqual([1, 4], index._row_lengths)
635
        self.assertIsNot(None, index._root_node)
4744.2.9 by John Arbash Meinel
Move the note and assertions.
636
        internal_node_pre_clear = index._internal_node_cache.keys()
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
637
        self.assertTrue(len(index._leaf_node_cache) > 0)
638
        index.clear_cache()
639
        # We don't touch _root_node or _internal_node_cache, both should be
640
        # small, and can save a round trip or two
641
        self.assertIsNot(None, index._root_node)
4744.2.9 by John Arbash Meinel
Move the note and assertions.
642
        # NOTE: We don't want to affect the _internal_node_cache, as we expect
643
        #       it will be small, and if we ever do touch this index again, it
644
        #       will save round-trips.  This assertion isn't very strong,
645
        #       becuase without a 3-level index, we don't have any internal
646
        #       nodes cached.
647
        self.assertEqual(internal_node_pre_clear,
648
                         index._internal_node_cache.keys())
4744.2.6 by John Arbash Meinel
Start exposing an GraphIndex.clear_cache() member.
649
        self.assertEqual(0, len(index._leaf_node_cache))
650
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
651
    def test_trivial_constructor(self):
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
652
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
653
        index = btree_index.BTreeGraphIndex(t, 'index', None)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
654
        # Checks the page size at load, but that isn't logged yet.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
655
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
656
657
    def test_with_size_constructor(self):
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
658
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
659
        index = btree_index.BTreeGraphIndex(t, 'index', 1)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
660
        # Checks the page size at load, but that isn't logged yet.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
661
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
662
663
    def test_empty_key_count_no_size(self):
664
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
665
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
666
        t.put_file('index', builder.finish())
667
        index = btree_index.BTreeGraphIndex(t, 'index', None)
668
        del t._activity[:]
669
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
670
        self.assertEqual(0, index.key_count())
671
        # The entire index should have been requested (as we generally have the
672
        # size available, and doing many small readvs is inappropriate).
673
        # We can't tell how much was actually read here, but - check the code.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
674
        self.assertEqual([('get', 'index')], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
675
676
    def test_empty_key_count(self):
677
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
678
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
679
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
680
        self.assertEqual(72, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
681
        index = btree_index.BTreeGraphIndex(t, 'index', size)
682
        del t._activity[:]
683
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
684
        self.assertEqual(0, index.key_count())
685
        # The entire index should have been read, as 4K > size
686
        self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
687
                         t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
688
689
    def test_non_empty_key_count_2_2(self):
690
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
691
        nodes = self.make_nodes(35, 2, 2)
692
        for node in nodes:
693
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
694
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
695
        size = t.put_file('index', builder.finish())
696
        index = btree_index.BTreeGraphIndex(t, 'index', size)
697
        del t._activity[:]
698
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
699
        self.assertEqual(70, index.key_count())
700
        # The entire index should have been read, as it is one page long.
701
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
702
            t._activity)
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
703
        self.assertEqual(1173, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
704
5074.4.1 by John Arbash Meinel
Add an offset flag to BTreeGraphIndex.
705
    def test_with_offset_no_size(self):
706
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
707
                                            offset=1234,
708
                                            nodes=self.make_nodes(200, 1, 1))
709
        index._size = None # throw away the size info
710
        self.assertEqual(200, index.key_count())
711
712
    def test_with_small_offset(self):
713
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
714
                                            offset=1234,
715
                                            nodes=self.make_nodes(200, 1, 1))
716
        self.assertEqual(200, index.key_count())
717
718
    def test_with_large_offset(self):
719
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
720
                                            offset=123456,
721
                                            nodes=self.make_nodes(200, 1, 1))
722
        self.assertEqual(200, index.key_count())
723
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
724
    def test__read_nodes_no_size_one_page_reads_once(self):
725
        self.make_index(nodes=[(('key',), 'value', ())])
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
726
        trans = transport.get_transport_from_url('trace+' + self.get_url())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
727
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
728
        del trans._activity[:]
729
        nodes = dict(index._read_nodes([0]))
730
        self.assertEqual([0], nodes.keys())
731
        node = nodes[0]
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
732
        self.assertEqual([('key',)], node.all_keys())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
733
        self.assertEqual([('get', 'index')], trans._activity)
734
735
    def test__read_nodes_no_size_multiple_pages(self):
736
        index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
737
        index.key_count()
738
        num_pages = index._row_offsets[-1]
739
        # Reopen with a traced transport and no size
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
740
        trans = transport.get_transport_from_url('trace+' + self.get_url())
3824.1.1 by John Arbash Meinel
Fix _read_nodes() to only issue a single read if there is no known size.
741
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
742
        del trans._activity[:]
743
        nodes = dict(index._read_nodes([0]))
744
        self.assertEqual(range(num_pages), nodes.keys())
745
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
746
    def test_2_levels_key_count_2_2(self):
747
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
748
        nodes = self.make_nodes(160, 2, 2)
749
        for node in nodes:
750
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
751
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
752
        size = t.put_file('index', builder.finish())
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
753
        self.assertEqual(17692, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
754
        index = btree_index.BTreeGraphIndex(t, 'index', size)
755
        del t._activity[:]
756
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
757
        self.assertEqual(320, index.key_count())
758
        # The entire index should not have been read.
759
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
760
                         t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
761
762
    def test_validate_one_page(self):
763
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
764
        nodes = self.make_nodes(45, 2, 2)
765
        for node in nodes:
766
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
767
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
768
        size = t.put_file('index', builder.finish())
769
        index = btree_index.BTreeGraphIndex(t, 'index', size)
770
        del t._activity[:]
771
        self.assertEqual([], t._activity)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
772
        index.validate()
773
        # The entire index should have been read linearly.
774
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
775
                         t._activity)
4771.3.1 by John Arbash Meinel
We don't have to pad 'short' records.
776
        self.assertEqual(1488, size)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
777
778
    def test_validate_two_pages(self):
779
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
780
        nodes = self.make_nodes(80, 2, 2)
781
        for node in nodes:
782
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
783
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
784
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
785
        # Root page, 2 leaf pages
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
786
        self.assertEqual(9339, size)
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
787
        index = btree_index.BTreeGraphIndex(t, 'index', size)
788
        del t._activity[:]
789
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
790
        index.validate()
791
        # The entire index should have been read linearly.
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
792
        self.assertEqual(
793
            [('readv', 'index', [(0, 4096)], False, None),
794
             ('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
795
            t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
796
        # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
797
        # node and make validate find them.
798
799
    def test_eq_ne(self):
800
        # two indices are equal when constructed with the same parameters:
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
801
        t1 = transport.get_transport_from_url('trace+' + self.get_url(''))
5609.9.4 by Vincent Ladeuil
Use self.get_transport instead of transport.get_transport where possible.
802
        t2 = self.get_transport()
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
803
        self.assertTrue(
804
            btree_index.BTreeGraphIndex(t1, 'index', None) ==
805
            btree_index.BTreeGraphIndex(t1, 'index', None))
806
        self.assertTrue(
807
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
808
            btree_index.BTreeGraphIndex(t1, 'index', 20))
809
        self.assertFalse(
810
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
811
            btree_index.BTreeGraphIndex(t2, 'index', 20))
812
        self.assertFalse(
813
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) ==
814
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
815
        self.assertFalse(
816
            btree_index.BTreeGraphIndex(t1, 'index', 10) ==
817
            btree_index.BTreeGraphIndex(t1, 'index', 20))
818
        self.assertFalse(
819
            btree_index.BTreeGraphIndex(t1, 'index', None) !=
820
            btree_index.BTreeGraphIndex(t1, 'index', None))
821
        self.assertFalse(
822
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
823
            btree_index.BTreeGraphIndex(t1, 'index', 20))
824
        self.assertTrue(
825
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
826
            btree_index.BTreeGraphIndex(t2, 'index', 20))
827
        self.assertTrue(
828
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) !=
829
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
830
        self.assertTrue(
831
            btree_index.BTreeGraphIndex(t1, 'index', 10) !=
832
            btree_index.BTreeGraphIndex(t1, 'index', 20))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
833
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
834
    def test_iter_all_only_root_no_size(self):
835
        self.make_index(nodes=[(('key',), 'value', ())])
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
836
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
837
        index = btree_index.BTreeGraphIndex(t, 'index', None)
838
        del t._activity[:]
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
839
        self.assertEqual([(('key',), 'value')],
840
                         [x[1:] for x in index.iter_all_entries()])
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
841
        self.assertEqual([('get', 'index')], t._activity)
3824.1.2 by John Arbash Meinel
iter_all_entries() shouldn't need to re-read the page.
842
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
843
    def test_iter_all_entries_reads(self):
844
        # iterating all entries reads the header, then does a linear
845
        # read.
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
846
        self.shrink_page_size()
3641.5.11 by John Arbash Meinel
Some small test cleanup
847
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
848
        # 20k nodes is enough to create a two internal nodes on the second
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
849
        # level, with a 2K page size
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
850
        nodes = self.make_nodes(10000, 2, 2)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
851
        for node in nodes:
852
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
853
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
854
        size = t.put_file('index', builder.finish())
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
855
        self.assertEqual(1303220, size, 'number of expected bytes in the'
856
                                        ' output changed')
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
857
        page_size = btree_index._PAGE_SIZE
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
858
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
859
        index = btree_index.BTreeGraphIndex(t, 'index', size)
860
        del t._activity[:]
861
        self.assertEqual([], t._activity)
3641.5.8 by John Arbash Meinel
Fix up the test suite, now that we pack more efficiently.
862
        found_nodes = self.time(list, index.iter_all_entries())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
863
        bare_nodes = []
864
        for node in found_nodes:
865
            self.assertTrue(node[0] is index)
866
            bare_nodes.append(node[1:])
867
        self.assertEqual(3, len(index._row_lengths),
868
            "Not enough rows: %r" % index._row_lengths)
869
        # Should be as long as the nodes we supplied
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
870
        self.assertEqual(20000, len(found_nodes))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
871
        # Should have the same content
872
        self.assertEqual(set(nodes), set(bare_nodes))
873
        # Should have done linear scan IO up the index, ignoring
874
        # the internal nodes:
875
        # The entire index should have been read
876
        total_pages = sum(index._row_lengths)
877
        self.assertEqual(total_pages, index._row_offsets[-1])
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
878
        self.assertEqual(1303220, size)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
879
        # The start of the leaves
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
880
        first_byte = index._row_offsets[-2] * page_size
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
881
        readv_request = []
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
882
        for offset in range(first_byte, size, page_size):
883
            readv_request.append((offset, page_size))
3641.3.6 by John Arbash Meinel
Dropping the number of nodes to 100k from 200k
884
        # The last page is truncated
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
885
        readv_request[-1] = (readv_request[-1][0], 1303220 % page_size)
3641.5.9 by John Arbash Meinel
Shrink the page size so that the three_level and iter_all
886
        expected = [('readv', 'index', [(0, page_size)], False, None),
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
887
             ('readv',  'index', readv_request, False, None)]
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
888
        if expected != t._activity:
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
889
            self.assertEqualDiff(pprint.pformat(expected),
890
                                 pprint.pformat(transport._activity))
891
892
    def _test_iter_entries_references_resolved(self):
893
        index = self.make_index(1, nodes=[
894
            (('name', ), 'data', ([('ref', ), ('ref', )], )),
895
            (('ref', ), 'refdata', ([], ))])
896
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),('ref',)),)),
897
            (index, ('ref', ), 'refdata', ((), ))]),
898
            set(index.iter_entries([('name',), ('ref',)])))
899
900
    def test_iter_entries_references_2_refs_resolved(self):
901
        # iterating some entries reads just the pages needed. For now, to
902
        # get it working and start measuring, only 4K pages are read.
903
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
904
        # 80 nodes is enough to create a two-level index.
905
        nodes = self.make_nodes(160, 2, 2)
906
        for node in nodes:
907
            builder.add_node(*node)
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
908
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
909
        size = t.put_file('index', builder.finish())
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
910
        del builder
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
911
        index = btree_index.BTreeGraphIndex(t, 'index', size)
912
        del t._activity[:]
913
        self.assertEqual([], t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
914
        # search for one key
915
        found_nodes = list(index.iter_entries([nodes[30][0]]))
916
        bare_nodes = []
917
        for node in found_nodes:
918
            self.assertTrue(node[0] is index)
919
            bare_nodes.append(node[1:])
920
        # Should be as long as the nodes we supplied
921
        self.assertEqual(1, len(found_nodes))
922
        # Should have the same content
923
        self.assertEqual(nodes[30], bare_nodes[0])
924
        # Should have read the root node, then one leaf page:
925
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
3641.5.17 by John Arbash Meinel
Fix up the test suite now that things don't pack as well.
926
             ('readv',  'index', [(8192, 4096), ], False, None)],
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
927
            t._activity)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
928
929
    def test_iter_key_prefix_1_element_key_None(self):
930
        index = self.make_index()
931
        self.assertRaises(errors.BadIndexKey, list,
932
            index.iter_entries_prefix([(None, )]))
933
934
    def test_iter_key_prefix_wrong_length(self):
935
        index = self.make_index()
936
        self.assertRaises(errors.BadIndexKey, list,
937
            index.iter_entries_prefix([('foo', None)]))
938
        index = self.make_index(key_elements=2)
939
        self.assertRaises(errors.BadIndexKey, list,
940
            index.iter_entries_prefix([('foo', )]))
941
        self.assertRaises(errors.BadIndexKey, list,
942
            index.iter_entries_prefix([('foo', None, None)]))
943
944
    def test_iter_key_prefix_1_key_element_no_refs(self):
945
        index = self.make_index( nodes=[
946
            (('name', ), 'data', ()),
947
            (('ref', ), 'refdata', ())])
948
        self.assertEqual(set([(index, ('name', ), 'data'),
949
            (index, ('ref', ), 'refdata')]),
950
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
951
952
    def test_iter_key_prefix_1_key_element_refs(self):
953
        index = self.make_index(1, nodes=[
954
            (('name', ), 'data', ([('ref', )], )),
955
            (('ref', ), 'refdata', ([], ))])
956
        self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),),)),
957
            (index, ('ref', ), 'refdata', ((), ))]),
958
            set(index.iter_entries_prefix([('name', ), ('ref', )])))
959
960
    def test_iter_key_prefix_2_key_element_no_refs(self):
961
        index = self.make_index(key_elements=2, nodes=[
962
            (('name', 'fin1'), 'data', ()),
963
            (('name', 'fin2'), 'beta', ()),
964
            (('ref', 'erence'), 'refdata', ())])
965
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
966
            (index, ('ref', 'erence'), 'refdata')]),
967
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
968
        self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
969
            (index, ('name', 'fin2'), 'beta')]),
970
            set(index.iter_entries_prefix([('name', None)])))
971
972
    def test_iter_key_prefix_2_key_element_refs(self):
973
        index = self.make_index(1, key_elements=2, nodes=[
974
            (('name', 'fin1'), 'data', ([('ref', 'erence')], )),
975
            (('name', 'fin2'), 'beta', ([], )),
976
            (('ref', 'erence'), 'refdata', ([], ))])
977
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
978
            (index, ('ref', 'erence'), 'refdata', ((), ))]),
979
            set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
980
        self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
981
            (index, ('name', 'fin2'), 'beta', ((), ))]),
982
            set(index.iter_entries_prefix([('name', None)])))
983
4011.5.5 by Andrew Bennetts
Fix typo in comment.
984
    # XXX: external_references tests are duplicated in test_index.  We
4011.5.3 by Andrew Bennetts
Implement and test external_references on GraphIndex and BTreeGraphIndex.
985
    # probably should have per_graph_index tests...
986
    def test_external_references_no_refs(self):
987
        index = self.make_index(ref_lists=0, nodes=[])
988
        self.assertRaises(ValueError, index.external_references, 0)
989
990
    def test_external_references_no_results(self):
991
        index = self.make_index(ref_lists=1, nodes=[
992
            (('key',), 'value', ([],))])
993
        self.assertEqual(set(), index.external_references(0))
994
995
    def test_external_references_missing_ref(self):
996
        missing_key = ('missing',)
997
        index = self.make_index(ref_lists=1, nodes=[
998
            (('key',), 'value', ([missing_key],))])
999
        self.assertEqual(set([missing_key]), index.external_references(0))
1000
1001
    def test_external_references_multiple_ref_lists(self):
1002
        missing_key = ('missing',)
1003
        index = self.make_index(ref_lists=2, nodes=[
1004
            (('key',), 'value', ([], [missing_key]))])
1005
        self.assertEqual(set([]), index.external_references(0))
1006
        self.assertEqual(set([missing_key]), index.external_references(1))
1007
1008
    def test_external_references_two_records(self):
1009
        index = self.make_index(ref_lists=1, nodes=[
1010
            (('key-1',), 'value', ([('key-2',)],)),
1011
            (('key-2',), 'value', ([],)),
1012
            ])
1013
        self.assertEqual(set([]), index.external_references(0))
1014
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1015
    def test__find_ancestors_one_page(self):
4593.4.5 by John Arbash Meinel
Start adding some tests.
1016
        key1 = ('key-1',)
1017
        key2 = ('key-2',)
1018
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1019
            (key1, 'value', ([key2],)),
1020
            (key2, 'value', ([],)),
1021
            ])
1022
        parent_map = {}
1023
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1024
        search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
4593.4.5 by John Arbash Meinel
Start adding some tests.
1025
        self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1026
        self.assertEqual(set(), missing_keys)
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1027
        self.assertEqual(set(), search_keys)
4593.4.5 by John Arbash Meinel
Start adding some tests.
1028
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1029
    def test__find_ancestors_one_page_w_missing(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1030
        key1 = ('key-1',)
1031
        key2 = ('key-2',)
1032
        key3 = ('key-3',)
1033
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1034
            (key1, 'value', ([key2],)),
1035
            (key2, 'value', ([],)),
1036
            ])
1037
        parent_map = {}
1038
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1039
        search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1040
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1041
        self.assertEqual({key2: ()}, parent_map)
1042
        # we know that key3 is missing because we read the page that it would
1043
        # otherwise be on
1044
        self.assertEqual(set([key3]), missing_keys)
1045
        self.assertEqual(set(), search_keys)
1046
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1047
    def test__find_ancestors_one_parent_missing(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1048
        key1 = ('key-1',)
1049
        key2 = ('key-2',)
1050
        key3 = ('key-3',)
1051
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1052
            (key1, 'value', ([key2],)),
1053
            (key2, 'value', ([key3],)),
1054
            ])
1055
        parent_map = {}
1056
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1057
        search_keys = index._find_ancestors([key1], 0, parent_map,
1058
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1059
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1060
        self.assertEqual(set(), missing_keys)
1061
        # all we know is that key3 wasn't present on the page we were reading
1062
        # but if you look, the last key is key2 which comes before key3, so we
1063
        # don't know whether key3 would land on this page or not.
1064
        self.assertEqual(set([key3]), search_keys)
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1065
        search_keys = index._find_ancestors(search_keys, 0, parent_map,
1066
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1067
        # passing it back in, we are sure it is 'missing'
1068
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1069
        self.assertEqual(set([key3]), missing_keys)
1070
        self.assertEqual(set([]), search_keys)
1071
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1072
    def test__find_ancestors_dont_search_known(self):
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1073
        key1 = ('key-1',)
1074
        key2 = ('key-2',)
1075
        key3 = ('key-3',)
1076
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1077
            (key1, 'value', ([key2],)),
1078
            (key2, 'value', ([key3],)),
1079
            (key3, 'value', ([],)),
1080
            ])
1081
        # We already know about key2, so we won't try to search for key3
1082
        parent_map = {key2: (key3,)}
1083
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1084
        search_keys = index._find_ancestors([key1], 0, parent_map,
1085
                                            missing_keys)
4593.4.7 by John Arbash Meinel
Basic implementation of a conforming interface for GraphIndex.
1086
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1087
        self.assertEqual(set(), missing_keys)
1088
        self.assertEqual(set(), search_keys)
1089
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1090
    def test__find_ancestors_multiple_pages(self):
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1091
        # We need to use enough keys that we actually cause a split
1092
        start_time = 1249671539
1093
        email = "joebob@example.com"
1094
        nodes = []
1095
        ref_lists = ((),)
1096
        rev_keys = []
1097
        for i in xrange(400):
1098
            rev_id = '%s-%s-%s' % (email,
1099
                                   osutils.compact_date(start_time + i),
1100
                                   osutils.rand_chars(16))
1101
            rev_key = (rev_id,)
1102
            nodes.append((rev_key, 'value', ref_lists))
1103
            # We have a ref 'list' of length 1, with a list of parents, with 1
1104
            # parent which is a key
1105
            ref_lists = ((rev_key,),)
1106
            rev_keys.append(rev_key)
1107
        index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1108
        self.assertEqual(400, index.key_count())
1109
        self.assertEqual(3, len(index._row_offsets))
1110
        nodes = dict(index._read_nodes([1, 2]))
1111
        l1 = nodes[1]
1112
        l2 = nodes[2]
1113
        min_l2_key = l2.min_key
1114
        max_l1_key = l1.max_key
1115
        self.assertTrue(max_l1_key < min_l2_key)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1116
        parents_min_l2_key = l2[min_l2_key][1][0]
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1117
        self.assertEqual((l1.max_key,), parents_min_l2_key)
1118
        # Now, whatever key we select that would fall on the second page,
1119
        # should give us all the parents until the page break
1120
        key_idx = rev_keys.index(min_l2_key)
1121
        next_key = rev_keys[key_idx+1]
1122
        # So now when we get the parent map, we should get the key we are
1123
        # looking for, min_l2_key, and then a reference to go look for the
1124
        # parent of that key
1125
        parent_map = {}
1126
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1127
        search_keys = index._find_ancestors([next_key], 0, parent_map,
1128
                                            missing_keys)
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1129
        self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1130
        self.assertEqual(set(), missing_keys)
1131
        self.assertEqual(set([max_l1_key]), search_keys)
1132
        parent_map = {}
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1133
        search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1134
                                            missing_keys)
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1135
        self.assertEqual(l1.all_keys(), sorted(parent_map))
4593.4.6 by John Arbash Meinel
Add a test that we can walk some of the keys on a given page
1136
        self.assertEqual(set(), missing_keys)
1137
        self.assertEqual(set(), search_keys)
1138
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1139
    def test__find_ancestors_empty_index(self):
4593.4.11 by John Arbash Meinel
Snapshot the work in progress.
1140
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1141
        parent_map = {}
1142
        missing_keys = set()
4593.4.12 by John Arbash Meinel
Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()
1143
        search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1144
                                            missing_keys)
4593.4.11 by John Arbash Meinel
Snapshot the work in progress.
1145
        self.assertEqual(set(), search_keys)
1146
        self.assertEqual({}, parent_map)
1147
        self.assertEqual(set([('one',), ('two',)]), missing_keys)
1148
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1149
    def test_supports_unlimited_cache(self):
1150
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1151
        # We need enough nodes to cause a page split (so we have both an
1152
        # internal node and a couple leaf nodes. 500 seems to be enough.)
4634.71.2 by John Arbash Meinel
If we are going to sometimes use a dict, we have to conform to just the dict interface.
1153
        nodes = self.make_nodes(500, 1, 0)
1154
        for node in nodes:
1155
            builder.add_node(*node)
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1156
        stream = builder.finish()
5609.9.4 by Vincent Ladeuil
Use self.get_transport instead of transport.get_transport where possible.
1157
        trans = self.get_transport()
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1158
        size = trans.put_file('index', stream)
1159
        index = btree_index.BTreeGraphIndex(trans, 'index', size)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1160
        self.assertEqual(500, index.key_count())
1161
        # We have an internal node
1162
        self.assertEqual(2, len(index._row_lengths))
1163
        # We have at least 2 leaf nodes
1164
        self.assertTrue(index._row_lengths[-1] >= 2)
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1165
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1166
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1167
                         index._leaf_node_cache._max_cache)
1168
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1169
        self.assertEqual(100, index._internal_node_cache._max_cache)
1170
        # No change if unlimited_cache=False is passed
1171
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1172
                                            unlimited_cache=False)
1173
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1174
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1175
                         index._leaf_node_cache._max_cache)
1176
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1177
        self.assertEqual(100, index._internal_node_cache._max_cache)
1178
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1179
                                            unlimited_cache=True)
1180
        self.assertIsInstance(index._leaf_node_cache, dict)
1181
        self.assertIs(type(index._internal_node_cache), dict)
4634.71.3 by John Arbash Meinel
Add some comments to test_supports_unlimited_cache, as mentioned by Vincent.
1182
        # Exercise the lookup code
1183
        entries = set(index.iter_entries([n[0] for n in nodes]))
1184
        self.assertEqual(500, len(entries))
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
1185
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1186
1187
class TestBTreeNodes(BTreeTestCase):
1188
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
1189
    scenarios = btreeparser_scenarios()
1190
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1191
    def setUp(self):
1192
        BTreeTestCase.setUp(self)
4985.1.5 by Vincent Ladeuil
Deploying the new overrideAttr facility further reduces the complexity
1193
        self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1194
1195
    def test_LeafNode_1_0(self):
1196
        node_bytes = ("type=leaf\n"
1197
            "0000000000000000000000000000000000000000\x00\x00value:0\n"
1198
            "1111111111111111111111111111111111111111\x00\x00value:1\n"
1199
            "2222222222222222222222222222222222222222\x00\x00value:2\n"
1200
            "3333333333333333333333333333333333333333\x00\x00value:3\n"
1201
            "4444444444444444444444444444444444444444\x00\x00value:4\n")
1202
        node = btree_index._LeafNode(node_bytes, 1, 0)
1203
        # We do direct access, or don't care about order, to leaf nodes most of
1204
        # the time, so a dict is useful:
1205
        self.assertEqual({
1206
            ("0000000000000000000000000000000000000000",): ("value:0", ()),
1207
            ("1111111111111111111111111111111111111111",): ("value:1", ()),
1208
            ("2222222222222222222222222222222222222222",): ("value:2", ()),
1209
            ("3333333333333333333333333333333333333333",): ("value:3", ()),
1210
            ("4444444444444444444444444444444444444444",): ("value:4", ()),
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1211
            }, dict(node.all_items()))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1212
1213
    def test_LeafNode_2_2(self):
1214
        node_bytes = ("type=leaf\n"
1215
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
1216
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1217
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1218
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
1219
            ""
1220
            )
1221
        node = btree_index._LeafNode(node_bytes, 2, 2)
1222
        # We do direct access, or don't care about order, to leaf nodes most of
1223
        # the time, so a dict is useful:
1224
        self.assertEqual({
1225
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1226
            ('00', '11'): ('value:1',
1227
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1228
            ('11', '33'): ('value:3',
1229
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1230
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1231
            }, dict(node.all_items()))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1232
1233
    def test_InternalNode_1(self):
1234
        node_bytes = ("type=internal\n"
1235
            "offset=1\n"
1236
            "0000000000000000000000000000000000000000\n"
1237
            "1111111111111111111111111111111111111111\n"
1238
            "2222222222222222222222222222222222222222\n"
1239
            "3333333333333333333333333333333333333333\n"
1240
            "4444444444444444444444444444444444444444\n"
1241
            )
1242
        node = btree_index._InternalNode(node_bytes)
1243
        # We want to bisect to find the right children from this node, so a
1244
        # vector is most useful.
1245
        self.assertEqual([
1246
            ("0000000000000000000000000000000000000000",),
1247
            ("1111111111111111111111111111111111111111",),
1248
            ("2222222222222222222222222222222222222222",),
1249
            ("3333333333333333333333333333333333333333",),
1250
            ("4444444444444444444444444444444444444444",),
1251
            ], node.keys)
1252
        self.assertEqual(1, node.offset)
1253
1254
    def test_LeafNode_2_2(self):
1255
        node_bytes = ("type=leaf\n"
1256
            "00\x0000\x00\t00\x00ref00\x00value:0\n"
1257
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1258
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1259
            "11\x0044\x00\t11\x00ref00\x00value:4\n"
1260
            ""
1261
            )
1262
        node = btree_index._LeafNode(node_bytes, 2, 2)
1263
        # We do direct access, or don't care about order, to leaf nodes most of
1264
        # the time, so a dict is useful:
1265
        self.assertEqual({
1266
            ('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1267
            ('00', '11'): ('value:1',
1268
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1269
            ('11', '33'): ('value:3',
1270
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1271
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
5365.5.1 by John Arbash Meinel
Implement a custom parser for chk btree leaves.
1272
            }, dict(node.all_items()))
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1273
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1274
    def assertFlattened(self, expected, key, value, refs):
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1275
        flat_key, flat_line = self.parse_btree._flatten_node(
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1276
            (None, key, value, refs), bool(refs))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1277
        self.assertEqual('\x00'.join(key), flat_key)
1278
        self.assertEqual(expected, flat_line)
1279
3641.3.19 by John Arbash Meinel
The flatten code now handles the no-ref-list case.
1280
    def test__flatten_node(self):
1281
        self.assertFlattened('key\0\0value\n', ('key',), 'value', [])
1282
        self.assertFlattened('key\0tuple\0\0value str\n',
1283
                             ('key', 'tuple'), 'value str', [])
1284
        self.assertFlattened('key\0tuple\0triple\0\0value str\n',
1285
                             ('key', 'tuple', 'triple'), 'value str', [])
1286
        self.assertFlattened('k\0t\0s\0ref\0value str\n',
1287
                             ('k', 't', 's'), 'value str', [[('ref',)]])
1288
        self.assertFlattened('key\0tuple\0ref\0key\0value str\n',
1289
                             ('key', 'tuple'), 'value str', [[('ref', 'key')]])
1290
        self.assertFlattened("00\x0000\x00\t00\x00ref00\x00value:0\n",
1291
            ('00', '00'), 'value:0', ((), (('00', 'ref00'),)))
1292
        self.assertFlattened(
1293
            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
1294
            ('00', '11'), 'value:1',
1295
                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01'))))
1296
        self.assertFlattened(
1297
            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
1298
            ('11', '33'), 'value:3',
1299
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22'))))
1300
        self.assertFlattened(
1301
            "11\x0044\x00\t11\x00ref00\x00value:4\n",
1302
            ('11', '44'), 'value:4', ((), (('11', 'ref00'),)))
3641.3.18 by John Arbash Meinel
Start working on a compiled function for transforming
1303
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1304
1305
class TestCompiledBtree(tests.TestCase):
1306
1307
    def test_exists(self):
1308
        # This is just to let the user know if they don't have the feature
1309
        # available
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
1310
        self.requireFeature(compiled_btreeparser_feature)
3641.3.1 by John Arbash Meinel
Bring in the btree_index and chunk_writer code and their tests.
1311
1312
1313
class TestMultiBisectRight(tests.TestCase):
1314
1315
    def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
1316
        self.assertEqual(offsets,
1317
                         btree_index.BTreeGraphIndex._multi_bisect_right(
1318
                            search_keys, fixed_keys))
1319
1320
    def test_after(self):
1321
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
1322
        self.assertMultiBisectRight([(3, ['e', 'f', 'g'])],
1323
                                    ['e', 'f', 'g'], ['a', 'b', 'c'])
1324
1325
    def test_before(self):
1326
        self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
1327
        self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])],
1328
                                    ['a', 'b', 'c', 'd'], ['e', 'f', 'g'])
1329
1330
    def test_exact(self):
1331
        self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
1332
        self.assertMultiBisectRight([(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b'])
1333
        self.assertMultiBisectRight([(1, ['a']), (3, ['c'])],
1334
                                    ['a', 'c'], ['a', 'b', 'c'])
1335
1336
    def test_inbetween(self):
1337
        self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
1338
        self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
1339
                                    ['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h'])
1340
1341
    def test_mixed(self):
1342
        self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']),
1343
                                     (4, ['g', 'h'])],
1344
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
1345
                                    ['c', 'd', 'f', 'g'])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1346
1347
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1348
class TestExpandOffsets(tests.TestCase):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1349
1350
    def make_index(self, size, recommended_pages=None):
1351
        """Make an index with a generic size.
1352
1353
        This doesn't actually create anything on disk, it just primes a
1354
        BTreeGraphIndex with the recommended information.
1355
        """
5273.1.7 by Vincent Ladeuil
No more use of the get_transport imported *symbol*, all uses are through
1356
        index = btree_index.BTreeGraphIndex(
6083.1.1 by Jelmer Vernooij
Use get_transport_from_{url,path} in more places.
1357
            transport.get_transport_from_url('memory:///'),
1358
            'test-index', size=size)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1359
        if recommended_pages is not None:
1360
            index._recommended_pages = recommended_pages
1361
        return index
1362
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1363
    def set_cached_offsets(self, index, cached_offsets):
1364
        """Monkeypatch to give a canned answer for _get_offsets_for...()."""
1365
        def _get_offsets_to_cached_pages():
1366
            cached = set(cached_offsets)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1367
            return cached
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1368
        index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1369
1370
    def prepare_index(self, index, node_ref_lists, key_length, key_count,
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1371
                      row_lengths, cached_offsets):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1372
        """Setup the BTreeGraphIndex with some pre-canned information."""
1373
        index.node_ref_lists = node_ref_lists
1374
        index._key_length = key_length
1375
        index._key_count = key_count
1376
        index._row_lengths = row_lengths
1377
        index._compute_row_offsets()
1378
        index._root_node = btree_index._InternalNode('internal\noffset=0\n')
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1379
        self.set_cached_offsets(index, cached_offsets)
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1380
1381
    def make_100_node_index(self):
1382
        index = self.make_index(4096*100, 6)
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1383
        # Consider we've already made a single request at the middle
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1384
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1385
                           key_count=1000, row_lengths=[1, 99],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1386
                           cached_offsets=[0, 50])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1387
        return index
1388
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1389
    def make_1000_node_index(self):
1390
        index = self.make_index(4096*1000, 6)
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1391
        # Pretend we've already made a single request in the middle
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1392
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1393
                           key_count=90000, row_lengths=[1, 9, 990],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1394
                           cached_offsets=[0, 5, 500])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1395
        return index
1396
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1397
    def assertNumPages(self, expected_pages, index, size):
1398
        index._size = size
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1399
        self.assertEqual(expected_pages, index._compute_total_pages_in_index())
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1400
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1401
    def assertExpandOffsets(self, expected, index, offsets):
1402
        self.assertEqual(expected, index._expand_offsets(offsets),
1403
                         'We did not get the expected value after expanding'
1404
                         ' %s' % (offsets,))
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1405
1406
    def test_default_recommended_pages(self):
1407
        index = self.make_index(None)
1408
        # local transport recommends 4096 byte reads, which is 1 page
1409
        self.assertEqual(1, index._recommended_pages)
1410
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1411
    def test__compute_total_pages_in_index(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1412
        index = self.make_index(None)
1413
        self.assertNumPages(1, index, 1024)
1414
        self.assertNumPages(1, index, 4095)
1415
        self.assertNumPages(1, index, 4096)
1416
        self.assertNumPages(2, index, 4097)
1417
        self.assertNumPages(2, index, 8192)
1418
        self.assertNumPages(76, index, 4096*75 + 10)
1419
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1420
    def test__find_layer_start_and_stop(self):
1421
        index = self.make_1000_node_index()
1422
        self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1423
        self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1424
        self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1425
        self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1426
        self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1427
        self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1428
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1429
    def test_unknown_size(self):
1430
        # We should not expand if we don't know the file size
1431
        index = self.make_index(None, 10)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1432
        self.assertExpandOffsets([0], index, [0])
1433
        self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1434
1435
    def test_more_than_recommended(self):
1436
        index = self.make_index(4096*100, 2)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1437
        self.assertExpandOffsets([1, 10], index, [1, 10])
1438
        self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1439
1440
    def test_read_all_from_root(self):
1441
        index = self.make_index(4096*10, 20)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1442
        self.assertExpandOffsets(range(10), index, [0])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1443
1444
    def test_read_all_when_cached(self):
1445
        # We've read enough that we can grab all the rest in a single request
1446
        index = self.make_index(4096*10, 5)
1447
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1448
                           key_count=1000, row_lengths=[1, 9],
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1449
                           cached_offsets=[0, 1, 2, 5, 6])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1450
        # It should fill the remaining nodes, regardless of the one requested
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1451
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1452
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1453
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1454
1455
    def test_no_root_node(self):
1456
        index = self.make_index(4096*10, 5)
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1457
        self.assertExpandOffsets([0], index, [0])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1458
1459
    def test_include_neighbors(self):
1460
        index = self.make_100_node_index()
1461
        # We expand in both directions, until we have at least 'recommended'
1462
        # pages
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1463
        self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1464
        self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1465
        # If we hit an 'edge' we continue in the other direction
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1466
        self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1467
        self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1468
1469
        # Requesting many nodes will expand all locations equally
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1470
        self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1471
        self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1472
                               [2, 10, 81])
1473
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1474
    def test_stop_at_cached(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1475
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1476
        self.set_cached_offsets(index, [0, 10, 19])
1477
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1478
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1479
        self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1480
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1481
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1482
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1483
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1484
    def test_cannot_fully_expand(self):
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1485
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1486
        self.set_cached_offsets(index, [0, 10, 12])
3763.8.7 by John Arbash Meinel
A bit of doc updates, start putting in tests for current behavior.
1487
        # We don't go into an endless loop if we are bound by cached nodes
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1488
        self.assertExpandOffsets([11], index, [11])
3763.8.8 by John Arbash Meinel
simple test of overlapped behavior
1489
1490
    def test_overlap(self):
1491
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1492
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1493
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1494
1495
    def test_stay_within_layer(self):
1496
        index = self.make_1000_node_index()
1497
        # When expanding a request, we won't read nodes from the next layer
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1498
        self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1499
        self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1500
        self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1501
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1502
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
3763.8.9 by John Arbash Meinel
Finish up the algorithm to stay within a given layer.
1503
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1504
        self.set_cached_offsets(index, [0, 4, 12])
1505
        self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1506
        self.assertExpandOffsets([10, 11], index, [11])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1507
1508
    def test_small_requests_unexpanded(self):
1509
        index = self.make_100_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1510
        self.set_cached_offsets(index, [0])
1511
        self.assertExpandOffsets([1], index, [1])
1512
        self.assertExpandOffsets([50], index, [50])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1513
        # If we request more than one node, then we'll expand
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1514
        self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1515
1516
        # The first pass does not expand
1517
        index = self.make_1000_node_index()
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1518
        self.set_cached_offsets(index, [0])
1519
        self.assertExpandOffsets([1], index, [1])
1520
        self.set_cached_offsets(index, [0, 1])
1521
        self.assertExpandOffsets([100], index, [100])
1522
        self.set_cached_offsets(index, [0, 1, 100])
3763.8.14 by John Arbash Meinel
Add in a shortcut when we haven't cached much yet.
1523
        # But after the first depth, we will expand
3763.8.15 by John Arbash Meinel
Review comments from Martin. Code clarity/variable name/docstring updates.
1524
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1525
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1526
        self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1527
        self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,
1528
                                 [105])