~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_chk_map.py

  • Committer: Ian Clatworthy
  • Date: 2009-07-13 06:58:49 UTC
  • mto: (4527.1.1 integration)
  • mto: This revision was merged to the branch mainline in revision 4529.
  • Revision ID: ian.clatworthy@canonical.com-20090713065849-n7g2rsjyl6dt1mgv
Apply review feedback

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# Copyright (C) 2008, 2009 Canonical Ltd
 
2
#
 
3
# This program is free software; you can redistribute it and/or modify
 
4
# it under the terms of the GNU General Public License as published by
 
5
# the Free Software Foundation; either version 2 of the License, or
 
6
# (at your option) any later version.
 
7
#
 
8
# This program is distributed in the hope that it will be useful,
 
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
11
# GNU General Public License for more details.
 
12
#
 
13
# You should have received a copy of the GNU General Public License
 
14
# along with this program; if not, write to the Free Software
 
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
16
 
 
17
"""Tests for maps built on a CHK versionedfiles facility."""
 
18
 
 
19
from itertools import izip
 
20
 
 
21
from bzrlib import (
 
22
    chk_map,
 
23
    osutils,
 
24
    tests,
 
25
    )
 
26
from bzrlib.chk_map import (
 
27
    CHKMap,
 
28
    InternalNode,
 
29
    LeafNode,
 
30
    Node,
 
31
    )
 
32
 
 
33
 
 
34
class TestNode(tests.TestCase):
 
35
 
 
36
    def assertCommonPrefix(self, expected_common, prefix, key):
 
37
        common = Node.common_prefix(prefix, key)
 
38
        self.assertTrue(len(common) <= len(prefix))
 
39
        self.assertTrue(len(common) <= len(key))
 
40
        self.assertStartsWith(prefix, common)
 
41
        self.assertStartsWith(key, common)
 
42
        self.assertEquals(expected_common, common)
 
43
 
 
44
    def test_common_prefix(self):
 
45
        self.assertCommonPrefix('beg', 'beg', 'begin')
 
46
 
 
47
    def test_no_common_prefix(self):
 
48
        self.assertCommonPrefix('', 'begin', 'end')
 
49
 
 
50
    def test_equal(self):
 
51
        self.assertCommonPrefix('begin', 'begin', 'begin')
 
52
 
 
53
    def test_not_a_prefix(self):
 
54
        self.assertCommonPrefix('b', 'begin', 'b')
 
55
 
 
56
    def test_empty(self):
 
57
        self.assertCommonPrefix('', '', 'end')
 
58
        self.assertCommonPrefix('', 'begin', '')
 
59
        self.assertCommonPrefix('', '', '')
 
60
 
 
61
 
 
62
class TestCaseWithStore(tests.TestCaseWithTransport):
 
63
 
 
64
    def get_chk_bytes(self):
 
65
        # The easiest way to get a CHK store is a development6 repository and
 
66
        # then work with the chk_bytes attribute directly.
 
67
        repo = self.make_repository(".", format="development6-rich-root")
 
68
        repo.lock_write()
 
69
        self.addCleanup(repo.unlock)
 
70
        repo.start_write_group()
 
71
        self.addCleanup(repo.abort_write_group)
 
72
        return repo.chk_bytes
 
73
 
 
74
    def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,
 
75
                 search_key_func=None):
 
76
        if chk_bytes is None:
 
77
            chk_bytes = self.get_chk_bytes()
 
78
        root_key = CHKMap.from_dict(chk_bytes, a_dict,
 
79
            maximum_size=maximum_size, key_width=key_width,
 
80
            search_key_func=search_key_func)
 
81
        root_key2 = CHKMap._create_via_map(chk_bytes, a_dict,
 
82
            maximum_size=maximum_size, key_width=key_width,
 
83
            search_key_func=search_key_func)
 
84
        self.assertEqual(root_key, root_key2, "CHKMap.from_dict() did not"
 
85
                         " match CHKMap._create_via_map")
 
86
        chkmap = CHKMap(chk_bytes, root_key, search_key_func=search_key_func)
 
87
        return chkmap
 
88
 
 
89
    def read_bytes(self, chk_bytes, key):
 
90
        stream = chk_bytes.get_record_stream([key], 'unordered', True)
 
91
        record = stream.next()
 
92
        if record.storage_kind == 'absent':
 
93
            self.fail('Store does not contain the key %s' % (key,))
 
94
        return record.get_bytes_as("fulltext")
 
95
 
 
96
    def to_dict(self, node, *args):
 
97
        return dict(node.iteritems(*args))
 
98
 
 
99
 
 
100
class TestMap(TestCaseWithStore):
 
101
 
 
102
    def assertHasABMap(self, chk_bytes):
 
103
        ab_leaf_bytes = 'chkleaf:\n0\n1\n1\na\n\x001\nb\n'
 
104
        ab_sha1 = osutils.sha_string(ab_leaf_bytes)
 
105
        self.assertEqual('90986195696b177c8895d48fdb4b7f2366f798a0', ab_sha1)
 
106
        root_key = ('sha1:' + ab_sha1,)
 
107
        self.assertEqual(ab_leaf_bytes, self.read_bytes(chk_bytes, root_key))
 
108
        return root_key
 
109
 
 
110
    def assertHasEmptyMap(self, chk_bytes):
 
111
        empty_leaf_bytes = 'chkleaf:\n0\n1\n0\n\n'
 
112
        empty_sha1 = osutils.sha_string(empty_leaf_bytes)
 
113
        self.assertEqual('8571e09bf1bcc5b9621ce31b3d4c93d6e9a1ed26', empty_sha1)
 
114
        root_key = ('sha1:' + empty_sha1,)
 
115
        self.assertEqual(empty_leaf_bytes, self.read_bytes(chk_bytes, root_key))
 
116
        return root_key
 
117
 
 
118
    def assertMapLayoutEqual(self, map_one, map_two):
 
119
        """Assert that the internal structure is identical between the maps."""
 
120
        map_one._ensure_root()
 
121
        node_one_stack = [map_one._root_node]
 
122
        map_two._ensure_root()
 
123
        node_two_stack = [map_two._root_node]
 
124
        while node_one_stack:
 
125
            node_one = node_one_stack.pop()
 
126
            node_two = node_two_stack.pop()
 
127
            if node_one.__class__ != node_two.__class__:
 
128
                self.assertEqualDiff(map_one._dump_tree(include_keys=True),
 
129
                                     map_two._dump_tree(include_keys=True))
 
130
            self.assertEqual(node_one._search_prefix,
 
131
                             node_two._search_prefix)
 
132
            if isinstance(node_one, InternalNode):
 
133
                # Internal nodes must have identical references
 
134
                self.assertEqual(sorted(node_one._items.keys()),
 
135
                                 sorted(node_two._items.keys()))
 
136
                node_one_stack.extend([n for n, _ in
 
137
                                       node_one._iter_nodes(map_one._store)])
 
138
                node_two_stack.extend([n for n, _ in
 
139
                                       node_two._iter_nodes(map_two._store)])
 
140
            else:
 
141
                # Leaf nodes must have identical contents
 
142
                self.assertEqual(node_one._items, node_two._items)
 
143
        self.assertEquals([], node_two_stack)
 
144
 
 
145
    def assertCanonicalForm(self, chkmap):
 
146
        """Assert that the chkmap is in 'canonical' form.
 
147
 
 
148
        We do this by adding all of the key value pairs from scratch, both in
 
149
        forward order and reverse order, and assert that the final tree layout
 
150
        is identical.
 
151
        """
 
152
        items = list(chkmap.iteritems())
 
153
        map_forward = chk_map.CHKMap(None, None)
 
154
        map_forward._root_node.set_maximum_size(chkmap._root_node.maximum_size)
 
155
        for key, value in items:
 
156
            map_forward.map(key, value)
 
157
        self.assertMapLayoutEqual(map_forward, chkmap)
 
158
        map_reverse = chk_map.CHKMap(None, None)
 
159
        map_reverse._root_node.set_maximum_size(chkmap._root_node.maximum_size)
 
160
        for key, value in reversed(items):
 
161
            map_reverse.map(key, value)
 
162
        self.assertMapLayoutEqual(map_reverse, chkmap)
 
163
 
 
164
    def test_assert_map_layout_equal(self):
 
165
        store = self.get_chk_bytes()
 
166
        map_one = CHKMap(store, None)
 
167
        map_one._root_node.set_maximum_size(20)
 
168
        map_two = CHKMap(store, None)
 
169
        map_two._root_node.set_maximum_size(20)
 
170
        self.assertMapLayoutEqual(map_one, map_two)
 
171
        map_one.map('aaa', 'value')
 
172
        self.assertRaises(AssertionError,
 
173
            self.assertMapLayoutEqual, map_one, map_two)
 
174
        map_two.map('aaa', 'value')
 
175
        self.assertMapLayoutEqual(map_one, map_two)
 
176
        # Split the tree, so we ensure that internal nodes and leaf nodes are
 
177
        # properly checked
 
178
        map_one.map('aab', 'value')
 
179
        self.assertIsInstance(map_one._root_node, InternalNode)
 
180
        self.assertRaises(AssertionError,
 
181
            self.assertMapLayoutEqual, map_one, map_two)
 
182
        map_two.map('aab', 'value')
 
183
        self.assertMapLayoutEqual(map_one, map_two)
 
184
        map_one.map('aac', 'value')
 
185
        self.assertRaises(AssertionError,
 
186
            self.assertMapLayoutEqual, map_one, map_two)
 
187
        self.assertCanonicalForm(map_one)
 
188
 
 
189
    def test_from_dict_empty(self):
 
190
        chk_bytes = self.get_chk_bytes()
 
191
        root_key = CHKMap.from_dict(chk_bytes, {})
 
192
        # Check the data was saved and inserted correctly.
 
193
        expected_root_key = self.assertHasEmptyMap(chk_bytes)
 
194
        self.assertEqual(expected_root_key, root_key)
 
195
 
 
196
    def test_from_dict_ab(self):
 
197
        chk_bytes = self.get_chk_bytes()
 
198
        root_key = CHKMap.from_dict(chk_bytes, {"a": "b"})
 
199
        # Check the data was saved and inserted correctly.
 
200
        expected_root_key = self.assertHasABMap(chk_bytes)
 
201
        self.assertEqual(expected_root_key, root_key)
 
202
 
 
203
    def test_apply_empty_ab(self):
 
204
        # applying a delta (None, "a", "b") to an empty chkmap generates the
 
205
        # same map as from_dict_ab.
 
206
        chk_bytes = self.get_chk_bytes()
 
207
        root_key = CHKMap.from_dict(chk_bytes, {})
 
208
        chkmap = CHKMap(chk_bytes, root_key)
 
209
        new_root = chkmap.apply_delta([(None, "a", "b")])
 
210
        # Check the data was saved and inserted correctly.
 
211
        expected_root_key = self.assertHasABMap(chk_bytes)
 
212
        self.assertEqual(expected_root_key, new_root)
 
213
        # The update should have left us with an in memory root node, with an
 
214
        # updated key.
 
215
        self.assertEqual(new_root, chkmap._root_node._key)
 
216
 
 
217
    def test_apply_ab_empty(self):
 
218
        # applying a delta ("a", None, None) to a map with 'a' in it generates
 
219
        # an empty map.
 
220
        chk_bytes = self.get_chk_bytes()
 
221
        root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
 
222
        chkmap = CHKMap(chk_bytes, root_key)
 
223
        new_root = chkmap.apply_delta([(("a",), None, None)])
 
224
        # Check the data was saved and inserted correctly.
 
225
        expected_root_key = self.assertHasEmptyMap(chk_bytes)
 
226
        self.assertEqual(expected_root_key, new_root)
 
227
        # The update should have left us with an in memory root node, with an
 
228
        # updated key.
 
229
        self.assertEqual(new_root, chkmap._root_node._key)
 
230
 
 
231
    def test_apply_delta_is_deterministic(self):
 
232
        chk_bytes = self.get_chk_bytes()
 
233
        chkmap1 = CHKMap(chk_bytes, None)
 
234
        chkmap1._root_node.set_maximum_size(10)
 
235
        chkmap1.apply_delta([(None, ('aaa',), 'common'),
 
236
                             (None, ('bba',), 'target2'),
 
237
                             (None, ('bbb',), 'common')])
 
238
        root_key1 = chkmap1._save()
 
239
        self.assertCanonicalForm(chkmap1)
 
240
 
 
241
        chkmap2 = CHKMap(chk_bytes, None)
 
242
        chkmap2._root_node.set_maximum_size(10)
 
243
        chkmap2.apply_delta([(None, ('bbb',), 'common'),
 
244
                             (None, ('bba',), 'target2'),
 
245
                             (None, ('aaa',), 'common')])
 
246
        root_key2 = chkmap2._save()
 
247
        self.assertEqualDiff(chkmap1._dump_tree(include_keys=True),
 
248
                             chkmap2._dump_tree(include_keys=True))
 
249
        self.assertEqual(root_key1, root_key2)
 
250
        self.assertCanonicalForm(chkmap2)
 
251
 
 
252
    def test_stable_splitting(self):
 
253
        store = self.get_chk_bytes()
 
254
        chkmap = CHKMap(store, None)
 
255
        # Should fit 2 keys per LeafNode
 
256
        chkmap._root_node.set_maximum_size(35)
 
257
        chkmap.map(('aaa',), 'v')
 
258
        self.assertEqualDiff("'' LeafNode\n"
 
259
                             "      ('aaa',) 'v'\n",
 
260
                             chkmap._dump_tree())
 
261
        chkmap.map(('aab',), 'v')
 
262
        self.assertEqualDiff("'' LeafNode\n"
 
263
                             "      ('aaa',) 'v'\n"
 
264
                             "      ('aab',) 'v'\n",
 
265
                             chkmap._dump_tree())
 
266
        self.assertCanonicalForm(chkmap)
 
267
 
 
268
        # Creates a new internal node, and splits the others into leaves
 
269
        chkmap.map(('aac',), 'v')
 
270
        self.assertEqualDiff("'' InternalNode\n"
 
271
                             "  'aaa' LeafNode\n"
 
272
                             "      ('aaa',) 'v'\n"
 
273
                             "  'aab' LeafNode\n"
 
274
                             "      ('aab',) 'v'\n"
 
275
                             "  'aac' LeafNode\n"
 
276
                             "      ('aac',) 'v'\n",
 
277
                             chkmap._dump_tree())
 
278
        self.assertCanonicalForm(chkmap)
 
279
 
 
280
        # Splits again, because it can't fit in the current structure
 
281
        chkmap.map(('bbb',), 'v')
 
282
        self.assertEqualDiff("'' InternalNode\n"
 
283
                             "  'a' InternalNode\n"
 
284
                             "    'aaa' LeafNode\n"
 
285
                             "      ('aaa',) 'v'\n"
 
286
                             "    'aab' LeafNode\n"
 
287
                             "      ('aab',) 'v'\n"
 
288
                             "    'aac' LeafNode\n"
 
289
                             "      ('aac',) 'v'\n"
 
290
                             "  'b' LeafNode\n"
 
291
                             "      ('bbb',) 'v'\n",
 
292
                             chkmap._dump_tree())
 
293
        self.assertCanonicalForm(chkmap)
 
294
 
 
295
    def test_map_splits_with_longer_key(self):
 
296
        store = self.get_chk_bytes()
 
297
        chkmap = CHKMap(store, None)
 
298
        # Should fit 1 key per LeafNode
 
299
        chkmap._root_node.set_maximum_size(10)
 
300
        chkmap.map(('aaa',), 'v')
 
301
        chkmap.map(('aaaa',), 'v')
 
302
        self.assertCanonicalForm(chkmap)
 
303
        self.assertIsInstance(chkmap._root_node, InternalNode)
 
304
 
 
305
    def test_with_linefeed_in_key(self):
 
306
        store = self.get_chk_bytes()
 
307
        chkmap = CHKMap(store, None)
 
308
        # Should fit 1 key per LeafNode
 
309
        chkmap._root_node.set_maximum_size(10)
 
310
        chkmap.map(('a\ra',), 'val1')
 
311
        chkmap.map(('a\rb',), 'val2')
 
312
        chkmap.map(('ac',), 'val3')
 
313
        self.assertCanonicalForm(chkmap)
 
314
        self.assertEqualDiff("'' InternalNode\n"
 
315
                             "  'a\\r' InternalNode\n"
 
316
                             "    'a\\ra' LeafNode\n"
 
317
                             "      ('a\\ra',) 'val1'\n"
 
318
                             "    'a\\rb' LeafNode\n"
 
319
                             "      ('a\\rb',) 'val2'\n"
 
320
                             "  'ac' LeafNode\n"
 
321
                             "      ('ac',) 'val3'\n",
 
322
                             chkmap._dump_tree())
 
323
        # We should also successfully serialise and deserialise these items
 
324
        root_key = chkmap._save()
 
325
        chkmap = CHKMap(store, root_key)
 
326
        self.assertEqualDiff("'' InternalNode\n"
 
327
                             "  'a\\r' InternalNode\n"
 
328
                             "    'a\\ra' LeafNode\n"
 
329
                             "      ('a\\ra',) 'val1'\n"
 
330
                             "    'a\\rb' LeafNode\n"
 
331
                             "      ('a\\rb',) 'val2'\n"
 
332
                             "  'ac' LeafNode\n"
 
333
                             "      ('ac',) 'val3'\n",
 
334
                             chkmap._dump_tree())
 
335
 
 
336
    def test_deep_splitting(self):
 
337
        store = self.get_chk_bytes()
 
338
        chkmap = CHKMap(store, None)
 
339
        # Should fit 2 keys per LeafNode
 
340
        chkmap._root_node.set_maximum_size(40)
 
341
        chkmap.map(('aaaaaaaa',), 'v')
 
342
        chkmap.map(('aaaaabaa',), 'v')
 
343
        self.assertEqualDiff("'' LeafNode\n"
 
344
                             "      ('aaaaaaaa',) 'v'\n"
 
345
                             "      ('aaaaabaa',) 'v'\n",
 
346
                             chkmap._dump_tree())
 
347
        chkmap.map(('aaabaaaa',), 'v')
 
348
        chkmap.map(('aaababaa',), 'v')
 
349
        self.assertEqualDiff("'' InternalNode\n"
 
350
                             "  'aaaa' LeafNode\n"
 
351
                             "      ('aaaaaaaa',) 'v'\n"
 
352
                             "      ('aaaaabaa',) 'v'\n"
 
353
                             "  'aaab' LeafNode\n"
 
354
                             "      ('aaabaaaa',) 'v'\n"
 
355
                             "      ('aaababaa',) 'v'\n",
 
356
                             chkmap._dump_tree())
 
357
        chkmap.map(('aaabacaa',), 'v')
 
358
        chkmap.map(('aaabadaa',), 'v')
 
359
        self.assertEqualDiff("'' InternalNode\n"
 
360
                             "  'aaaa' LeafNode\n"
 
361
                             "      ('aaaaaaaa',) 'v'\n"
 
362
                             "      ('aaaaabaa',) 'v'\n"
 
363
                             "  'aaab' InternalNode\n"
 
364
                             "    'aaabaa' LeafNode\n"
 
365
                             "      ('aaabaaaa',) 'v'\n"
 
366
                             "    'aaabab' LeafNode\n"
 
367
                             "      ('aaababaa',) 'v'\n"
 
368
                             "    'aaabac' LeafNode\n"
 
369
                             "      ('aaabacaa',) 'v'\n"
 
370
                             "    'aaabad' LeafNode\n"
 
371
                             "      ('aaabadaa',) 'v'\n",
 
372
                             chkmap._dump_tree())
 
373
        chkmap.map(('aaababba',), 'val')
 
374
        chkmap.map(('aaababca',), 'val')
 
375
        self.assertEqualDiff("'' InternalNode\n"
 
376
                             "  'aaaa' LeafNode\n"
 
377
                             "      ('aaaaaaaa',) 'v'\n"
 
378
                             "      ('aaaaabaa',) 'v'\n"
 
379
                             "  'aaab' InternalNode\n"
 
380
                             "    'aaabaa' LeafNode\n"
 
381
                             "      ('aaabaaaa',) 'v'\n"
 
382
                             "    'aaabab' InternalNode\n"
 
383
                             "      'aaababa' LeafNode\n"
 
384
                             "      ('aaababaa',) 'v'\n"
 
385
                             "      'aaababb' LeafNode\n"
 
386
                             "      ('aaababba',) 'val'\n"
 
387
                             "      'aaababc' LeafNode\n"
 
388
                             "      ('aaababca',) 'val'\n"
 
389
                             "    'aaabac' LeafNode\n"
 
390
                             "      ('aaabacaa',) 'v'\n"
 
391
                             "    'aaabad' LeafNode\n"
 
392
                             "      ('aaabadaa',) 'v'\n",
 
393
                             chkmap._dump_tree())
 
394
        # Now we add a node that should fit around an existing InternalNode,
 
395
        # but has a slightly different key prefix, which causes a new
 
396
        # InternalNode split
 
397
        chkmap.map(('aaabDaaa',), 'v')
 
398
        self.assertEqualDiff("'' InternalNode\n"
 
399
                             "  'aaaa' LeafNode\n"
 
400
                             "      ('aaaaaaaa',) 'v'\n"
 
401
                             "      ('aaaaabaa',) 'v'\n"
 
402
                             "  'aaab' InternalNode\n"
 
403
                             "    'aaabD' LeafNode\n"
 
404
                             "      ('aaabDaaa',) 'v'\n"
 
405
                             "    'aaaba' InternalNode\n"
 
406
                             "      'aaabaa' LeafNode\n"
 
407
                             "      ('aaabaaaa',) 'v'\n"
 
408
                             "      'aaabab' InternalNode\n"
 
409
                             "        'aaababa' LeafNode\n"
 
410
                             "      ('aaababaa',) 'v'\n"
 
411
                             "        'aaababb' LeafNode\n"
 
412
                             "      ('aaababba',) 'val'\n"
 
413
                             "        'aaababc' LeafNode\n"
 
414
                             "      ('aaababca',) 'val'\n"
 
415
                             "      'aaabac' LeafNode\n"
 
416
                             "      ('aaabacaa',) 'v'\n"
 
417
                             "      'aaabad' LeafNode\n"
 
418
                             "      ('aaabadaa',) 'v'\n",
 
419
                             chkmap._dump_tree())
 
420
 
 
421
    def test_map_collapses_if_size_changes(self):
 
422
        store = self.get_chk_bytes()
 
423
        chkmap = CHKMap(store, None)
 
424
        # Should fit 2 keys per LeafNode
 
425
        chkmap._root_node.set_maximum_size(35)
 
426
        chkmap.map(('aaa',), 'v')
 
427
        chkmap.map(('aab',), 'very long value that splits')
 
428
        self.assertEqualDiff("'' InternalNode\n"
 
429
                             "  'aaa' LeafNode\n"
 
430
                             "      ('aaa',) 'v'\n"
 
431
                             "  'aab' LeafNode\n"
 
432
                             "      ('aab',) 'very long value that splits'\n",
 
433
                             chkmap._dump_tree())
 
434
        self.assertCanonicalForm(chkmap)
 
435
        # Now changing the value to something small should cause a rebuild
 
436
        chkmap.map(('aab',), 'v')
 
437
        self.assertEqualDiff("'' LeafNode\n"
 
438
                             "      ('aaa',) 'v'\n"
 
439
                             "      ('aab',) 'v'\n",
 
440
                             chkmap._dump_tree())
 
441
        self.assertCanonicalForm(chkmap)
 
442
 
 
443
    def test_map_double_deep_collapses(self):
 
444
        store = self.get_chk_bytes()
 
445
        chkmap = CHKMap(store, None)
 
446
        # Should fit 3 small keys per LeafNode
 
447
        chkmap._root_node.set_maximum_size(40)
 
448
        chkmap.map(('aaa',), 'v')
 
449
        chkmap.map(('aab',), 'very long value that splits')
 
450
        chkmap.map(('abc',), 'v')
 
451
        self.assertEqualDiff("'' InternalNode\n"
 
452
                             "  'aa' InternalNode\n"
 
453
                             "    'aaa' LeafNode\n"
 
454
                             "      ('aaa',) 'v'\n"
 
455
                             "    'aab' LeafNode\n"
 
456
                             "      ('aab',) 'very long value that splits'\n"
 
457
                             "  'ab' LeafNode\n"
 
458
                             "      ('abc',) 'v'\n",
 
459
                             chkmap._dump_tree())
 
460
        chkmap.map(('aab',), 'v')
 
461
        self.assertCanonicalForm(chkmap)
 
462
        self.assertEqualDiff("'' LeafNode\n"
 
463
                             "      ('aaa',) 'v'\n"
 
464
                             "      ('aab',) 'v'\n"
 
465
                             "      ('abc',) 'v'\n",
 
466
                             chkmap._dump_tree())
 
467
 
 
468
    def test_stable_unmap(self):
 
469
        store = self.get_chk_bytes()
 
470
        chkmap = CHKMap(store, None)
 
471
        # Should fit 2 keys per LeafNode
 
472
        chkmap._root_node.set_maximum_size(35)
 
473
        chkmap.map(('aaa',), 'v')
 
474
        chkmap.map(('aab',), 'v')
 
475
        self.assertEqualDiff("'' LeafNode\n"
 
476
                             "      ('aaa',) 'v'\n"
 
477
                             "      ('aab',) 'v'\n",
 
478
                             chkmap._dump_tree())
 
479
        # Creates a new internal node, and splits the others into leaves
 
480
        chkmap.map(('aac',), 'v')
 
481
        self.assertEqualDiff("'' InternalNode\n"
 
482
                             "  'aaa' LeafNode\n"
 
483
                             "      ('aaa',) 'v'\n"
 
484
                             "  'aab' LeafNode\n"
 
485
                             "      ('aab',) 'v'\n"
 
486
                             "  'aac' LeafNode\n"
 
487
                             "      ('aac',) 'v'\n",
 
488
                             chkmap._dump_tree())
 
489
        self.assertCanonicalForm(chkmap)
 
490
        # Now lets unmap one of the keys, and assert that we collapse the
 
491
        # structures.
 
492
        chkmap.unmap(('aac',))
 
493
        self.assertEqualDiff("'' LeafNode\n"
 
494
                             "      ('aaa',) 'v'\n"
 
495
                             "      ('aab',) 'v'\n",
 
496
                             chkmap._dump_tree())
 
497
        self.assertCanonicalForm(chkmap)
 
498
 
 
499
    def test_unmap_double_deep(self):
 
500
        store = self.get_chk_bytes()
 
501
        chkmap = CHKMap(store, None)
 
502
        # Should fit 3 keys per LeafNode
 
503
        chkmap._root_node.set_maximum_size(40)
 
504
        chkmap.map(('aaa',), 'v')
 
505
        chkmap.map(('aaab',), 'v')
 
506
        chkmap.map(('aab',), 'very long value')
 
507
        chkmap.map(('abc',), 'v')
 
508
        self.assertEqualDiff("'' InternalNode\n"
 
509
                             "  'aa' InternalNode\n"
 
510
                             "    'aaa' LeafNode\n"
 
511
                             "      ('aaa',) 'v'\n"
 
512
                             "      ('aaab',) 'v'\n"
 
513
                             "    'aab' LeafNode\n"
 
514
                             "      ('aab',) 'very long value'\n"
 
515
                             "  'ab' LeafNode\n"
 
516
                             "      ('abc',) 'v'\n",
 
517
                             chkmap._dump_tree())
 
518
        # Removing the 'aab' key should cause everything to collapse back to a
 
519
        # single node
 
520
        chkmap.unmap(('aab',))
 
521
        self.assertEqualDiff("'' LeafNode\n"
 
522
                             "      ('aaa',) 'v'\n"
 
523
                             "      ('aaab',) 'v'\n"
 
524
                             "      ('abc',) 'v'\n",
 
525
                             chkmap._dump_tree())
 
526
 
 
527
    def test_unmap_double_deep_non_empty_leaf(self):
 
528
        store = self.get_chk_bytes()
 
529
        chkmap = CHKMap(store, None)
 
530
        # Should fit 3 keys per LeafNode
 
531
        chkmap._root_node.set_maximum_size(40)
 
532
        chkmap.map(('aaa',), 'v')
 
533
        chkmap.map(('aab',), 'long value')
 
534
        chkmap.map(('aabb',), 'v')
 
535
        chkmap.map(('abc',), 'v')
 
536
        self.assertEqualDiff("'' InternalNode\n"
 
537
                             "  'aa' InternalNode\n"
 
538
                             "    'aaa' LeafNode\n"
 
539
                             "      ('aaa',) 'v'\n"
 
540
                             "    'aab' LeafNode\n"
 
541
                             "      ('aab',) 'long value'\n"
 
542
                             "      ('aabb',) 'v'\n"
 
543
                             "  'ab' LeafNode\n"
 
544
                             "      ('abc',) 'v'\n",
 
545
                             chkmap._dump_tree())
 
546
        # Removing the 'aab' key should cause everything to collapse back to a
 
547
        # single node
 
548
        chkmap.unmap(('aab',))
 
549
        self.assertEqualDiff("'' LeafNode\n"
 
550
                             "      ('aaa',) 'v'\n"
 
551
                             "      ('aabb',) 'v'\n"
 
552
                             "      ('abc',) 'v'\n",
 
553
                             chkmap._dump_tree())
 
554
 
 
555
    def test_unmap_with_known_internal_node_doesnt_page(self):
 
556
        store = self.get_chk_bytes()
 
557
        chkmap = CHKMap(store, None)
 
558
        # Should fit 3 keys per LeafNode
 
559
        chkmap._root_node.set_maximum_size(30)
 
560
        chkmap.map(('aaa',), 'v')
 
561
        chkmap.map(('aab',), 'v')
 
562
        chkmap.map(('aac',), 'v')
 
563
        chkmap.map(('abc',), 'v')
 
564
        chkmap.map(('acd',), 'v')
 
565
        self.assertEqualDiff("'' InternalNode\n"
 
566
                             "  'aa' InternalNode\n"
 
567
                             "    'aaa' LeafNode\n"
 
568
                             "      ('aaa',) 'v'\n"
 
569
                             "    'aab' LeafNode\n"
 
570
                             "      ('aab',) 'v'\n"
 
571
                             "    'aac' LeafNode\n"
 
572
                             "      ('aac',) 'v'\n"
 
573
                             "  'ab' LeafNode\n"
 
574
                             "      ('abc',) 'v'\n"
 
575
                             "  'ac' LeafNode\n"
 
576
                             "      ('acd',) 'v'\n",
 
577
                             chkmap._dump_tree())
 
578
        # Save everything to the map, and start over
 
579
        chkmap = CHKMap(store, chkmap._save())
 
580
        # Mapping an 'aa' key loads the internal node, but should not map the
 
581
        # 'ab' and 'ac' nodes
 
582
        chkmap.map(('aad',), 'v')
 
583
        self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
 
584
        self.assertIsInstance(chkmap._root_node._items['ab'], tuple)
 
585
        self.assertIsInstance(chkmap._root_node._items['ac'], tuple)
 
586
        # Unmapping 'acd' can notice that 'aa' is an InternalNode and not have
 
587
        # to map in 'ab'
 
588
        chkmap.unmap(('acd',))
 
589
        self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
 
590
        self.assertIsInstance(chkmap._root_node._items['ab'], tuple)
 
591
 
 
592
    def test_unmap_without_fitting_doesnt_page_in(self):
 
593
        store = self.get_chk_bytes()
 
594
        chkmap = CHKMap(store, None)
 
595
        # Should fit 2 keys per LeafNode
 
596
        chkmap._root_node.set_maximum_size(20)
 
597
        chkmap.map(('aaa',), 'v')
 
598
        chkmap.map(('aab',), 'v')
 
599
        self.assertEqualDiff("'' InternalNode\n"
 
600
                             "  'aaa' LeafNode\n"
 
601
                             "      ('aaa',) 'v'\n"
 
602
                             "  'aab' LeafNode\n"
 
603
                             "      ('aab',) 'v'\n",
 
604
                             chkmap._dump_tree())
 
605
        # Save everything to the map, and start over
 
606
        chkmap = CHKMap(store, chkmap._save())
 
607
        chkmap.map(('aac',), 'v')
 
608
        chkmap.map(('aad',), 'v')
 
609
        chkmap.map(('aae',), 'v')
 
610
        chkmap.map(('aaf',), 'v')
 
611
        # At this point, the previous nodes should not be paged in, but the
 
612
        # newly added nodes would be
 
613
        self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
 
614
        self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
 
615
        self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
 
616
        self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
 
617
        self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
 
618
        self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
 
619
        # Now unmapping one of the new nodes will use only the already-paged-in
 
620
        # nodes to determine that we don't need to do more.
 
621
        chkmap.unmap(('aaf',))
 
622
        self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
 
623
        self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
 
624
        self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
 
625
        self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
 
626
        self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
 
627
 
 
628
    def test_unmap_pages_in_if_necessary(self):
 
629
        store = self.get_chk_bytes()
 
630
        chkmap = CHKMap(store, None)
 
631
        # Should fit 2 keys per LeafNode
 
632
        chkmap._root_node.set_maximum_size(30)
 
633
        chkmap.map(('aaa',), 'val')
 
634
        chkmap.map(('aab',), 'val')
 
635
        chkmap.map(('aac',), 'val')
 
636
        self.assertEqualDiff("'' InternalNode\n"
 
637
                             "  'aaa' LeafNode\n"
 
638
                             "      ('aaa',) 'val'\n"
 
639
                             "  'aab' LeafNode\n"
 
640
                             "      ('aab',) 'val'\n"
 
641
                             "  'aac' LeafNode\n"
 
642
                             "      ('aac',) 'val'\n",
 
643
                             chkmap._dump_tree())
 
644
        root_key = chkmap._save()
 
645
        # Save everything to the map, and start over
 
646
        chkmap = CHKMap(store, root_key)
 
647
        chkmap.map(('aad',), 'v')
 
648
        # At this point, the previous nodes should not be paged in, but the
 
649
        # newly added node would be
 
650
        self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
 
651
        self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
 
652
        self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
 
653
        self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
 
654
        # Unmapping the new node will check the existing nodes to see if they
 
655
        # would fit.
 
656
        # Clear the page cache so we ensure we have to read all the children
 
657
        chk_map._page_cache.clear()
 
658
        chkmap.unmap(('aad',))
 
659
        self.assertIsInstance(chkmap._root_node._items['aaa'], LeafNode)
 
660
        self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
 
661
        self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
 
662
 
 
663
    def test_unmap_pages_in_from_page_cache(self):
 
664
        store = self.get_chk_bytes()
 
665
        chkmap = CHKMap(store, None)
 
666
        # Should fit 2 keys per LeafNode
 
667
        chkmap._root_node.set_maximum_size(30)
 
668
        chkmap.map(('aaa',), 'val')
 
669
        chkmap.map(('aab',), 'val')
 
670
        chkmap.map(('aac',), 'val')
 
671
        root_key = chkmap._save()
 
672
        # Save everything to the map, and start over
 
673
        chkmap = CHKMap(store, root_key)
 
674
        chkmap.map(('aad',), 'val')
 
675
        self.assertEqualDiff("'' InternalNode\n"
 
676
                             "  'aaa' LeafNode\n"
 
677
                             "      ('aaa',) 'val'\n"
 
678
                             "  'aab' LeafNode\n"
 
679
                             "      ('aab',) 'val'\n"
 
680
                             "  'aac' LeafNode\n"
 
681
                             "      ('aac',) 'val'\n"
 
682
                             "  'aad' LeafNode\n"
 
683
                             "      ('aad',) 'val'\n",
 
684
                             chkmap._dump_tree())
 
685
        # Save everything to the map, start over after _dump_tree
 
686
        chkmap = CHKMap(store, root_key)
 
687
        chkmap.map(('aad',), 'v')
 
688
        # At this point, the previous nodes should not be paged in, but the
 
689
        # newly added node would be
 
690
        self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
 
691
        self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
 
692
        self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
 
693
        self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
 
694
        # Now clear the page cache, and only include 2 of the children in the
 
695
        # cache
 
696
        aab_key = chkmap._root_node._items['aab']
 
697
        aab_bytes = chk_map._page_cache[aab_key]
 
698
        aac_key = chkmap._root_node._items['aac']
 
699
        aac_bytes = chk_map._page_cache[aac_key]
 
700
        chk_map._page_cache.clear()
 
701
        chk_map._page_cache[aab_key] = aab_bytes
 
702
        chk_map._page_cache[aac_key] = aac_bytes
 
703
 
 
704
        # Unmapping the new node will check the nodes from the page cache
 
705
        # first, and not have to read in 'aaa'
 
706
        chkmap.unmap(('aad',))
 
707
        self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
 
708
        self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
 
709
        self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
 
710
 
 
711
    def test_unmap_uses_existing_items(self):
 
712
        store = self.get_chk_bytes()
 
713
        chkmap = CHKMap(store, None)
 
714
        # Should fit 2 keys per LeafNode
 
715
        chkmap._root_node.set_maximum_size(30)
 
716
        chkmap.map(('aaa',), 'val')
 
717
        chkmap.map(('aab',), 'val')
 
718
        chkmap.map(('aac',), 'val')
 
719
        root_key = chkmap._save()
 
720
        # Save everything to the map, and start over
 
721
        chkmap = CHKMap(store, root_key)
 
722
        chkmap.map(('aad',), 'val')
 
723
        chkmap.map(('aae',), 'val')
 
724
        chkmap.map(('aaf',), 'val')
 
725
        # At this point, the previous nodes should not be paged in, but the
 
726
        # newly added node would be
 
727
        self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
 
728
        self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
 
729
        self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
 
730
        self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
 
731
        self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
 
732
        self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
 
733
 
 
734
        # Unmapping a new node will see the other nodes that are already in
 
735
        # memory, and not need to page in anything else
 
736
        chkmap.unmap(('aad',))
 
737
        self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
 
738
        self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
 
739
        self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
 
740
        self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
 
741
        self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
 
742
 
 
743
    def test_iter_changes_empty_ab(self):
 
744
        # Asking for changes between an empty dict to a dict with keys returns
 
745
        # all the keys.
 
746
        basis = self._get_map({}, maximum_size=10)
 
747
        target = self._get_map(
 
748
            {('a',): 'content here', ('b',): 'more content'},
 
749
            chk_bytes=basis._store, maximum_size=10)
 
750
        self.assertEqual([(('a',), None, 'content here'),
 
751
            (('b',), None, 'more content')],
 
752
            sorted(list(target.iter_changes(basis))))
 
753
 
 
754
    def test_iter_changes_ab_empty(self):
 
755
        # Asking for changes between a dict with keys to an empty dict returns
 
756
        # all the keys.
 
757
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
 
758
            maximum_size=10)
 
759
        target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
 
760
        self.assertEqual([(('a',), 'content here', None),
 
761
            (('b',), 'more content', None)],
 
762
            sorted(list(target.iter_changes(basis))))
 
763
 
 
764
    def test_iter_changes_empty_empty_is_empty(self):
 
765
        basis = self._get_map({}, maximum_size=10)
 
766
        target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
 
767
        self.assertEqual([], sorted(list(target.iter_changes(basis))))
 
768
 
 
769
    def test_iter_changes_ab_ab_is_empty(self):
 
770
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
 
771
            maximum_size=10)
 
772
        target = self._get_map(
 
773
            {('a',): 'content here', ('b',): 'more content'},
 
774
            chk_bytes=basis._store, maximum_size=10)
 
775
        self.assertEqual([], sorted(list(target.iter_changes(basis))))
 
776
 
 
777
    def test_iter_changes_ab_ab_nodes_not_loaded(self):
 
778
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
 
779
            maximum_size=10)
 
780
        target = self._get_map(
 
781
            {('a',): 'content here', ('b',): 'more content'},
 
782
            chk_bytes=basis._store, maximum_size=10)
 
783
        list(target.iter_changes(basis))
 
784
        self.assertIsInstance(target._root_node, tuple)
 
785
        self.assertIsInstance(basis._root_node, tuple)
 
786
 
 
787
    def test_iter_changes_ab_ab_changed_values_shown(self):
 
788
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
 
789
            maximum_size=10)
 
790
        target = self._get_map(
 
791
            {('a',): 'content here', ('b',): 'different content'},
 
792
            chk_bytes=basis._store, maximum_size=10)
 
793
        result = sorted(list(target.iter_changes(basis)))
 
794
        self.assertEqual([(('b',), 'more content', 'different content')],
 
795
            result)
 
796
 
 
797
    def test_iter_changes_mixed_node_length(self):
 
798
        # When one side has different node lengths than the other, common
 
799
        # but different keys still need to be show, and new-and-old included
 
800
        # appropriately.
 
801
        # aaa - common unaltered
 
802
        # aab - common altered
 
803
        # b - basis only
 
804
        # at - target only
 
805
        # we expect:
 
806
        # aaa to be not loaded (later test)
 
807
        # aab, b, at to be returned.
 
808
        # basis splits at byte 0,1,2, aaa is commonb is basis only
 
809
        basis_dict = {('aaa',): 'foo bar',
 
810
            ('aab',): 'common altered a', ('b',): 'foo bar b'}
 
811
        # target splits at byte 1,2, at is target only
 
812
        target_dict = {('aaa',): 'foo bar',
 
813
            ('aab',): 'common altered b', ('at',): 'foo bar t'}
 
814
        changes = [
 
815
            (('aab',), 'common altered a', 'common altered b'),
 
816
            (('at',), None, 'foo bar t'),
 
817
            (('b',), 'foo bar b', None),
 
818
            ]
 
819
        basis = self._get_map(basis_dict, maximum_size=10)
 
820
        target = self._get_map(target_dict, maximum_size=10,
 
821
            chk_bytes=basis._store)
 
822
        self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
 
823
 
 
824
    def test_iter_changes_common_pages_not_loaded(self):
 
825
        # aaa - common unaltered
 
826
        # aab - common altered
 
827
        # b - basis only
 
828
        # at - target only
 
829
        # we expect:
 
830
        # aaa to be not loaded
 
831
        # aaa not to be in result.
 
832
        basis_dict = {('aaa',): 'foo bar',
 
833
            ('aab',): 'common altered a', ('b',): 'foo bar b'}
 
834
        # target splits at byte 1, at is target only
 
835
        target_dict = {('aaa',): 'foo bar',
 
836
            ('aab',): 'common altered b', ('at',): 'foo bar t'}
 
837
        basis = self._get_map(basis_dict, maximum_size=10)
 
838
        target = self._get_map(target_dict, maximum_size=10,
 
839
            chk_bytes=basis._store)
 
840
        basis_get = basis._store.get_record_stream
 
841
        def get_record_stream(keys, order, fulltext):
 
842
            if ('sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys:
 
843
                self.fail("'aaa' pointer was followed %r" % keys)
 
844
            return basis_get(keys, order, fulltext)
 
845
        basis._store.get_record_stream = get_record_stream
 
846
        result = sorted(list(target.iter_changes(basis)))
 
847
        for change in result:
 
848
            if change[0] == ('aaa',):
 
849
                self.fail("Found unexpected change: %s" % change)
 
850
 
 
851
    def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self):
 
852
        # Within a leaf there are no hash's to exclude keys, make sure multi
 
853
        # value leaf nodes are handled well.
 
854
        basis_dict = {('aaa',): 'foo bar',
 
855
            ('aab',): 'common altered a', ('b',): 'foo bar b'}
 
856
        target_dict = {('aaa',): 'foo bar',
 
857
            ('aab',): 'common altered b', ('at',): 'foo bar t'}
 
858
        changes = [
 
859
            (('aab',), 'common altered a', 'common altered b'),
 
860
            (('at',), None, 'foo bar t'),
 
861
            (('b',), 'foo bar b', None),
 
862
            ]
 
863
        basis = self._get_map(basis_dict)
 
864
        target = self._get_map(target_dict, chk_bytes=basis._store)
 
865
        self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
 
866
 
 
867
    def test_iteritems_empty(self):
 
868
        chk_bytes = self.get_chk_bytes()
 
869
        root_key = CHKMap.from_dict(chk_bytes, {})
 
870
        chkmap = CHKMap(chk_bytes, root_key)
 
871
        self.assertEqual([], list(chkmap.iteritems()))
 
872
 
 
873
    def test_iteritems_two_items(self):
 
874
        chk_bytes = self.get_chk_bytes()
 
875
        root_key = CHKMap.from_dict(chk_bytes,
 
876
            {"a":"content here", "b":"more content"})
 
877
        chkmap = CHKMap(chk_bytes, root_key)
 
878
        self.assertEqual([(("a",), "content here"), (("b",), "more content")],
 
879
            sorted(list(chkmap.iteritems())))
 
880
 
 
881
    def test_iteritems_selected_one_of_two_items(self):
 
882
        chkmap = self._get_map( {("a",):"content here", ("b",):"more content"})
 
883
        self.assertEqual({("a",): "content here"},
 
884
            self.to_dict(chkmap, [("a",)]))
 
885
 
 
886
    def test_iteritems_keys_prefixed_by_2_width_nodes(self):
 
887
        chkmap = self._get_map(
 
888
            {("a","a"):"content here", ("a", "b",):"more content",
 
889
             ("b", ""): 'boring content'},
 
890
            maximum_size=10, key_width=2)
 
891
        self.assertEqual(
 
892
            {("a", "a"): "content here", ("a", "b"): 'more content'},
 
893
            self.to_dict(chkmap, [("a",)]))
 
894
 
 
895
    def test_iteritems_keys_prefixed_by_2_width_nodes_hashed(self):
 
896
        search_key_func = chk_map.search_key_registry.get('hash-16-way')
 
897
        self.assertEqual('E8B7BE43\x00E8B7BE43', search_key_func(('a', 'a')))
 
898
        self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
 
899
        self.assertEqual('71BEEFF9\x0000000000', search_key_func(('b', '')))
 
900
        chkmap = self._get_map(
 
901
            {("a","a"):"content here", ("a", "b",):"more content",
 
902
             ("b", ""): 'boring content'},
 
903
            maximum_size=10, key_width=2, search_key_func=search_key_func)
 
904
        self.assertEqual(
 
905
            {("a", "a"): "content here", ("a", "b"): 'more content'},
 
906
            self.to_dict(chkmap, [("a",)]))
 
907
 
 
908
    def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
 
909
        chkmap = self._get_map(
 
910
            {("a","a"):"content here", ("a", "b",):"more content",
 
911
             ("b", ""): 'boring content'}, key_width=2)
 
912
        self.assertEqual(
 
913
            {("a", "a"): "content here", ("a", "b"): 'more content'},
 
914
            self.to_dict(chkmap, [("a",)]))
 
915
 
 
916
    def test___len__empty(self):
 
917
        chkmap = self._get_map({})
 
918
        self.assertEqual(0, len(chkmap))
 
919
 
 
920
    def test___len__2(self):
 
921
        chkmap = self._get_map({("foo",):"bar", ("gam",):"quux"})
 
922
        self.assertEqual(2, len(chkmap))
 
923
 
 
924
    def test_max_size_100_bytes_new(self):
 
925
        # When there is a 100 byte upper node limit, a tree is formed.
 
926
        chkmap = self._get_map({("k1"*50,):"v1", ("k2"*50,):"v2"}, maximum_size=100)
 
927
        # We expect three nodes:
 
928
        # A root, with two children, and with two key prefixes - k1 to one, and
 
929
        # k2 to the other as our node splitting is only just being developed.
 
930
        # The maximum size should be embedded
 
931
        chkmap._ensure_root()
 
932
        self.assertEqual(100, chkmap._root_node.maximum_size)
 
933
        self.assertEqual(1, chkmap._root_node._key_width)
 
934
        # There should be two child nodes, and prefix of 2(bytes):
 
935
        self.assertEqual(2, len(chkmap._root_node._items))
 
936
        self.assertEqual("k", chkmap._root_node._compute_search_prefix())
 
937
        # The actual nodes pointed at will change as serialisers change; so
 
938
        # here we test that the key prefix is correct; then load the nodes and
 
939
        # check they have the right pointed at key; whether they have the
 
940
        # pointed at value inline or not is also unrelated to this test so we
 
941
        # don't check that in detail - rather we just check the aggregate
 
942
        # value.
 
943
        nodes = sorted(chkmap._root_node._items.items())
 
944
        ptr1 = nodes[0]
 
945
        ptr2 = nodes[1]
 
946
        self.assertEqual('k1', ptr1[0])
 
947
        self.assertEqual('k2', ptr2[0])
 
948
        node1 = chk_map._deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1], None)
 
949
        self.assertIsInstance(node1, LeafNode)
 
950
        self.assertEqual(1, len(node1))
 
951
        self.assertEqual({('k1'*50,): 'v1'}, self.to_dict(node1, chkmap._store))
 
952
        node2 = chk_map._deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1], None)
 
953
        self.assertIsInstance(node2, LeafNode)
 
954
        self.assertEqual(1, len(node2))
 
955
        self.assertEqual({('k2'*50,): 'v2'}, self.to_dict(node2, chkmap._store))
 
956
        # Having checked we have a good structure, check that the content is
 
957
        # still accessible.
 
958
        self.assertEqual(2, len(chkmap))
 
959
        self.assertEqual({("k1"*50,): "v1", ("k2"*50,): "v2"},
 
960
            self.to_dict(chkmap))
 
961
 
 
962
    def test_init_root_is_LeafNode_new(self):
 
963
        chk_bytes = self.get_chk_bytes()
 
964
        chkmap = CHKMap(chk_bytes, None)
 
965
        self.assertIsInstance(chkmap._root_node, LeafNode)
 
966
        self.assertEqual({}, self.to_dict(chkmap))
 
967
        self.assertEqual(0, len(chkmap))
 
968
 
 
969
    def test_init_and_save_new(self):
 
970
        chk_bytes = self.get_chk_bytes()
 
971
        chkmap = CHKMap(chk_bytes, None)
 
972
        key = chkmap._save()
 
973
        leaf_node = LeafNode()
 
974
        self.assertEqual([key], leaf_node.serialise(chk_bytes))
 
975
 
 
976
    def test_map_first_item_new(self):
 
977
        chk_bytes = self.get_chk_bytes()
 
978
        chkmap = CHKMap(chk_bytes, None)
 
979
        chkmap.map(("foo,",), "bar")
 
980
        self.assertEqual({('foo,',): 'bar'}, self.to_dict(chkmap))
 
981
        self.assertEqual(1, len(chkmap))
 
982
        key = chkmap._save()
 
983
        leaf_node = LeafNode()
 
984
        leaf_node.map(chk_bytes, ("foo,",), "bar")
 
985
        self.assertEqual([key], leaf_node.serialise(chk_bytes))
 
986
 
 
987
    def test_unmap_last_item_root_is_leaf_new(self):
 
988
        chkmap = self._get_map({("k1"*50,): "v1", ("k2"*50,): "v2"})
 
989
        chkmap.unmap(("k1"*50,))
 
990
        chkmap.unmap(("k2"*50,))
 
991
        self.assertEqual(0, len(chkmap))
 
992
        self.assertEqual({}, self.to_dict(chkmap))
 
993
        key = chkmap._save()
 
994
        leaf_node = LeafNode()
 
995
        self.assertEqual([key], leaf_node.serialise(chkmap._store))
 
996
 
 
997
    def test__dump_tree(self):
 
998
        chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2",
 
999
                                ("bbb",): "value3",},
 
1000
                               maximum_size=15)
 
1001
        self.assertEqualDiff("'' InternalNode\n"
 
1002
                             "  'a' InternalNode\n"
 
1003
                             "    'aaa' LeafNode\n"
 
1004
                             "      ('aaa',) 'value1'\n"
 
1005
                             "    'aab' LeafNode\n"
 
1006
                             "      ('aab',) 'value2'\n"
 
1007
                             "  'b' LeafNode\n"
 
1008
                             "      ('bbb',) 'value3'\n",
 
1009
                             chkmap._dump_tree())
 
1010
        self.assertEqualDiff("'' InternalNode\n"
 
1011
                             "  'a' InternalNode\n"
 
1012
                             "    'aaa' LeafNode\n"
 
1013
                             "      ('aaa',) 'value1'\n"
 
1014
                             "    'aab' LeafNode\n"
 
1015
                             "      ('aab',) 'value2'\n"
 
1016
                             "  'b' LeafNode\n"
 
1017
                             "      ('bbb',) 'value3'\n",
 
1018
                             chkmap._dump_tree())
 
1019
        self.assertEqualDiff(
 
1020
            "'' InternalNode sha1:0690d471eb0a624f359797d0ee4672bd68f4e236\n"
 
1021
            "  'a' InternalNode sha1:1514c35503da9418d8fd90c1bed553077cb53673\n"
 
1022
            "    'aaa' LeafNode sha1:4cc5970454d40b4ce297a7f13ddb76f63b88fefb\n"
 
1023
            "      ('aaa',) 'value1'\n"
 
1024
            "    'aab' LeafNode sha1:1d68bc90914ef8a3edbcc8bb28b00cb4fea4b5e2\n"
 
1025
            "      ('aab',) 'value2'\n"
 
1026
            "  'b' LeafNode sha1:3686831435b5596515353364eab0399dc45d49e7\n"
 
1027
            "      ('bbb',) 'value3'\n",
 
1028
            chkmap._dump_tree(include_keys=True))
 
1029
 
 
1030
    def test__dump_tree_in_progress(self):
 
1031
        chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2"},
 
1032
                               maximum_size=10)
 
1033
        chkmap.map(('bbb',), 'value3')
 
1034
        self.assertEqualDiff("'' InternalNode\n"
 
1035
                             "  'a' InternalNode\n"
 
1036
                             "    'aaa' LeafNode\n"
 
1037
                             "      ('aaa',) 'value1'\n"
 
1038
                             "    'aab' LeafNode\n"
 
1039
                             "      ('aab',) 'value2'\n"
 
1040
                             "  'b' LeafNode\n"
 
1041
                             "      ('bbb',) 'value3'\n",
 
1042
                             chkmap._dump_tree())
 
1043
        # For things that are updated by adding 'bbb', we don't have a sha key
 
1044
        # for them yet, so they are listed as None
 
1045
        self.assertEqualDiff(
 
1046
            "'' InternalNode None\n"
 
1047
            "  'a' InternalNode sha1:6b0d881dd739a66f733c178b24da64395edfaafd\n"
 
1048
            "    'aaa' LeafNode sha1:40b39a08d895babce17b20ae5f62d187eaa4f63a\n"
 
1049
            "      ('aaa',) 'value1'\n"
 
1050
            "    'aab' LeafNode sha1:ad1dc7c4e801302c95bf1ba7b20bc45e548cd51a\n"
 
1051
            "      ('aab',) 'value2'\n"
 
1052
            "  'b' LeafNode None\n"
 
1053
            "      ('bbb',) 'value3'\n",
 
1054
            chkmap._dump_tree(include_keys=True))
 
1055
 
 
1056
 
 
1057
def _search_key_single(key):
 
1058
    """A search key function that maps all nodes to the same value"""
 
1059
    return 'value'
 
1060
 
 
1061
def _test_search_key(key):
 
1062
    return 'test:' + '\x00'.join(key)
 
1063
 
 
1064
 
 
1065
class TestMapSearchKeys(TestCaseWithStore):
 
1066
 
 
1067
    def test_default_chk_map_uses_flat_search_key(self):
 
1068
        chkmap = chk_map.CHKMap(self.get_chk_bytes(), None)
 
1069
        self.assertEqual('1',
 
1070
                         chkmap._search_key_func(('1',)))
 
1071
        self.assertEqual('1\x002',
 
1072
                         chkmap._search_key_func(('1', '2')))
 
1073
        self.assertEqual('1\x002\x003',
 
1074
                         chkmap._search_key_func(('1', '2', '3')))
 
1075
 
 
1076
    def test_search_key_is_passed_to_root_node(self):
 
1077
        chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
 
1078
                                search_key_func=_test_search_key)
 
1079
        self.assertIs(_test_search_key, chkmap._search_key_func)
 
1080
        self.assertEqual('test:1\x002\x003',
 
1081
                         chkmap._search_key_func(('1', '2', '3')))
 
1082
        self.assertEqual('test:1\x002\x003',
 
1083
                         chkmap._root_node._search_key(('1', '2', '3')))
 
1084
 
 
1085
    def test_search_key_passed_via__ensure_root(self):
 
1086
        chk_bytes = self.get_chk_bytes()
 
1087
        chkmap = chk_map.CHKMap(chk_bytes, None,
 
1088
                                search_key_func=_test_search_key)
 
1089
        root_key = chkmap._save()
 
1090
        chkmap = chk_map.CHKMap(chk_bytes, root_key,
 
1091
                                search_key_func=_test_search_key)
 
1092
        chkmap._ensure_root()
 
1093
        self.assertEqual('test:1\x002\x003',
 
1094
                         chkmap._root_node._search_key(('1', '2', '3')))
 
1095
 
 
1096
    def test_search_key_with_internal_node(self):
 
1097
        chk_bytes = self.get_chk_bytes()
 
1098
        chkmap = chk_map.CHKMap(chk_bytes, None,
 
1099
                                search_key_func=_test_search_key)
 
1100
        chkmap._root_node.set_maximum_size(10)
 
1101
        chkmap.map(('1',), 'foo')
 
1102
        chkmap.map(('2',), 'bar')
 
1103
        chkmap.map(('3',), 'baz')
 
1104
        self.assertEqualDiff("'' InternalNode\n"
 
1105
                             "  'test:1' LeafNode\n"
 
1106
                             "      ('1',) 'foo'\n"
 
1107
                             "  'test:2' LeafNode\n"
 
1108
                             "      ('2',) 'bar'\n"
 
1109
                             "  'test:3' LeafNode\n"
 
1110
                             "      ('3',) 'baz'\n"
 
1111
                             , chkmap._dump_tree())
 
1112
        root_key = chkmap._save()
 
1113
        chkmap = chk_map.CHKMap(chk_bytes, root_key,
 
1114
                                search_key_func=_test_search_key)
 
1115
        self.assertEqualDiff("'' InternalNode\n"
 
1116
                             "  'test:1' LeafNode\n"
 
1117
                             "      ('1',) 'foo'\n"
 
1118
                             "  'test:2' LeafNode\n"
 
1119
                             "      ('2',) 'bar'\n"
 
1120
                             "  'test:3' LeafNode\n"
 
1121
                             "      ('3',) 'baz'\n"
 
1122
                             , chkmap._dump_tree())
 
1123
 
 
1124
    def test_search_key_16(self):
 
1125
        chk_bytes = self.get_chk_bytes()
 
1126
        chkmap = chk_map.CHKMap(chk_bytes, None,
 
1127
                                search_key_func=chk_map._search_key_16)
 
1128
        chkmap._root_node.set_maximum_size(10)
 
1129
        chkmap.map(('1',), 'foo')
 
1130
        chkmap.map(('2',), 'bar')
 
1131
        chkmap.map(('3',), 'baz')
 
1132
        self.assertEqualDiff("'' InternalNode\n"
 
1133
                             "  '1' LeafNode\n"
 
1134
                             "      ('2',) 'bar'\n"
 
1135
                             "  '6' LeafNode\n"
 
1136
                             "      ('3',) 'baz'\n"
 
1137
                             "  '8' LeafNode\n"
 
1138
                             "      ('1',) 'foo'\n"
 
1139
                             , chkmap._dump_tree())
 
1140
        root_key = chkmap._save()
 
1141
        chkmap = chk_map.CHKMap(chk_bytes, root_key,
 
1142
                                search_key_func=chk_map._search_key_16)
 
1143
        # We can get the values back correctly
 
1144
        self.assertEqual([(('1',), 'foo')],
 
1145
                         list(chkmap.iteritems([('1',)])))
 
1146
        self.assertEqualDiff("'' InternalNode\n"
 
1147
                             "  '1' LeafNode\n"
 
1148
                             "      ('2',) 'bar'\n"
 
1149
                             "  '6' LeafNode\n"
 
1150
                             "      ('3',) 'baz'\n"
 
1151
                             "  '8' LeafNode\n"
 
1152
                             "      ('1',) 'foo'\n"
 
1153
                             , chkmap._dump_tree())
 
1154
 
 
1155
    def test_search_key_255(self):
 
1156
        chk_bytes = self.get_chk_bytes()
 
1157
        chkmap = chk_map.CHKMap(chk_bytes, None,
 
1158
                                search_key_func=chk_map._search_key_255)
 
1159
        chkmap._root_node.set_maximum_size(10)
 
1160
        chkmap.map(('1',), 'foo')
 
1161
        chkmap.map(('2',), 'bar')
 
1162
        chkmap.map(('3',), 'baz')
 
1163
        self.assertEqualDiff("'' InternalNode\n"
 
1164
                             "  '\\x1a' LeafNode\n"
 
1165
                             "      ('2',) 'bar'\n"
 
1166
                             "  'm' LeafNode\n"
 
1167
                             "      ('3',) 'baz'\n"
 
1168
                             "  '\\x83' LeafNode\n"
 
1169
                             "      ('1',) 'foo'\n"
 
1170
                             , chkmap._dump_tree())
 
1171
        root_key = chkmap._save()
 
1172
        chkmap = chk_map.CHKMap(chk_bytes, root_key,
 
1173
                                search_key_func=chk_map._search_key_255)
 
1174
        # We can get the values back correctly
 
1175
        self.assertEqual([(('1',), 'foo')],
 
1176
                         list(chkmap.iteritems([('1',)])))
 
1177
        self.assertEqualDiff("'' InternalNode\n"
 
1178
                             "  '\\x1a' LeafNode\n"
 
1179
                             "      ('2',) 'bar'\n"
 
1180
                             "  'm' LeafNode\n"
 
1181
                             "      ('3',) 'baz'\n"
 
1182
                             "  '\\x83' LeafNode\n"
 
1183
                             "      ('1',) 'foo'\n"
 
1184
                             , chkmap._dump_tree())
 
1185
 
 
1186
    def test_search_key_collisions(self):
 
1187
        chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
 
1188
                                search_key_func=_search_key_single)
 
1189
        # The node will want to expand, but it cannot, because it knows that
 
1190
        # all the keys must map to this node
 
1191
        chkmap._root_node.set_maximum_size(20)
 
1192
        chkmap.map(('1',), 'foo')
 
1193
        chkmap.map(('2',), 'bar')
 
1194
        chkmap.map(('3',), 'baz')
 
1195
        self.assertEqualDiff("'' LeafNode\n"
 
1196
                             "      ('1',) 'foo'\n"
 
1197
                             "      ('2',) 'bar'\n"
 
1198
                             "      ('3',) 'baz'\n"
 
1199
                             , chkmap._dump_tree())
 
1200
 
 
1201
 
 
1202
class TestSearchKeyFuncs(tests.TestCase):
 
1203
 
 
1204
    def assertSearchKey16(self, expected, key):
 
1205
        self.assertEqual(expected, chk_map._search_key_16(key))
 
1206
 
 
1207
    def assertSearchKey255(self, expected, key):
 
1208
        actual = chk_map._search_key_255(key)
 
1209
        self.assertEqual(expected, actual, 'actual: %r' % (actual,))
 
1210
 
 
1211
    def test_simple_16(self):
 
1212
        self.assertSearchKey16('8C736521', ('foo',))
 
1213
        self.assertSearchKey16('8C736521\x008C736521', ('foo', 'foo'))
 
1214
        self.assertSearchKey16('8C736521\x0076FF8CAA', ('foo', 'bar'))
 
1215
        self.assertSearchKey16('ED82CD11', ('abcd',))
 
1216
 
 
1217
    def test_simple_255(self):
 
1218
        self.assertSearchKey255('\x8cse!', ('foo',))
 
1219
        self.assertSearchKey255('\x8cse!\x00\x8cse!', ('foo', 'foo'))
 
1220
        self.assertSearchKey255('\x8cse!\x00v\xff\x8c\xaa', ('foo', 'bar'))
 
1221
        # The standard mapping for these would include '\n', so it should be
 
1222
        # mapped to '_'
 
1223
        self.assertSearchKey255('\xfdm\x93_\x00P_\x1bL', ('<', 'V'))
 
1224
 
 
1225
    def test_255_does_not_include_newline(self):
 
1226
        # When mapping via _search_key_255, we should never have the '\n'
 
1227
        # character, but all other 255 values should be present
 
1228
        chars_used = set()
 
1229
        for char_in in range(256):
 
1230
            search_key = chk_map._search_key_255((chr(char_in),))
 
1231
            chars_used.update(search_key)
 
1232
        all_chars = set([chr(x) for x in range(256)])
 
1233
        unused_chars = all_chars.symmetric_difference(chars_used)
 
1234
        self.assertEqual(set('\n'), unused_chars)
 
1235
 
 
1236
 
 
1237
class TestLeafNode(TestCaseWithStore):
 
1238
 
 
1239
    def test_current_size_empty(self):
 
1240
        node = LeafNode()
 
1241
        self.assertEqual(16, node._current_size())
 
1242
 
 
1243
    def test_current_size_size_changed(self):
 
1244
        node = LeafNode()
 
1245
        node.set_maximum_size(10)
 
1246
        self.assertEqual(17, node._current_size())
 
1247
 
 
1248
    def test_current_size_width_changed(self):
 
1249
        node = LeafNode()
 
1250
        node._key_width = 10
 
1251
        self.assertEqual(17, node._current_size())
 
1252
 
 
1253
    def test_current_size_items(self):
 
1254
        node = LeafNode()
 
1255
        base_size = node._current_size()
 
1256
        node.map(None, ("foo bar",), "baz")
 
1257
        self.assertEqual(base_size + 14, node._current_size())
 
1258
 
 
1259
    def test_deserialise_empty(self):
 
1260
        node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
 
1261
        self.assertEqual(0, len(node))
 
1262
        self.assertEqual(10, node.maximum_size)
 
1263
        self.assertEqual(("sha1:1234",), node.key())
 
1264
        self.assertIs(None, node._search_prefix)
 
1265
        self.assertIs(None, node._common_serialised_prefix)
 
1266
 
 
1267
    def test_deserialise_items(self):
 
1268
        node = LeafNode.deserialise(
 
1269
            "chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
 
1270
            ("sha1:1234",))
 
1271
        self.assertEqual(2, len(node))
 
1272
        self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")],
 
1273
            sorted(node.iteritems(None)))
 
1274
 
 
1275
    def test_deserialise_item_with_null_width_1(self):
 
1276
        node = LeafNode.deserialise(
 
1277
            "chkleaf:\n0\n1\n2\n\nfoo\x001\nbar\x00baz\nquux\x001\nblarh\n",
 
1278
            ("sha1:1234",))
 
1279
        self.assertEqual(2, len(node))
 
1280
        self.assertEqual([(("foo",), "bar\x00baz"), (("quux",), "blarh")],
 
1281
            sorted(node.iteritems(None)))
 
1282
 
 
1283
    def test_deserialise_item_with_null_width_2(self):
 
1284
        node = LeafNode.deserialise(
 
1285
            "chkleaf:\n0\n2\n2\n\nfoo\x001\x001\nbar\x00baz\n"
 
1286
            "quux\x00\x001\nblarh\n",
 
1287
            ("sha1:1234",))
 
1288
        self.assertEqual(2, len(node))
 
1289
        self.assertEqual([(("foo", "1"), "bar\x00baz"), (("quux", ""), "blarh")],
 
1290
            sorted(node.iteritems(None)))
 
1291
 
 
1292
    def test_iteritems_selected_one_of_two_items(self):
 
1293
        node = LeafNode.deserialise(
 
1294
            "chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
 
1295
            ("sha1:1234",))
 
1296
        self.assertEqual(2, len(node))
 
1297
        self.assertEqual([(("quux",), "blarh")],
 
1298
            sorted(node.iteritems(None, [("quux",), ("qaz",)])))
 
1299
 
 
1300
    def test_deserialise_item_with_common_prefix(self):
 
1301
        node = LeafNode.deserialise(
 
1302
            "chkleaf:\n0\n2\n2\nfoo\x00\n1\x001\nbar\x00baz\n2\x001\nblarh\n",
 
1303
            ("sha1:1234",))
 
1304
        self.assertEqual(2, len(node))
 
1305
        self.assertEqual([(("foo", "1"), "bar\x00baz"), (("foo", "2"), "blarh")],
 
1306
            sorted(node.iteritems(None)))
 
1307
        self.assertIs(chk_map._unknown, node._search_prefix)
 
1308
        self.assertEqual('foo\x00', node._common_serialised_prefix)
 
1309
 
 
1310
    def test_deserialise_multi_line(self):
 
1311
        node = LeafNode.deserialise(
 
1312
            "chkleaf:\n0\n2\n2\nfoo\x00\n1\x002\nbar\nbaz\n2\x002\nblarh\n\n",
 
1313
            ("sha1:1234",))
 
1314
        self.assertEqual(2, len(node))
 
1315
        self.assertEqual([(("foo", "1"), "bar\nbaz"),
 
1316
                          (("foo", "2"), "blarh\n"),
 
1317
                         ], sorted(node.iteritems(None)))
 
1318
        self.assertIs(chk_map._unknown, node._search_prefix)
 
1319
        self.assertEqual('foo\x00', node._common_serialised_prefix)
 
1320
 
 
1321
    def test_key_new(self):
 
1322
        node = LeafNode()
 
1323
        self.assertEqual(None, node.key())
 
1324
 
 
1325
    def test_key_after_map(self):
 
1326
        node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
 
1327
        node.map(None, ("foo bar",), "baz quux")
 
1328
        self.assertEqual(None, node.key())
 
1329
 
 
1330
    def test_key_after_unmap(self):
 
1331
        node = LeafNode.deserialise(
 
1332
            "chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
 
1333
            ("sha1:1234",))
 
1334
        node.unmap(None, ("foo bar",))
 
1335
        self.assertEqual(None, node.key())
 
1336
 
 
1337
    def test_map_exceeding_max_size_only_entry_new(self):
 
1338
        node = LeafNode()
 
1339
        node.set_maximum_size(10)
 
1340
        result = node.map(None, ("foo bar",), "baz quux")
 
1341
        self.assertEqual(("foo bar", [("", node)]), result)
 
1342
        self.assertTrue(10 < node._current_size())
 
1343
 
 
1344
    def test_map_exceeding_max_size_second_entry_early_difference_new(self):
 
1345
        node = LeafNode()
 
1346
        node.set_maximum_size(10)
 
1347
        node.map(None, ("foo bar",), "baz quux")
 
1348
        prefix, result = list(node.map(None, ("blue",), "red"))
 
1349
        self.assertEqual("", prefix)
 
1350
        self.assertEqual(2, len(result))
 
1351
        split_chars = set([result[0][0], result[1][0]])
 
1352
        self.assertEqual(set(["f", "b"]), split_chars)
 
1353
        nodes = dict(result)
 
1354
        node = nodes["f"]
 
1355
        self.assertEqual({("foo bar",): "baz quux"}, self.to_dict(node, None))
 
1356
        self.assertEqual(10, node.maximum_size)
 
1357
        self.assertEqual(1, node._key_width)
 
1358
        node = nodes["b"]
 
1359
        self.assertEqual({("blue",): "red"}, self.to_dict(node, None))
 
1360
        self.assertEqual(10, node.maximum_size)
 
1361
        self.assertEqual(1, node._key_width)
 
1362
 
 
1363
    def test_map_first(self):
 
1364
        node = LeafNode()
 
1365
        result = node.map(None, ("foo bar",), "baz quux")
 
1366
        self.assertEqual(("foo bar", [("", node)]), result)
 
1367
        self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node, None))
 
1368
        self.assertEqual(1, len(node))
 
1369
 
 
1370
    def test_map_second(self):
 
1371
        node = LeafNode()
 
1372
        node.map(None, ("foo bar",), "baz quux")
 
1373
        result = node.map(None, ("bingo",), "bango")
 
1374
        self.assertEqual(("", [("", node)]), result)
 
1375
        self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"},
 
1376
            self.to_dict(node, None))
 
1377
        self.assertEqual(2, len(node))
 
1378
 
 
1379
    def test_map_replacement(self):
 
1380
        node = LeafNode()
 
1381
        node.map(None, ("foo bar",), "baz quux")
 
1382
        result = node.map(None, ("foo bar",), "bango")
 
1383
        self.assertEqual(("foo bar", [("", node)]), result)
 
1384
        self.assertEqual({("foo bar",): "bango"},
 
1385
            self.to_dict(node, None))
 
1386
        self.assertEqual(1, len(node))
 
1387
 
 
1388
    def test_serialise_empty(self):
 
1389
        store = self.get_chk_bytes()
 
1390
        node = LeafNode()
 
1391
        node.set_maximum_size(10)
 
1392
        expected_key = ("sha1:f34c3f0634ea3f85953dffa887620c0a5b1f4a51",)
 
1393
        self.assertEqual([expected_key],
 
1394
            list(node.serialise(store)))
 
1395
        self.assertEqual("chkleaf:\n10\n1\n0\n\n", self.read_bytes(store, expected_key))
 
1396
        self.assertEqual(expected_key, node.key())
 
1397
 
 
1398
    def test_serialise_items(self):
 
1399
        store = self.get_chk_bytes()
 
1400
        node = LeafNode()
 
1401
        node.set_maximum_size(10)
 
1402
        node.map(None, ("foo bar",), "baz quux")
 
1403
        expected_key = ("sha1:f89fac7edfc6bdb1b1b54a556012ff0c646ef5e0",)
 
1404
        self.assertEqual('foo bar', node._common_serialised_prefix)
 
1405
        self.assertEqual([expected_key],
 
1406
            list(node.serialise(store)))
 
1407
        self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\n\x001\nbaz quux\n",
 
1408
            self.read_bytes(store, expected_key))
 
1409
        self.assertEqual(expected_key, node.key())
 
1410
 
 
1411
    def test_unique_serialised_prefix_empty_new(self):
 
1412
        node = LeafNode()
 
1413
        self.assertIs(None, node._compute_search_prefix())
 
1414
 
 
1415
    def test_unique_serialised_prefix_one_item_new(self):
 
1416
        node = LeafNode()
 
1417
        node.map(None, ("foo bar", "baz"), "baz quux")
 
1418
        self.assertEqual("foo bar\x00baz", node._compute_search_prefix())
 
1419
 
 
1420
    def test_unmap_missing(self):
 
1421
        node = LeafNode()
 
1422
        self.assertRaises(KeyError, node.unmap, None, ("foo bar",))
 
1423
 
 
1424
    def test_unmap_present(self):
 
1425
        node = LeafNode()
 
1426
        node.map(None, ("foo bar",), "baz quux")
 
1427
        result = node.unmap(None, ("foo bar",))
 
1428
        self.assertEqual(node, result)
 
1429
        self.assertEqual({}, self.to_dict(node, None))
 
1430
        self.assertEqual(0, len(node))
 
1431
 
 
1432
    def test_map_maintains_common_prefixes(self):
 
1433
        node = LeafNode()
 
1434
        node._key_width = 2
 
1435
        node.map(None, ("foo bar", "baz"), "baz quux")
 
1436
        self.assertEqual('foo bar\x00baz', node._search_prefix)
 
1437
        self.assertEqual('foo bar\x00baz', node._common_serialised_prefix)
 
1438
        node.map(None, ("foo bar", "bing"), "baz quux")
 
1439
        self.assertEqual('foo bar\x00b', node._search_prefix)
 
1440
        self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
 
1441
        node.map(None, ("fool", "baby"), "baz quux")
 
1442
        self.assertEqual('foo', node._search_prefix)
 
1443
        self.assertEqual('foo', node._common_serialised_prefix)
 
1444
        node.map(None, ("foo bar", "baz"), "replaced")
 
1445
        self.assertEqual('foo', node._search_prefix)
 
1446
        self.assertEqual('foo', node._common_serialised_prefix)
 
1447
        node.map(None, ("very", "different"), "value")
 
1448
        self.assertEqual('', node._search_prefix)
 
1449
        self.assertEqual('', node._common_serialised_prefix)
 
1450
 
 
1451
    def test_unmap_maintains_common_prefixes(self):
 
1452
        node = LeafNode()
 
1453
        node._key_width = 2
 
1454
        node.map(None, ("foo bar", "baz"), "baz quux")
 
1455
        node.map(None, ("foo bar", "bing"), "baz quux")
 
1456
        node.map(None, ("fool", "baby"), "baz quux")
 
1457
        node.map(None, ("very", "different"), "value")
 
1458
        self.assertEqual('', node._search_prefix)
 
1459
        self.assertEqual('', node._common_serialised_prefix)
 
1460
        node.unmap(None, ("very", "different"))
 
1461
        self.assertEqual("foo", node._search_prefix)
 
1462
        self.assertEqual("foo", node._common_serialised_prefix)
 
1463
        node.unmap(None, ("fool", "baby"))
 
1464
        self.assertEqual('foo bar\x00b', node._search_prefix)
 
1465
        self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
 
1466
        node.unmap(None, ("foo bar", "baz"))
 
1467
        self.assertEqual('foo bar\x00bing', node._search_prefix)
 
1468
        self.assertEqual('foo bar\x00bing', node._common_serialised_prefix)
 
1469
        node.unmap(None, ("foo bar", "bing"))
 
1470
        self.assertEqual(None, node._search_prefix)
 
1471
        self.assertEqual(None, node._common_serialised_prefix)
 
1472
 
 
1473
 
 
1474
class TestInternalNode(TestCaseWithStore):
 
1475
 
 
1476
    def test_add_node_empty_new(self):
 
1477
        node = InternalNode('fo')
 
1478
        child = LeafNode()
 
1479
        child.set_maximum_size(100)
 
1480
        child.map(None, ("foo",), "bar")
 
1481
        node.add_node("foo", child)
 
1482
        # Note that node isn't strictly valid now as a tree (only one child),
 
1483
        # but thats ok for this test.
 
1484
        # The first child defines the node's width:
 
1485
        self.assertEqual(3, node._node_width)
 
1486
        # We should be able to iterate over the contents without doing IO.
 
1487
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, None))
 
1488
        # The length should be known:
 
1489
        self.assertEqual(1, len(node))
 
1490
        # serialising the node should serialise the child and the node.
 
1491
        chk_bytes = self.get_chk_bytes()
 
1492
        keys = list(node.serialise(chk_bytes))
 
1493
        child_key = child.serialise(chk_bytes)[0]
 
1494
        self.assertEqual(
 
1495
            [child_key, ('sha1:cf67e9997d8228a907c1f5bfb25a8bd9cd916fac',)],
 
1496
            keys)
 
1497
        # We should be able to access deserialised content.
 
1498
        bytes = self.read_bytes(chk_bytes, keys[1])
 
1499
        node = chk_map._deserialise(bytes, keys[1], None)
 
1500
        self.assertEqual(1, len(node))
 
1501
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, chk_bytes))
 
1502
        self.assertEqual(3, node._node_width)
 
1503
 
 
1504
    def test_add_node_resets_key_new(self):
 
1505
        node = InternalNode('fo')
 
1506
        child = LeafNode()
 
1507
        child.set_maximum_size(100)
 
1508
        child.map(None, ("foo",), "bar")
 
1509
        node.add_node("foo", child)
 
1510
        chk_bytes = self.get_chk_bytes()
 
1511
        keys = list(node.serialise(chk_bytes))
 
1512
        self.assertEqual(keys[1], node._key)
 
1513
        node.add_node("fos", child)
 
1514
        self.assertEqual(None, node._key)
 
1515
 
 
1516
#    def test_add_node_empty_oversized_one_ok_new(self):
 
1517
#    def test_add_node_one_oversized_second_kept_minimum_fan(self):
 
1518
#    def test_add_node_two_oversized_third_kept_minimum_fan(self):
 
1519
#    def test_add_node_one_oversized_second_splits_errors(self):
 
1520
 
 
1521
    def test__iter_nodes_no_key_filter(self):
 
1522
        node = InternalNode('')
 
1523
        child = LeafNode()
 
1524
        child.set_maximum_size(100)
 
1525
        child.map(None, ("foo",), "bar")
 
1526
        node.add_node("f", child)
 
1527
        child = LeafNode()
 
1528
        child.set_maximum_size(100)
 
1529
        child.map(None, ("bar",), "baz")
 
1530
        node.add_node("b", child)
 
1531
 
 
1532
        for child, node_key_filter in node._iter_nodes(None, key_filter=None):
 
1533
            self.assertEqual(None, node_key_filter)
 
1534
 
 
1535
    def test__iter_nodes_splits_key_filter(self):
 
1536
        node = InternalNode('')
 
1537
        child = LeafNode()
 
1538
        child.set_maximum_size(100)
 
1539
        child.map(None, ("foo",), "bar")
 
1540
        node.add_node("f", child)
 
1541
        child = LeafNode()
 
1542
        child.set_maximum_size(100)
 
1543
        child.map(None, ("bar",), "baz")
 
1544
        node.add_node("b", child)
 
1545
 
 
1546
        # foo and bar both match exactly one leaf node, but 'cat' should not
 
1547
        # match any, and should not be placed in one.
 
1548
        key_filter = (('foo',), ('bar',), ('cat',))
 
1549
        for child, node_key_filter in node._iter_nodes(None,
 
1550
                                                       key_filter=key_filter):
 
1551
            # each child could only match one key filter, so make sure it was
 
1552
            # properly filtered
 
1553
            self.assertEqual(1, len(node_key_filter))
 
1554
 
 
1555
    def test__iter_nodes_with_multiple_matches(self):
 
1556
        node = InternalNode('')
 
1557
        child = LeafNode()
 
1558
        child.set_maximum_size(100)
 
1559
        child.map(None, ("foo",), "val")
 
1560
        child.map(None, ("fob",), "val")
 
1561
        node.add_node("f", child)
 
1562
        child = LeafNode()
 
1563
        child.set_maximum_size(100)
 
1564
        child.map(None, ("bar",), "val")
 
1565
        child.map(None, ("baz",), "val")
 
1566
        node.add_node("b", child)
 
1567
 
 
1568
        # Note that 'ram' doesn't match anything, so it should be freely
 
1569
        # ignored
 
1570
        key_filter = (('foo',), ('fob',), ('bar',), ('baz',), ('ram',))
 
1571
        for child, node_key_filter in node._iter_nodes(None,
 
1572
                                                       key_filter=key_filter):
 
1573
            # each child could match two key filters, so make sure they were
 
1574
            # both included.
 
1575
            self.assertEqual(2, len(node_key_filter))
 
1576
 
 
1577
    def make_fo_fa_node(self):
 
1578
        node = InternalNode('f')
 
1579
        child = LeafNode()
 
1580
        child.set_maximum_size(100)
 
1581
        child.map(None, ("foo",), "val")
 
1582
        child.map(None, ("fob",), "val")
 
1583
        node.add_node('fo', child)
 
1584
        child = LeafNode()
 
1585
        child.set_maximum_size(100)
 
1586
        child.map(None, ("far",), "val")
 
1587
        child.map(None, ("faz",), "val")
 
1588
        node.add_node("fa", child)
 
1589
        return node
 
1590
 
 
1591
    def test__iter_nodes_single_entry(self):
 
1592
        node = self.make_fo_fa_node()
 
1593
        key_filter = [('foo',)]
 
1594
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
 
1595
        self.assertEqual(1, len(nodes))
 
1596
        self.assertEqual(key_filter, nodes[0][1])
 
1597
 
 
1598
    def test__iter_nodes_single_entry_misses(self):
 
1599
        node = self.make_fo_fa_node()
 
1600
        key_filter = [('bar',)]
 
1601
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
 
1602
        self.assertEqual(0, len(nodes))
 
1603
 
 
1604
    def test__iter_nodes_mixed_key_width(self):
 
1605
        node = self.make_fo_fa_node()
 
1606
        key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('b',)]
 
1607
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
 
1608
        self.assertEqual(1, len(nodes))
 
1609
        matches = key_filter[:]
 
1610
        matches.remove(('b',))
 
1611
        self.assertEqual(sorted(matches), sorted(nodes[0][1]))
 
1612
 
 
1613
    def test__iter_nodes_match_all(self):
 
1614
        node = self.make_fo_fa_node()
 
1615
        key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('f',)]
 
1616
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
 
1617
        self.assertEqual(2, len(nodes))
 
1618
 
 
1619
    def test__iter_nodes_fixed_widths_and_misses(self):
 
1620
        node = self.make_fo_fa_node()
 
1621
        # foo and faa should both match one child, baz should miss
 
1622
        key_filter = [('foo',), ('faa',), ('baz',)]
 
1623
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
 
1624
        self.assertEqual(2, len(nodes))
 
1625
        for node, matches in nodes:
 
1626
            self.assertEqual(1, len(matches))
 
1627
 
 
1628
    def test_iteritems_empty_new(self):
 
1629
        node = InternalNode()
 
1630
        self.assertEqual([], sorted(node.iteritems(None)))
 
1631
 
 
1632
    def test_iteritems_two_children(self):
 
1633
        node = InternalNode()
 
1634
        leaf1 = LeafNode()
 
1635
        leaf1.map(None, ('foo bar',), 'quux')
 
1636
        leaf2 = LeafNode()
 
1637
        leaf2.map(None, ('strange',), 'beast')
 
1638
        node.add_node("f", leaf1)
 
1639
        node.add_node("s", leaf2)
 
1640
        self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')],
 
1641
            sorted(node.iteritems(None)))
 
1642
 
 
1643
    def test_iteritems_two_children_partial(self):
 
1644
        node = InternalNode()
 
1645
        leaf1 = LeafNode()
 
1646
        leaf1.map(None, ('foo bar',), 'quux')
 
1647
        leaf2 = LeafNode()
 
1648
        leaf2.map(None, ('strange',), 'beast')
 
1649
        node.add_node("f", leaf1)
 
1650
        # This sets up a path that should not be followed - it will error if
 
1651
        # the code tries to.
 
1652
        node._items['f'] = None
 
1653
        node.add_node("s", leaf2)
 
1654
        self.assertEqual([(('strange',), 'beast')],
 
1655
            sorted(node.iteritems(None, [('strange',), ('weird',)])))
 
1656
 
 
1657
    def test_iteritems_two_children_with_hash(self):
 
1658
        search_key_func = chk_map.search_key_registry.get('hash-255-way')
 
1659
        node = InternalNode(search_key_func=search_key_func)
 
1660
        leaf1 = LeafNode(search_key_func=search_key_func)
 
1661
        leaf1.map(None, ('foo bar',), 'quux')
 
1662
        leaf2 = LeafNode(search_key_func=search_key_func)
 
1663
        leaf2.map(None, ('strange',), 'beast')
 
1664
        self.assertEqual('\xbeF\x014', search_key_func(('foo bar',)))
 
1665
        self.assertEqual('\x85\xfa\xf7K', search_key_func(('strange',)))
 
1666
        node.add_node("\xbe", leaf1)
 
1667
        # This sets up a path that should not be followed - it will error if
 
1668
        # the code tries to.
 
1669
        node._items['\xbe'] = None
 
1670
        node.add_node("\x85", leaf2)
 
1671
        self.assertEqual([(('strange',), 'beast')],
 
1672
            sorted(node.iteritems(None, [('strange',), ('weird',)])))
 
1673
 
 
1674
    def test_iteritems_partial_empty(self):
 
1675
        node = InternalNode()
 
1676
        self.assertEqual([], sorted(node.iteritems([('missing',)])))
 
1677
 
 
1678
    def test_map_to_new_child_new(self):
 
1679
        chkmap = self._get_map({('k1',):'foo', ('k2',):'bar'}, maximum_size=10)
 
1680
        chkmap._ensure_root()
 
1681
        node = chkmap._root_node
 
1682
        # Ensure test validity: nothing paged in below the root.
 
1683
        self.assertEqual(2,
 
1684
            len([value for value in node._items.values()
 
1685
                if type(value) == tuple]))
 
1686
        # now, mapping to k3 should add a k3 leaf
 
1687
        prefix, nodes = node.map(None, ('k3',), 'quux')
 
1688
        self.assertEqual("k", prefix)
 
1689
        self.assertEqual([("", node)], nodes)
 
1690
        # check new child details
 
1691
        child = node._items['k3']
 
1692
        self.assertIsInstance(child, LeafNode)
 
1693
        self.assertEqual(1, len(child))
 
1694
        self.assertEqual({('k3',): 'quux'}, self.to_dict(child, None))
 
1695
        self.assertEqual(None, child._key)
 
1696
        self.assertEqual(10, child.maximum_size)
 
1697
        self.assertEqual(1, child._key_width)
 
1698
        # Check overall structure:
 
1699
        self.assertEqual(3, len(chkmap))
 
1700
        self.assertEqual({('k1',): 'foo', ('k2',): 'bar', ('k3',): 'quux'},
 
1701
            self.to_dict(chkmap))
 
1702
        # serialising should only serialise the new data - k3 and the internal
 
1703
        # node.
 
1704
        keys = list(node.serialise(chkmap._store))
 
1705
        child_key = child.serialise(chkmap._store)[0]
 
1706
        self.assertEqual([child_key, keys[1]], keys)
 
1707
 
 
1708
    def test_map_to_child_child_splits_new(self):
 
1709
        chkmap = self._get_map({('k1',):'foo', ('k22',):'bar'}, maximum_size=10)
 
1710
        # Check for the canonical root value for this tree:
 
1711
        self.assertEqualDiff("'' InternalNode\n"
 
1712
                             "  'k1' LeafNode\n"
 
1713
                             "      ('k1',) 'foo'\n"
 
1714
                             "  'k2' LeafNode\n"
 
1715
                             "      ('k22',) 'bar'\n"
 
1716
                             , chkmap._dump_tree())
 
1717
        # _dump_tree pages everything in, so reload using just the root
 
1718
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
 
1719
        chkmap._ensure_root()
 
1720
        node = chkmap._root_node
 
1721
        # Ensure test validity: nothing paged in below the root.
 
1722
        self.assertEqual(2,
 
1723
            len([value for value in node._items.values()
 
1724
                if type(value) == tuple]))
 
1725
        # now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
 
1726
        # k23, which for simplicity in the current implementation generates
 
1727
        # a new internal node between node, and k22/k23.
 
1728
        prefix, nodes = node.map(chkmap._store, ('k23',), 'quux')
 
1729
        self.assertEqual("k", prefix)
 
1730
        self.assertEqual([("", node)], nodes)
 
1731
        # check new child details
 
1732
        child = node._items['k2']
 
1733
        self.assertIsInstance(child, InternalNode)
 
1734
        self.assertEqual(2, len(child))
 
1735
        self.assertEqual({('k22',): 'bar', ('k23',): 'quux'},
 
1736
            self.to_dict(child, None))
 
1737
        self.assertEqual(None, child._key)
 
1738
        self.assertEqual(10, child.maximum_size)
 
1739
        self.assertEqual(1, child._key_width)
 
1740
        self.assertEqual(3, child._node_width)
 
1741
        # Check overall structure:
 
1742
        self.assertEqual(3, len(chkmap))
 
1743
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar', ('k23',): 'quux'},
 
1744
            self.to_dict(chkmap))
 
1745
        # serialising should only serialise the new data - although k22 hasn't
 
1746
        # changed because its a special corner case (splitting on with only one
 
1747
        # key leaves one node unaltered), in general k22 is serialised, so we
 
1748
        # expect k22, k23, the new internal node, and node, to be serialised.
 
1749
        keys = list(node.serialise(chkmap._store))
 
1750
        child_key = child._key
 
1751
        k22_key = child._items['k22']._key
 
1752
        k23_key = child._items['k23']._key
 
1753
        self.assertEqual([k22_key, k23_key, child_key, node.key()], keys)
 
1754
        self.assertEqualDiff("'' InternalNode\n"
 
1755
                             "  'k1' LeafNode\n"
 
1756
                             "      ('k1',) 'foo'\n"
 
1757
                             "  'k2' InternalNode\n"
 
1758
                             "    'k22' LeafNode\n"
 
1759
                             "      ('k22',) 'bar'\n"
 
1760
                             "    'k23' LeafNode\n"
 
1761
                             "      ('k23',) 'quux'\n"
 
1762
                             , chkmap._dump_tree())
 
1763
 
 
1764
    def test__search_prefix_filter_with_hash(self):
 
1765
        search_key_func = chk_map.search_key_registry.get('hash-16-way')
 
1766
        node = InternalNode(search_key_func=search_key_func)
 
1767
        node._key_width = 2
 
1768
        node._node_width = 4
 
1769
        self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
 
1770
        self.assertEqual('E8B7', node._search_prefix_filter(('a', 'b')))
 
1771
        self.assertEqual('E8B7', node._search_prefix_filter(('a',)))
 
1772
 
 
1773
    def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
 
1774
        chkmap = self._get_map(
 
1775
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
 
1776
        # Check we have the expected tree.
 
1777
        self.assertEqualDiff("'' InternalNode\n"
 
1778
                             "  'k1' LeafNode\n"
 
1779
                             "      ('k1',) 'foo'\n"
 
1780
                             "  'k2' InternalNode\n"
 
1781
                             "    'k22' LeafNode\n"
 
1782
                             "      ('k22',) 'bar'\n"
 
1783
                             "    'k23' LeafNode\n"
 
1784
                             "      ('k23',) 'quux'\n"
 
1785
                             , chkmap._dump_tree())
 
1786
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
 
1787
        chkmap._ensure_root()
 
1788
        node = chkmap._root_node
 
1789
        # unmapping k23 should give us a root, with k1 and k22 as direct
 
1790
        # children.
 
1791
        result = node.unmap(chkmap._store, ('k23',))
 
1792
        # check the pointed-at object within node - k2 should now point at the
 
1793
        # k22 leaf (which has been paged in to see if we can collapse the tree)
 
1794
        child = node._items['k2']
 
1795
        self.assertIsInstance(child, LeafNode)
 
1796
        self.assertEqual(1, len(child))
 
1797
        self.assertEqual({('k22',): 'bar'},
 
1798
            self.to_dict(child, None))
 
1799
        # Check overall structure is instact:
 
1800
        self.assertEqual(2, len(chkmap))
 
1801
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar'},
 
1802
            self.to_dict(chkmap))
 
1803
        # serialising should only serialise the new data - the root node.
 
1804
        keys = list(node.serialise(chkmap._store))
 
1805
        self.assertEqual([keys[-1]], keys)
 
1806
        chkmap = CHKMap(chkmap._store, keys[-1])
 
1807
        self.assertEqualDiff("'' InternalNode\n"
 
1808
                             "  'k1' LeafNode\n"
 
1809
                             "      ('k1',) 'foo'\n"
 
1810
                             "  'k2' LeafNode\n"
 
1811
                             "      ('k22',) 'bar'\n"
 
1812
                             , chkmap._dump_tree())
 
1813
 
 
1814
    def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self):
 
1815
        chkmap = self._get_map(
 
1816
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
 
1817
        self.assertEqualDiff("'' InternalNode\n"
 
1818
                             "  'k1' LeafNode\n"
 
1819
                             "      ('k1',) 'foo'\n"
 
1820
                             "  'k2' InternalNode\n"
 
1821
                             "    'k22' LeafNode\n"
 
1822
                             "      ('k22',) 'bar'\n"
 
1823
                             "    'k23' LeafNode\n"
 
1824
                             "      ('k23',) 'quux'\n"
 
1825
                             , chkmap._dump_tree())
 
1826
        orig_root = chkmap._root_node
 
1827
        chkmap = CHKMap(chkmap._store, orig_root)
 
1828
        chkmap._ensure_root()
 
1829
        node = chkmap._root_node
 
1830
        k2_ptr = node._items['k2']
 
1831
        # unmapping k1 should give us a root, with k22 and k23 as direct
 
1832
        # children, and should not have needed to page in the subtree.
 
1833
        result = node.unmap(chkmap._store, ('k1',))
 
1834
        self.assertEqual(k2_ptr, result)
 
1835
        chkmap = CHKMap(chkmap._store, orig_root)
 
1836
        # Unmapping at the CHKMap level should switch to the new root
 
1837
        chkmap.unmap(('k1',))
 
1838
        self.assertEqual(k2_ptr, chkmap._root_node)
 
1839
        self.assertEqualDiff("'' InternalNode\n"
 
1840
                             "  'k22' LeafNode\n"
 
1841
                             "      ('k22',) 'bar'\n"
 
1842
                             "  'k23' LeafNode\n"
 
1843
                             "      ('k23',) 'quux'\n"
 
1844
                             , chkmap._dump_tree())
 
1845
 
 
1846
 
 
1847
# leaf:
 
1848
# map -> fits - done
 
1849
# map -> doesn't fit - shrink from left till fits
 
1850
#        key data to return: the common prefix, new nodes.
 
1851
 
 
1852
# unmap -> how to tell if siblings can be combined.
 
1853
#          combing leaf nodes means expanding the prefix to the left; so gather the size of
 
1854
#          all the leaf nodes addressed by expanding the prefix by 1; if any adjacent node
 
1855
#          is an internal node, we know that that is a dense subtree - can't combine.
 
1856
#          otherwise as soon as the sum of serialised values exceeds the split threshold
 
1857
#          we know we can't combine - stop.
 
1858
# unmap -> key return data - space in node, common prefix length? and key count
 
1859
# internal:
 
1860
# variable length prefixes? -> later start with fixed width to get something going
 
1861
# map -> fits - update pointer to leaf
 
1862
#        return [prefix and node] - seems sound.
 
1863
# map -> doesn't fit - find unique prefix and shift right
 
1864
#        create internal nodes for all the partitions, return list of unique
 
1865
#        prefixes and nodes.
 
1866
# map -> new prefix - create a leaf
 
1867
# unmap -> if child key count 0, remove
 
1868
# unmap -> return space in node, common prefix length? (why?), key count
 
1869
# map:
 
1870
# map, if 1 node returned, use it, otherwise make an internal and populate.
 
1871
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
 
1872
# code)
 
1873
# map inits as empty leafnode.
 
1874
# tools:
 
1875
# visualiser
 
1876
 
 
1877
 
 
1878
# how to handle:
 
1879
# AA, AB, AC, AD, BA
 
1880
# packed internal node - ideal:
 
1881
# AA, AB, AC, AD, BA
 
1882
# single byte fanout - A,B,   AA,AB,AC,AD,     BA
 
1883
# build order's:
 
1884
# BA
 
1885
# AB - split, but we want to end up with AB, BA, in one node, with
 
1886
# 1-4K get0
 
1887
 
 
1888
 
 
1889
class TestIterInterestingNodes(TestCaseWithStore):
 
1890
 
 
1891
    def get_chk_bytes(self):
 
1892
        if getattr(self, '_chk_bytes', None) is None:
 
1893
            self._chk_bytes = super(TestIterInterestingNodes,
 
1894
                                    self).get_chk_bytes()
 
1895
        return self._chk_bytes
 
1896
 
 
1897
    def get_map_key(self, a_dict, maximum_size=10):
 
1898
        c_map = self._get_map(a_dict, maximum_size=maximum_size,
 
1899
                              chk_bytes=self.get_chk_bytes())
 
1900
        return c_map.key()
 
1901
 
 
1902
    def assertIterInteresting(self, expected, interesting_keys,
 
1903
                              uninteresting_keys):
 
1904
        """Check the result of iter_interesting_nodes.
 
1905
 
 
1906
        :param expected: A list of (record_keys, interesting_chk_pages,
 
1907
                                    interesting key value pairs)
 
1908
        """
 
1909
        store = self.get_chk_bytes()
 
1910
        iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys,
 
1911
                                                    uninteresting_keys)
 
1912
        nodes = list(iter_nodes)
 
1913
        for count, (exp, act) in enumerate(izip(expected, nodes)):
 
1914
            exp_record, exp_items = exp
 
1915
            record, items = act
 
1916
            exp_tuple = (exp_record, sorted(exp_items))
 
1917
            if record is None:
 
1918
                act_tuple = (None, sorted(items))
 
1919
            else:
 
1920
                act_tuple = (record.key, sorted(items))
 
1921
            self.assertEqual(exp_tuple, act_tuple,
 
1922
                             'entry %d did not match expected' % count)
 
1923
        self.assertEqual(len(expected), len(nodes))
 
1924
 
 
1925
    def test_empty_to_one_keys(self):
 
1926
        target = self.get_map_key({('a',): 'content'})
 
1927
        self.assertIterInteresting(
 
1928
            [(target, [(('a',), 'content')]),
 
1929
            ], [target], [])
 
1930
 
 
1931
    def test_none_to_one_key(self):
 
1932
        basis = self.get_map_key({})
 
1933
        target = self.get_map_key({('a',): 'content'})
 
1934
        self.assertIterInteresting(
 
1935
            [(None, [(('a',), 'content')]),
 
1936
             (target, []),
 
1937
            ], [target], [basis])
 
1938
 
 
1939
    def test_one_to_none_key(self):
 
1940
        basis = self.get_map_key({('a',): 'content'})
 
1941
        target = self.get_map_key({})
 
1942
        self.assertIterInteresting(
 
1943
            [(target, [])],
 
1944
            [target], [basis])
 
1945
 
 
1946
    def test_common_pages(self):
 
1947
        basis = self.get_map_key({('a',): 'content',
 
1948
                                  ('b',): 'content',
 
1949
                                  ('c',): 'content',
 
1950
                                 })
 
1951
        target = self.get_map_key({('a',): 'content',
 
1952
                                   ('b',): 'other content',
 
1953
                                   ('c',): 'content',
 
1954
                                  })
 
1955
        target_map = CHKMap(self.get_chk_bytes(), target)
 
1956
        self.assertEqualDiff(
 
1957
            "'' InternalNode\n"
 
1958
            "  'a' LeafNode\n"
 
1959
            "      ('a',) 'content'\n"
 
1960
            "  'b' LeafNode\n"
 
1961
            "      ('b',) 'other content'\n"
 
1962
            "  'c' LeafNode\n"
 
1963
            "      ('c',) 'content'\n",
 
1964
            target_map._dump_tree())
 
1965
        b_key = target_map._root_node._items['b'].key()
 
1966
        # This should return the root node, and the node for the 'b' key
 
1967
        self.assertIterInteresting(
 
1968
            [(target, []),
 
1969
             (b_key, [(('b',), 'other content')])],
 
1970
            [target], [basis])
 
1971
 
 
1972
    def test_common_sub_page(self):
 
1973
        basis = self.get_map_key({('aaa',): 'common',
 
1974
                                  ('c',): 'common',
 
1975
                                 })
 
1976
        target = self.get_map_key({('aaa',): 'common',
 
1977
                                   ('aab',): 'new',
 
1978
                                   ('c',): 'common',
 
1979
                                  })
 
1980
        target_map = CHKMap(self.get_chk_bytes(), target)
 
1981
        self.assertEqualDiff(
 
1982
            "'' InternalNode\n"
 
1983
            "  'a' InternalNode\n"
 
1984
            "    'aaa' LeafNode\n"
 
1985
            "      ('aaa',) 'common'\n"
 
1986
            "    'aab' LeafNode\n"
 
1987
            "      ('aab',) 'new'\n"
 
1988
            "  'c' LeafNode\n"
 
1989
            "      ('c',) 'common'\n",
 
1990
            target_map._dump_tree())
 
1991
        # The key for the internal aa node
 
1992
        a_key = target_map._root_node._items['a'].key()
 
1993
        # The key for the leaf aab node
 
1994
        aab_key = target_map._root_node._items['a']._items['aab'].key()
 
1995
        self.assertIterInteresting(
 
1996
            [(target, []),
 
1997
             (a_key, []),
 
1998
             (aab_key, [(('aab',), 'new')])],
 
1999
            [target], [basis])
 
2000
 
 
2001
    def test_common_leaf(self):
 
2002
        basis = self.get_map_key({})
 
2003
        target1 = self.get_map_key({('aaa',): 'common'})
 
2004
        target2 = self.get_map_key({('aaa',): 'common',
 
2005
                                    ('bbb',): 'new',
 
2006
                                   })
 
2007
        target3 = self.get_map_key({('aaa',): 'common',
 
2008
                                    ('aac',): 'other',
 
2009
                                    ('bbb',): 'new',
 
2010
                                   })
 
2011
        # The LeafNode containing 'aaa': 'common' occurs at 3 different levels.
 
2012
        # Once as a root node, once as a second layer, and once as a third
 
2013
        # layer. It should only be returned one time regardless
 
2014
        target1_map = CHKMap(self.get_chk_bytes(), target1)
 
2015
        self.assertEqualDiff(
 
2016
            "'' LeafNode\n"
 
2017
            "      ('aaa',) 'common'\n",
 
2018
            target1_map._dump_tree())
 
2019
        target2_map = CHKMap(self.get_chk_bytes(), target2)
 
2020
        self.assertEqualDiff(
 
2021
            "'' InternalNode\n"
 
2022
            "  'a' LeafNode\n"
 
2023
            "      ('aaa',) 'common'\n"
 
2024
            "  'b' LeafNode\n"
 
2025
            "      ('bbb',) 'new'\n",
 
2026
            target2_map._dump_tree())
 
2027
        target3_map = CHKMap(self.get_chk_bytes(), target3)
 
2028
        self.assertEqualDiff(
 
2029
            "'' InternalNode\n"
 
2030
            "  'a' InternalNode\n"
 
2031
            "    'aaa' LeafNode\n"
 
2032
            "      ('aaa',) 'common'\n"
 
2033
            "    'aac' LeafNode\n"
 
2034
            "      ('aac',) 'other'\n"
 
2035
            "  'b' LeafNode\n"
 
2036
            "      ('bbb',) 'new'\n",
 
2037
            target3_map._dump_tree())
 
2038
        aaa_key = target1_map._root_node.key()
 
2039
        b_key = target2_map._root_node._items['b'].key()
 
2040
        a_key = target3_map._root_node._items['a'].key()
 
2041
        aac_key = target3_map._root_node._items['a']._items['aac'].key()
 
2042
        self.assertIterInteresting(
 
2043
            [(None, [(('aaa',), 'common')]),
 
2044
             (target1, []),
 
2045
             (target2, []),
 
2046
             (target3, []),
 
2047
             (b_key, [(('bbb',), 'new')]),
 
2048
             (a_key, []),
 
2049
             (aac_key, [(('aac',), 'other')]),
 
2050
            ], [target1, target2, target3], [basis])
 
2051
 
 
2052
        self.assertIterInteresting(
 
2053
            [(target2, []),
 
2054
             (target3, []),
 
2055
             (b_key, [(('bbb',), 'new')]),
 
2056
             (a_key, []),
 
2057
             (aac_key, [(('aac',), 'other')]),
 
2058
            ], [target2, target3], [target1])
 
2059
 
 
2060
        # This may be a case that we relax. A root node is a deep child of the
 
2061
        # excluded set. The cost is buffering root nodes until we have
 
2062
        # determined all possible exclusions. (Because a prefix of '', cannot
 
2063
        # be excluded.)
 
2064
        self.assertIterInteresting(
 
2065
            [], [target1], [target3])
 
2066
 
 
2067
    def test_multiple_maps(self):
 
2068
        basis1 = self.get_map_key({('aaa',): 'common',
 
2069
                                   ('aab',): 'basis1',
 
2070
                                  })
 
2071
        basis2 = self.get_map_key({('bbb',): 'common',
 
2072
                                   ('bbc',): 'basis2',
 
2073
                                  })
 
2074
        target1 = self.get_map_key({('aaa',): 'common',
 
2075
                                    ('aac',): 'target1',
 
2076
                                    ('bbb',): 'common',
 
2077
                                   })
 
2078
        target2 = self.get_map_key({('aaa',): 'common',
 
2079
                                    ('bba',): 'target2',
 
2080
                                    ('bbb',): 'common',
 
2081
                                   })
 
2082
        target1_map = CHKMap(self.get_chk_bytes(), target1)
 
2083
        self.assertEqualDiff(
 
2084
            "'' InternalNode\n"
 
2085
            "  'a' InternalNode\n"
 
2086
            "    'aaa' LeafNode\n"
 
2087
            "      ('aaa',) 'common'\n"
 
2088
            "    'aac' LeafNode\n"
 
2089
            "      ('aac',) 'target1'\n"
 
2090
            "  'b' LeafNode\n"
 
2091
            "      ('bbb',) 'common'\n",
 
2092
            target1_map._dump_tree())
 
2093
        # The key for the target1 internal a node
 
2094
        a_key = target1_map._root_node._items['a'].key()
 
2095
        # The key for the leaf aac node
 
2096
        aac_key = target1_map._root_node._items['a']._items['aac'].key()
 
2097
 
 
2098
        target2_map = CHKMap(self.get_chk_bytes(), target2)
 
2099
        self.assertEqualDiff(
 
2100
            "'' InternalNode\n"
 
2101
            "  'a' LeafNode\n"
 
2102
            "      ('aaa',) 'common'\n"
 
2103
            "  'b' InternalNode\n"
 
2104
            "    'bba' LeafNode\n"
 
2105
            "      ('bba',) 'target2'\n"
 
2106
            "    'bbb' LeafNode\n"
 
2107
            "      ('bbb',) 'common'\n",
 
2108
            target2_map._dump_tree())
 
2109
        # The key for the target2 internal bb node
 
2110
        b_key = target2_map._root_node._items['b'].key()
 
2111
        # The key for the leaf bba node
 
2112
        bba_key = target2_map._root_node._items['b']._items['bba'].key()
 
2113
        self.assertIterInteresting(
 
2114
            [(target1, []),
 
2115
             (target2, []),
 
2116
             (a_key, []),
 
2117
             (b_key, []),
 
2118
             (aac_key, [(('aac',), 'target1')]),
 
2119
             (bba_key, [(('bba',), 'target2')]),
 
2120
            ], [target1, target2], [basis1, basis2])
 
2121
 
 
2122
    def test_multiple_maps_overlapping_common_new(self):
 
2123
        # Test that when a node found through the interesting_keys iteration
 
2124
        # for *some roots* and also via the uninteresting keys iteration, that
 
2125
        # it is still scanned for uninteresting refs and items, because its
 
2126
        # not truely new. This requires 2 levels of InternalNodes to expose,
 
2127
        # because of the way the bootstrap in _find_children_info works.
 
2128
        # This suggests that the code is probably amenable to/benefit from
 
2129
        # consolidation.
 
2130
        # How does this test work?
 
2131
        # 1) We need a second level InternalNode present in a basis tree.
 
2132
        # 2) We need a left side new tree that uses that InternalNode
 
2133
        # 3) We need a right side new tree that does not use that InternalNode
 
2134
        #    at all but that has an unchanged *value* that was reachable inside
 
2135
        #    that InternalNode
 
2136
        basis = self.get_map_key({
 
2137
            # InternalNode, unchanged in left:
 
2138
            ('aaa',): 'left',
 
2139
            ('abb',): 'right',
 
2140
            # Forces an internalNode at 'a'
 
2141
            ('ccc',): 'common',
 
2142
            })
 
2143
        left = self.get_map_key({
 
2144
            # All of basis unchanged
 
2145
            ('aaa',): 'left',
 
2146
            ('abb',): 'right',
 
2147
            ('ccc',): 'common',
 
2148
            # And a new top level node so the root key is different
 
2149
            ('ddd',): 'change',
 
2150
            })
 
2151
        right = self.get_map_key({
 
2152
            # A value that is unchanged from basis and thus should be filtered
 
2153
            # out.
 
2154
            ('abb',): 'right'
 
2155
            })
 
2156
        basis_map = CHKMap(self.get_chk_bytes(), basis)
 
2157
        self.assertEqualDiff(
 
2158
            "'' InternalNode\n"
 
2159
            "  'a' InternalNode\n"
 
2160
            "    'aa' LeafNode\n"
 
2161
            "      ('aaa',) 'left'\n"
 
2162
            "    'ab' LeafNode\n"
 
2163
            "      ('abb',) 'right'\n"
 
2164
            "  'c' LeafNode\n"
 
2165
            "      ('ccc',) 'common'\n",
 
2166
            basis_map._dump_tree())
 
2167
        # Get left expected data
 
2168
        left_map = CHKMap(self.get_chk_bytes(), left)
 
2169
        self.assertEqualDiff(
 
2170
            "'' InternalNode\n"
 
2171
            "  'a' InternalNode\n"
 
2172
            "    'aa' LeafNode\n"
 
2173
            "      ('aaa',) 'left'\n"
 
2174
            "    'ab' LeafNode\n"
 
2175
            "      ('abb',) 'right'\n"
 
2176
            "  'c' LeafNode\n"
 
2177
            "      ('ccc',) 'common'\n"
 
2178
            "  'd' LeafNode\n"
 
2179
            "      ('ddd',) 'change'\n",
 
2180
            left_map._dump_tree())
 
2181
        # Keys from left side target
 
2182
        l_d_key = left_map._root_node._items['d'].key()
 
2183
        # Get right expected data
 
2184
        right_map = CHKMap(self.get_chk_bytes(), right)
 
2185
        self.assertEqualDiff(
 
2186
            "'' LeafNode\n"
 
2187
            "      ('abb',) 'right'\n",
 
2188
            right_map._dump_tree())
 
2189
        # Keys from the right side target - none, the root is enough.
 
2190
        # Test behaviour
 
2191
        self.expectFailure("we don't properly filter different depths",
 
2192
            self.assertIterInteresting,
 
2193
            [(left, []),
 
2194
             (right, []),
 
2195
             (l_d_key, [(('ddd',), 'change')]),
 
2196
            ], [left, right], [basis])
 
2197
        self.assertIterInteresting(
 
2198
            [(left, []),
 
2199
             (right, []),
 
2200
             (l_d_key, [(('ddd',), 'change')]),
 
2201
            ], [left, right], [basis])
 
2202
 
 
2203
    def test_multiple_maps_similar(self):
 
2204
        # We want to have a depth=2 tree, with multiple entries in each leaf
 
2205
        # node
 
2206
        basis = self.get_map_key({
 
2207
            ('aaa',): 'unchanged',
 
2208
            ('abb',): 'will change left',
 
2209
            ('caa',): 'unchanged',
 
2210
            ('cbb',): 'will change right',
 
2211
            }, maximum_size=60)
 
2212
        left = self.get_map_key({
 
2213
            ('aaa',): 'unchanged',
 
2214
            ('abb',): 'changed left',
 
2215
            ('caa',): 'unchanged',
 
2216
            ('cbb',): 'will change right',
 
2217
            }, maximum_size=60)
 
2218
        right = self.get_map_key({
 
2219
            ('aaa',): 'unchanged',
 
2220
            ('abb',): 'will change left',
 
2221
            ('caa',): 'unchanged',
 
2222
            ('cbb',): 'changed right',
 
2223
            }, maximum_size=60)
 
2224
        basis_map = CHKMap(self.get_chk_bytes(), basis)
 
2225
        self.assertEqualDiff(
 
2226
            "'' InternalNode\n"
 
2227
            "  'a' LeafNode\n"
 
2228
            "      ('aaa',) 'unchanged'\n"
 
2229
            "      ('abb',) 'will change left'\n"
 
2230
            "  'c' LeafNode\n"
 
2231
            "      ('caa',) 'unchanged'\n"
 
2232
            "      ('cbb',) 'will change right'\n",
 
2233
            basis_map._dump_tree())
 
2234
        # Get left expected data
 
2235
        left_map = CHKMap(self.get_chk_bytes(), left)
 
2236
        self.assertEqualDiff(
 
2237
            "'' InternalNode\n"
 
2238
            "  'a' LeafNode\n"
 
2239
            "      ('aaa',) 'unchanged'\n"
 
2240
            "      ('abb',) 'changed left'\n"
 
2241
            "  'c' LeafNode\n"
 
2242
            "      ('caa',) 'unchanged'\n"
 
2243
            "      ('cbb',) 'will change right'\n",
 
2244
            left_map._dump_tree())
 
2245
        # Keys from left side target
 
2246
        l_a_key = left_map._root_node._items['a'].key()
 
2247
        l_c_key = left_map._root_node._items['c'].key()
 
2248
        # Get right expected data
 
2249
        right_map = CHKMap(self.get_chk_bytes(), right)
 
2250
        self.assertEqualDiff(
 
2251
            "'' InternalNode\n"
 
2252
            "  'a' LeafNode\n"
 
2253
            "      ('aaa',) 'unchanged'\n"
 
2254
            "      ('abb',) 'will change left'\n"
 
2255
            "  'c' LeafNode\n"
 
2256
            "      ('caa',) 'unchanged'\n"
 
2257
            "      ('cbb',) 'changed right'\n",
 
2258
            right_map._dump_tree())
 
2259
        r_a_key = right_map._root_node._items['a'].key()
 
2260
        r_c_key = right_map._root_node._items['c'].key()
 
2261
        self.assertIterInteresting(
 
2262
            [(left, []),
 
2263
             (right, []),
 
2264
             (l_a_key, [(('abb',), 'changed left')]),
 
2265
             (r_c_key, [(('cbb',), 'changed right')]),
 
2266
            ], [left, right], [basis])