# Copyright (C) 2008-2011, 2016 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for maps built on a CHK versionedfiles facility."""

from bzrlib import (
    chk_map,
    errors,
    groupcompress,
    osutils,
    tests,
    )
from bzrlib.chk_map import (
    CHKMap,
    InternalNode,
    LeafNode,
    Node,
    )
from bzrlib.static_tuple import StaticTuple
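
# These tests exercise the CHKMap API roughly as follows (all of it is used
# by the test classes below): CHKMap.from_dict() writes a dict into a CHK
# store and returns the root key, CHKMap(store, root_key) reopens it,
# .map()/.unmap()/.apply_delta() mutate it in memory, ._save() writes the
# nodes back to the store, and ._dump_tree() renders the node layout that
# most of the assertions compare against.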


class TestNode(tests.TestCase):

    def assertCommonPrefix(self, expected_common, prefix, key):
        common = Node.common_prefix(prefix, key)
        self.assertTrue(len(common) <= len(prefix))
        self.assertTrue(len(common) <= len(key))
        self.assertStartsWith(prefix, common)
        self.assertStartsWith(key, common)
        self.assertEqual(expected_common, common)

    def test_common_prefix(self):
        self.assertCommonPrefix('beg', 'beg', 'begin')

    def test_no_common_prefix(self):
        self.assertCommonPrefix('', 'begin', 'end')

    def test_equal(self):
        self.assertCommonPrefix('begin', 'begin', 'begin')

    def test_not_a_prefix(self):
        self.assertCommonPrefix('b', 'begin', 'b')

    def test_empty(self):
        self.assertCommonPrefix('', '', 'end')
        self.assertCommonPrefix('', 'begin', '')
        self.assertCommonPrefix('', '', '')


class TestCaseWithStore(tests.TestCaseWithMemoryTransport):

    def get_chk_bytes(self):
        # This creates a standalone CHK store.
        factory = groupcompress.make_pack_factory(False, False, 1)
        self.chk_bytes = factory(self.get_transport())
        return self.chk_bytes

    def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,
                 search_key_func=None):
        if chk_bytes is None:
            chk_bytes = self.get_chk_bytes()
        root_key = CHKMap.from_dict(chk_bytes, a_dict,
            maximum_size=maximum_size, key_width=key_width,
            search_key_func=search_key_func)
        root_key2 = CHKMap._create_via_map(chk_bytes, a_dict,
            maximum_size=maximum_size, key_width=key_width,
            search_key_func=search_key_func)
        self.assertEqual(root_key, root_key2, "CHKMap.from_dict() did not"
                         " match CHKMap._create_via_map")
        chkmap = CHKMap(chk_bytes, root_key, search_key_func=search_key_func)
        return chkmap

    def read_bytes(self, chk_bytes, key):
        stream = chk_bytes.get_record_stream([key], 'unordered', True)
        record = stream.next()
        if record.storage_kind == 'absent':
            self.fail('Store does not contain the key %s' % (key,))
        return record.get_bytes_as("fulltext")

    def to_dict(self, node, *args):
        return dict(node.iteritems(*args))


class TestCaseWithExampleMaps(TestCaseWithStore):

    def get_chk_bytes(self):
        if getattr(self, '_chk_bytes', None) is None:
            self._chk_bytes = super(TestCaseWithExampleMaps,
                                    self).get_chk_bytes()
        return self._chk_bytes

    def get_map(self, a_dict, maximum_size=100, search_key_func=None):
        c_map = self._get_map(a_dict, maximum_size=maximum_size,
                              chk_bytes=self.get_chk_bytes(),
                              search_key_func=search_key_func)
        return c_map

    def make_root_only_map(self, search_key_func=None):
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('abb',): 'initial abb content',
        }, search_key_func=search_key_func)

    def make_root_only_aaa_ddd_map(self, search_key_func=None):
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('ddd',): 'initial ddd content',
        }, search_key_func=search_key_func)

    def make_one_deep_map(self, search_key_func=None):
        # Same as root_only_map, except it forces an InternalNode at the root
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('abb',): 'initial abb content',
            ('ccc',): 'initial ccc content',
            ('ddd',): 'initial ddd content',
        }, search_key_func=search_key_func)

    def make_two_deep_map(self, search_key_func=None):
        # Carefully chosen so that it creates a 2-deep map for both
        # _search_key_plain and for _search_key_16
        # Also so that things line up with make_one_deep_two_prefix_map
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('abb',): 'initial abb content',
            ('acc',): 'initial acc content',
            ('ace',): 'initial ace content',
            ('add',): 'initial add content',
            ('adh',): 'initial adh content',
            ('adl',): 'initial adl content',
            ('ccc',): 'initial ccc content',
            ('ddd',): 'initial ddd content',
        }, search_key_func=search_key_func)

    def make_one_deep_two_prefix_map(self, search_key_func=None):
        """Create a map with one internal node, but references are extra long.

        Otherwise has similar content to make_two_deep_map.
        """
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('add',): 'initial add content',
            ('adh',): 'initial adh content',
            ('adl',): 'initial adl content',
        }, search_key_func=search_key_func)

    def make_one_deep_one_prefix_map(self, search_key_func=None):
        """Create a map with one internal node, but references are extra long.

        Similar to make_one_deep_two_prefix_map, except the split is at the
        first char, rather than the second.
        """
        return self.get_map({
            ('add',): 'initial add content',
            ('adh',): 'initial adh content',
            ('adl',): 'initial adl content',
            ('bbb',): 'initial bbb content',
        }, search_key_func=search_key_func)


class TestTestCaseWithExampleMaps(TestCaseWithExampleMaps):
    """Actual tests for the provided examples."""

    def test_root_only_map_plain(self):
        c_map = self.make_root_only_map()
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('abb',) 'initial abb content'\n",
            c_map._dump_tree())

    def test_root_only_map_16(self):
        c_map = self.make_root_only_map(search_key_func=chk_map._search_key_16)
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('abb',) 'initial abb content'\n",
            c_map._dump_tree())

    def test_one_deep_map_plain(self):
        c_map = self.make_one_deep_map()
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('abb',) 'initial abb content'\n"
            " ('ccc',) 'initial ccc content'\n"
            " ('ddd',) 'initial ddd content'\n",
            c_map._dump_tree())

    def test_one_deep_map_16(self):
        c_map = self.make_one_deep_map(search_key_func=chk_map._search_key_16)
        self.assertEqualDiff(
            " ('ccc',) 'initial ccc content'\n"
            " ('abb',) 'initial abb content'\n"
            " ('aaa',) 'initial aaa content'\n"
            " ('ddd',) 'initial ddd content'\n",
            c_map._dump_tree())

    def test_root_only_aaa_ddd_plain(self):
        c_map = self.make_root_only_aaa_ddd_map()
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('ddd',) 'initial ddd content'\n",
            c_map._dump_tree())

    def test_root_only_aaa_ddd_16(self):
        c_map = self.make_root_only_aaa_ddd_map(
            search_key_func=chk_map._search_key_16)
        # We use 'aaa' and 'ddd' because they happen to map to 'F' when using
        # _search_key_16
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('ddd',) 'initial ddd content'\n",
            c_map._dump_tree())

    def test_two_deep_map_plain(self):
        c_map = self.make_two_deep_map()
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'initial aaa content'\n"
            " ('abb',) 'initial abb content'\n"
            " ('acc',) 'initial acc content'\n"
            " ('ace',) 'initial ace content'\n"
            " ('add',) 'initial add content'\n"
            " ('adh',) 'initial adh content'\n"
            " ('adl',) 'initial adl content'\n"
            " ('ccc',) 'initial ccc content'\n"
            " ('ddd',) 'initial ddd content'\n",
            c_map._dump_tree())

    def test_two_deep_map_16(self):
        c_map = self.make_two_deep_map(search_key_func=chk_map._search_key_16)
        self.assertEqualDiff(
            " ('acc',) 'initial acc content'\n"
            " ('ccc',) 'initial ccc content'\n"
            " ('abb',) 'initial abb content'\n"
            " ('ace',) 'initial ace content'\n"
            " 'F' InternalNode\n"
            " ('aaa',) 'initial aaa content'\n"
            " ('adl',) 'initial adl content'\n"
            " ('adh',) 'initial adh content'\n"
            " ('ddd',) 'initial ddd content'\n"
            " ('add',) 'initial add content'\n",
            c_map._dump_tree())

    def test_one_deep_two_prefix_map_plain(self):
        c_map = self.make_one_deep_two_prefix_map()
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('add',) 'initial add content'\n"
            " ('adh',) 'initial adh content'\n"
            " ('adl',) 'initial adl content'\n",
            c_map._dump_tree())

    def test_one_deep_two_prefix_map_16(self):
        c_map = self.make_one_deep_two_prefix_map(
            search_key_func=chk_map._search_key_16)
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('adl',) 'initial adl content'\n"
            " ('adh',) 'initial adh content'\n"
            " ('add',) 'initial add content'\n",
            c_map._dump_tree())

    def test_one_deep_one_prefix_map_plain(self):
        c_map = self.make_one_deep_one_prefix_map()
        self.assertEqualDiff(
            " ('add',) 'initial add content'\n"
            " ('adh',) 'initial adh content'\n"
            " ('adl',) 'initial adl content'\n"
            " ('bbb',) 'initial bbb content'\n",
            c_map._dump_tree())

    def test_one_deep_one_prefix_map_16(self):
        c_map = self.make_one_deep_one_prefix_map(
            search_key_func=chk_map._search_key_16)
        self.assertEqualDiff(
            " ('bbb',) 'initial bbb content'\n"
            " ('add',) 'initial add content'\n"
            " ('adh',) 'initial adh content'\n"
            " ('adl',) 'initial adl content'\n",
            c_map._dump_tree())


class TestMap(TestCaseWithStore):
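
    # The assertHasABMap/assertHasEmptyMap helpers below check the exact
    # serialised form of a leaf record.  As exercised by the
    # LeafNode.deserialise tests further down, a "chkleaf:" record is
    # newline-separated: format header, maximum_size, key_width, item count,
    # the common key prefix, then one "<key suffix>\x00<value line count>"
    # line followed by the value lines for each item.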

    def assertHasABMap(self, chk_bytes):
        ab_leaf_bytes = 'chkleaf:\n0\n1\n1\na\n\x001\nb\n'
        ab_sha1 = osutils.sha_string(ab_leaf_bytes)
        self.assertEqual('90986195696b177c8895d48fdb4b7f2366f798a0', ab_sha1)
        root_key = ('sha1:' + ab_sha1,)
        self.assertEqual(ab_leaf_bytes, self.read_bytes(chk_bytes, root_key))
        return root_key

    def assertHasEmptyMap(self, chk_bytes):
        empty_leaf_bytes = 'chkleaf:\n0\n1\n0\n\n'
        empty_sha1 = osutils.sha_string(empty_leaf_bytes)
        self.assertEqual('8571e09bf1bcc5b9621ce31b3d4c93d6e9a1ed26', empty_sha1)
        root_key = ('sha1:' + empty_sha1,)
        self.assertEqual(empty_leaf_bytes, self.read_bytes(chk_bytes, root_key))
        return root_key

    def assertMapLayoutEqual(self, map_one, map_two):
        """Assert that the internal structure is identical between the maps."""
        map_one._ensure_root()
        node_one_stack = [map_one._root_node]
        map_two._ensure_root()
        node_two_stack = [map_two._root_node]
        while node_one_stack:
            node_one = node_one_stack.pop()
            node_two = node_two_stack.pop()
            if node_one.__class__ != node_two.__class__:
                self.assertEqualDiff(map_one._dump_tree(include_keys=True),
                                     map_two._dump_tree(include_keys=True))
            self.assertEqual(node_one._search_prefix,
                             node_two._search_prefix)
            if isinstance(node_one, InternalNode):
                # Internal nodes must have identical references
                self.assertEqual(sorted(node_one._items.keys()),
                                 sorted(node_two._items.keys()))
                node_one_stack.extend([n for n, _ in
                                       node_one._iter_nodes(map_one._store)])
                node_two_stack.extend([n for n, _ in
                                       node_two._iter_nodes(map_two._store)])
            else:
                # Leaf nodes must have identical contents
                self.assertEqual(node_one._items, node_two._items)
        self.assertEqual([], node_two_stack)

    def assertCanonicalForm(self, chkmap):
        """Assert that the chkmap is in 'canonical' form.

        We do this by adding all of the key value pairs from scratch, both in
        forward order and reverse order, and assert that the final tree layout
        is identical.
        """
        items = list(chkmap.iteritems())
390
map_forward = chk_map.CHKMap(None, None)
391
map_forward._root_node.set_maximum_size(chkmap._root_node.maximum_size)
392
for key, value in items:
393
map_forward.map(key, value)
394
self.assertMapLayoutEqual(map_forward, chkmap)
395
map_reverse = chk_map.CHKMap(None, None)
396
map_reverse._root_node.set_maximum_size(chkmap._root_node.maximum_size)
397
for key, value in reversed(items):
398
map_reverse.map(key, value)
399
self.assertMapLayoutEqual(map_reverse, chkmap)
401
def test_assert_map_layout_equal(self):
402
store = self.get_chk_bytes()
403
map_one = CHKMap(store, None)
404
map_one._root_node.set_maximum_size(20)
405
map_two = CHKMap(store, None)
406
map_two._root_node.set_maximum_size(20)
407
self.assertMapLayoutEqual(map_one, map_two)
408
map_one.map('aaa', 'value')
409
self.assertRaises(AssertionError,
410
self.assertMapLayoutEqual, map_one, map_two)
411
map_two.map('aaa', 'value')
412
self.assertMapLayoutEqual(map_one, map_two)
413
# Split the tree, so we ensure that internal nodes and leaf nodes are
415
map_one.map('aab', 'value')
416
self.assertIsInstance(map_one._root_node, InternalNode)
417
self.assertRaises(AssertionError,
418
self.assertMapLayoutEqual, map_one, map_two)
419
map_two.map('aab', 'value')
420
self.assertMapLayoutEqual(map_one, map_two)
421
map_one.map('aac', 'value')
422
self.assertRaises(AssertionError,
423
self.assertMapLayoutEqual, map_one, map_two)
424
self.assertCanonicalForm(map_one)
426
def test_from_dict_empty(self):
427
chk_bytes = self.get_chk_bytes()
428
root_key = CHKMap.from_dict(chk_bytes, {})
429
# Check the data was saved and inserted correctly.
430
expected_root_key = self.assertHasEmptyMap(chk_bytes)
431
self.assertEqual(expected_root_key, root_key)
433
def test_from_dict_ab(self):
434
chk_bytes = self.get_chk_bytes()
435
root_key = CHKMap.from_dict(chk_bytes, {"a": "b"})
436
# Check the data was saved and inserted correctly.
437
expected_root_key = self.assertHasABMap(chk_bytes)
438
self.assertEqual(expected_root_key, root_key)
440
def test_apply_empty_ab(self):
441
# applying a delta (None, "a", "b") to an empty chkmap generates the
442
# same map as from_dict_ab.
443
chk_bytes = self.get_chk_bytes()
444
root_key = CHKMap.from_dict(chk_bytes, {})
445
chkmap = CHKMap(chk_bytes, root_key)
446
new_root = chkmap.apply_delta([(None, "a", "b")])
447
# Check the data was saved and inserted correctly.
448
expected_root_key = self.assertHasABMap(chk_bytes)
449
self.assertEqual(expected_root_key, new_root)
450
# The update should have left us with an in memory root node, with an
452
self.assertEqual(new_root, chkmap._root_node._key)
454
def test_apply_ab_empty(self):
455
# applying a delta ("a", None, None) to a map with 'a' in it generates
457
chk_bytes = self.get_chk_bytes()
458
root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
459
chkmap = CHKMap(chk_bytes, root_key)
460
new_root = chkmap.apply_delta([(("a",), None, None)])
461
# Check the data was saved and inserted correctly.
462
expected_root_key = self.assertHasEmptyMap(chk_bytes)
463
self.assertEqual(expected_root_key, new_root)
464
# The update should have left us with an in memory root node, with an
466
self.assertEqual(new_root, chkmap._root_node._key)
468
def test_apply_delete_to_internal_node(self):
469
# applying a delta should be convert an internal root node to a leaf
470
# node if the delta shrinks the map enough.
471
store = self.get_chk_bytes()
472
chkmap = CHKMap(store, None)
473
# Add three items: 2 small enough to fit in one node, and one huge to
474
# force multiple nodes.
475
chkmap._root_node.set_maximum_size(100)
476
chkmap.map(('small',), 'value')
477
chkmap.map(('little',), 'value')
478
chkmap.map(('very-big',), 'x' * 100)
479
# (Check that we have constructed the scenario we want to test)
480
self.assertIsInstance(chkmap._root_node, InternalNode)
481
# Delete the huge item so that the map fits in one node again.
482
delta = [(('very-big',), None, None)]
483
chkmap.apply_delta(delta)
484
self.assertCanonicalForm(chkmap)
485
self.assertIsInstance(chkmap._root_node, LeafNode)
487
def test_apply_new_keys_must_be_new(self):
488
# applying a delta (None, "a", "b") to a map with 'a' in it generates
490
chk_bytes = self.get_chk_bytes()
491
root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
492
chkmap = CHKMap(chk_bytes, root_key)
493
self.assertRaises(errors.InconsistentDelta, chkmap.apply_delta,
494
[(None, ("a",), "b")])
495
# As an error occured, the update should have left us without changing
496
# anything (the root should be unchanged).
497
self.assertEqual(root_key, chkmap._root_node._key)
499
def test_apply_delta_is_deterministic(self):
500
chk_bytes = self.get_chk_bytes()
501
chkmap1 = CHKMap(chk_bytes, None)
502
chkmap1._root_node.set_maximum_size(10)
503
chkmap1.apply_delta([(None, ('aaa',), 'common'),
504
(None, ('bba',), 'target2'),
505
(None, ('bbb',), 'common')])
506
root_key1 = chkmap1._save()
507
self.assertCanonicalForm(chkmap1)
509
chkmap2 = CHKMap(chk_bytes, None)
510
chkmap2._root_node.set_maximum_size(10)
511
chkmap2.apply_delta([(None, ('bbb',), 'common'),
512
(None, ('bba',), 'target2'),
513
(None, ('aaa',), 'common')])
514
root_key2 = chkmap2._save()
515
self.assertEqualDiff(chkmap1._dump_tree(include_keys=True),
516
chkmap2._dump_tree(include_keys=True))
517
self.assertEqual(root_key1, root_key2)
518
self.assertCanonicalForm(chkmap2)
520
def test_stable_splitting(self):
521
store = self.get_chk_bytes()
522
chkmap = CHKMap(store, None)
523
# Should fit 2 keys per LeafNode
524
chkmap._root_node.set_maximum_size(35)
525
chkmap.map(('aaa',), 'v')
526
self.assertEqualDiff("'' LeafNode\n"
529
chkmap.map(('aab',), 'v')
530
self.assertEqualDiff("'' LeafNode\n"
534
self.assertCanonicalForm(chkmap)
536
# Creates a new internal node, and splits the others into leaves
537
chkmap.map(('aac',), 'v')
538
self.assertEqualDiff("'' InternalNode\n"
546
self.assertCanonicalForm(chkmap)
548
# Splits again, because it can't fit in the current structure
549
chkmap.map(('bbb',), 'v')
550
self.assertEqualDiff("'' InternalNode\n"
551
" 'a' InternalNode\n"
561
self.assertCanonicalForm(chkmap)
563
def test_map_splits_with_longer_key(self):
564
store = self.get_chk_bytes()
565
chkmap = CHKMap(store, None)
566
# Should fit 1 key per LeafNode
567
chkmap._root_node.set_maximum_size(10)
568
chkmap.map(('aaa',), 'v')
569
chkmap.map(('aaaa',), 'v')
570
self.assertCanonicalForm(chkmap)
571
self.assertIsInstance(chkmap._root_node, InternalNode)
573
def test_with_linefeed_in_key(self):
574
store = self.get_chk_bytes()
575
chkmap = CHKMap(store, None)
576
# Should fit 1 key per LeafNode
577
chkmap._root_node.set_maximum_size(10)
578
chkmap.map(('a\ra',), 'val1')
579
chkmap.map(('a\rb',), 'val2')
580
chkmap.map(('ac',), 'val3')
581
self.assertCanonicalForm(chkmap)
582
self.assertEqualDiff("'' InternalNode\n"
583
" 'a\\r' InternalNode\n"
584
" 'a\\ra' LeafNode\n"
585
" ('a\\ra',) 'val1'\n"
586
" 'a\\rb' LeafNode\n"
587
" ('a\\rb',) 'val2'\n"
591
# We should also successfully serialise and deserialise these items
592
root_key = chkmap._save()
593
chkmap = CHKMap(store, root_key)
594
self.assertEqualDiff("'' InternalNode\n"
595
" 'a\\r' InternalNode\n"
596
" 'a\\ra' LeafNode\n"
597
" ('a\\ra',) 'val1'\n"
598
" 'a\\rb' LeafNode\n"
599
" ('a\\rb',) 'val2'\n"
604
def test_deep_splitting(self):
605
store = self.get_chk_bytes()
606
chkmap = CHKMap(store, None)
607
# Should fit 2 keys per LeafNode
608
chkmap._root_node.set_maximum_size(40)
609
chkmap.map(('aaaaaaaa',), 'v')
610
chkmap.map(('aaaaabaa',), 'v')
611
self.assertEqualDiff("'' LeafNode\n"
612
" ('aaaaaaaa',) 'v'\n"
613
" ('aaaaabaa',) 'v'\n",
615
chkmap.map(('aaabaaaa',), 'v')
616
chkmap.map(('aaababaa',), 'v')
617
self.assertEqualDiff("'' InternalNode\n"
619
" ('aaaaaaaa',) 'v'\n"
620
" ('aaaaabaa',) 'v'\n"
622
" ('aaabaaaa',) 'v'\n"
623
" ('aaababaa',) 'v'\n",
625
chkmap.map(('aaabacaa',), 'v')
626
chkmap.map(('aaabadaa',), 'v')
627
self.assertEqualDiff("'' InternalNode\n"
629
" ('aaaaaaaa',) 'v'\n"
630
" ('aaaaabaa',) 'v'\n"
631
" 'aaab' InternalNode\n"
632
" 'aaabaa' LeafNode\n"
633
" ('aaabaaaa',) 'v'\n"
634
" 'aaabab' LeafNode\n"
635
" ('aaababaa',) 'v'\n"
636
" 'aaabac' LeafNode\n"
637
" ('aaabacaa',) 'v'\n"
638
" 'aaabad' LeafNode\n"
639
" ('aaabadaa',) 'v'\n",
641
chkmap.map(('aaababba',), 'val')
642
chkmap.map(('aaababca',), 'val')
643
self.assertEqualDiff("'' InternalNode\n"
645
" ('aaaaaaaa',) 'v'\n"
646
" ('aaaaabaa',) 'v'\n"
647
" 'aaab' InternalNode\n"
648
" 'aaabaa' LeafNode\n"
649
" ('aaabaaaa',) 'v'\n"
650
" 'aaabab' InternalNode\n"
651
" 'aaababa' LeafNode\n"
652
" ('aaababaa',) 'v'\n"
653
" 'aaababb' LeafNode\n"
654
" ('aaababba',) 'val'\n"
655
" 'aaababc' LeafNode\n"
656
" ('aaababca',) 'val'\n"
657
" 'aaabac' LeafNode\n"
658
" ('aaabacaa',) 'v'\n"
659
" 'aaabad' LeafNode\n"
660
" ('aaabadaa',) 'v'\n",
662
# Now we add a node that should fit around an existing InternalNode,
663
# but has a slightly different key prefix, which causes a new
665
chkmap.map(('aaabDaaa',), 'v')
666
self.assertEqualDiff("'' InternalNode\n"
668
" ('aaaaaaaa',) 'v'\n"
669
" ('aaaaabaa',) 'v'\n"
670
" 'aaab' InternalNode\n"
671
" 'aaabD' LeafNode\n"
672
" ('aaabDaaa',) 'v'\n"
673
" 'aaaba' InternalNode\n"
674
" 'aaabaa' LeafNode\n"
675
" ('aaabaaaa',) 'v'\n"
676
" 'aaabab' InternalNode\n"
677
" 'aaababa' LeafNode\n"
678
" ('aaababaa',) 'v'\n"
679
" 'aaababb' LeafNode\n"
680
" ('aaababba',) 'val'\n"
681
" 'aaababc' LeafNode\n"
682
" ('aaababca',) 'val'\n"
683
" 'aaabac' LeafNode\n"
684
" ('aaabacaa',) 'v'\n"
685
" 'aaabad' LeafNode\n"
686
" ('aaabadaa',) 'v'\n",
689
def test_map_collapses_if_size_changes(self):
690
store = self.get_chk_bytes()
691
chkmap = CHKMap(store, None)
692
# Should fit 2 keys per LeafNode
693
chkmap._root_node.set_maximum_size(35)
694
chkmap.map(('aaa',), 'v')
695
chkmap.map(('aab',), 'very long value that splits')
696
self.assertEqualDiff("'' InternalNode\n"
700
" ('aab',) 'very long value that splits'\n",
702
self.assertCanonicalForm(chkmap)
703
# Now changing the value to something small should cause a rebuild
704
chkmap.map(('aab',), 'v')
705
self.assertEqualDiff("'' LeafNode\n"
709
self.assertCanonicalForm(chkmap)
711
def test_map_double_deep_collapses(self):
712
store = self.get_chk_bytes()
713
chkmap = CHKMap(store, None)
714
# Should fit 3 small keys per LeafNode
715
chkmap._root_node.set_maximum_size(40)
716
chkmap.map(('aaa',), 'v')
717
chkmap.map(('aab',), 'very long value that splits')
718
chkmap.map(('abc',), 'v')
719
self.assertEqualDiff("'' InternalNode\n"
720
" 'aa' InternalNode\n"
724
" ('aab',) 'very long value that splits'\n"
728
chkmap.map(('aab',), 'v')
729
self.assertCanonicalForm(chkmap)
730
self.assertEqualDiff("'' LeafNode\n"
736
def test_stable_unmap(self):
737
store = self.get_chk_bytes()
738
chkmap = CHKMap(store, None)
739
# Should fit 2 keys per LeafNode
740
chkmap._root_node.set_maximum_size(35)
741
chkmap.map(('aaa',), 'v')
742
chkmap.map(('aab',), 'v')
743
self.assertEqualDiff("'' LeafNode\n"
747
# Creates a new internal node, and splits the others into leaves
748
chkmap.map(('aac',), 'v')
749
self.assertEqualDiff("'' InternalNode\n"
757
self.assertCanonicalForm(chkmap)
758
# Now lets unmap one of the keys, and assert that we collapse the
760
chkmap.unmap(('aac',))
761
self.assertEqualDiff("'' LeafNode\n"
765
self.assertCanonicalForm(chkmap)
767
def test_unmap_double_deep(self):
768
store = self.get_chk_bytes()
769
chkmap = CHKMap(store, None)
770
# Should fit 3 keys per LeafNode
771
chkmap._root_node.set_maximum_size(40)
772
chkmap.map(('aaa',), 'v')
773
chkmap.map(('aaab',), 'v')
774
chkmap.map(('aab',), 'very long value')
775
chkmap.map(('abc',), 'v')
776
self.assertEqualDiff("'' InternalNode\n"
777
" 'aa' InternalNode\n"
782
" ('aab',) 'very long value'\n"
786
# Removing the 'aab' key should cause everything to collapse back to a
788
chkmap.unmap(('aab',))
789
self.assertEqualDiff("'' LeafNode\n"
795
def test_unmap_double_deep_non_empty_leaf(self):
796
store = self.get_chk_bytes()
797
chkmap = CHKMap(store, None)
798
# Should fit 3 keys per LeafNode
799
chkmap._root_node.set_maximum_size(40)
800
chkmap.map(('aaa',), 'v')
801
chkmap.map(('aab',), 'long value')
802
chkmap.map(('aabb',), 'v')
803
chkmap.map(('abc',), 'v')
804
self.assertEqualDiff("'' InternalNode\n"
805
" 'aa' InternalNode\n"
809
" ('aab',) 'long value'\n"
814
# Removing the 'aab' key should cause everything to collapse back to a
816
chkmap.unmap(('aab',))
817
self.assertEqualDiff("'' LeafNode\n"
823
def test_unmap_with_known_internal_node_doesnt_page(self):
824
store = self.get_chk_bytes()
825
chkmap = CHKMap(store, None)
826
# Should fit 3 keys per LeafNode
827
chkmap._root_node.set_maximum_size(30)
828
chkmap.map(('aaa',), 'v')
829
chkmap.map(('aab',), 'v')
830
chkmap.map(('aac',), 'v')
831
chkmap.map(('abc',), 'v')
832
chkmap.map(('acd',), 'v')
833
self.assertEqualDiff("'' InternalNode\n"
834
" 'aa' InternalNode\n"
846
# Save everything to the map, and start over
847
chkmap = CHKMap(store, chkmap._save())
848
# Mapping an 'aa' key loads the internal node, but should not map the
849
# 'ab' and 'ac' nodes
850
chkmap.map(('aad',), 'v')
851
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
852
self.assertIsInstance(chkmap._root_node._items['ab'], StaticTuple)
853
self.assertIsInstance(chkmap._root_node._items['ac'], StaticTuple)
854
# Unmapping 'acd' can notice that 'aa' is an InternalNode and not have
856
chkmap.unmap(('acd',))
857
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
858
self.assertIsInstance(chkmap._root_node._items['ab'], StaticTuple)
860
def test_unmap_without_fitting_doesnt_page_in(self):
861
store = self.get_chk_bytes()
862
chkmap = CHKMap(store, None)
863
# Should fit 2 keys per LeafNode
864
chkmap._root_node.set_maximum_size(20)
865
chkmap.map(('aaa',), 'v')
866
chkmap.map(('aab',), 'v')
867
self.assertEqualDiff("'' InternalNode\n"
873
# Save everything to the map, and start over
874
chkmap = CHKMap(store, chkmap._save())
875
chkmap.map(('aac',), 'v')
876
chkmap.map(('aad',), 'v')
877
chkmap.map(('aae',), 'v')
878
chkmap.map(('aaf',), 'v')
879
# At this point, the previous nodes should not be paged in, but the
880
# newly added nodes would be
881
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
882
self.assertIsInstance(chkmap._root_node._items['aab'], StaticTuple)
883
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
884
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
885
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
886
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
887
# Now unmapping one of the new nodes will use only the already-paged-in
888
# nodes to determine that we don't need to do more.
889
chkmap.unmap(('aaf',))
890
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
891
self.assertIsInstance(chkmap._root_node._items['aab'], StaticTuple)
892
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
893
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
894
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
896
def test_unmap_pages_in_if_necessary(self):
897
store = self.get_chk_bytes()
898
chkmap = CHKMap(store, None)
899
# Should fit 2 keys per LeafNode
900
chkmap._root_node.set_maximum_size(30)
901
chkmap.map(('aaa',), 'val')
902
chkmap.map(('aab',), 'val')
903
chkmap.map(('aac',), 'val')
904
self.assertEqualDiff("'' InternalNode\n"
912
root_key = chkmap._save()
913
# Save everything to the map, and start over
914
chkmap = CHKMap(store, root_key)
915
chkmap.map(('aad',), 'v')
916
# At this point, the previous nodes should not be paged in, but the
917
# newly added node would be
918
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
919
self.assertIsInstance(chkmap._root_node._items['aab'], StaticTuple)
920
self.assertIsInstance(chkmap._root_node._items['aac'], StaticTuple)
921
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
922
# Unmapping the new node will check the existing nodes to see if they
924
# Clear the page cache so we ensure we have to read all the children
925
chk_map.clear_cache()
926
chkmap.unmap(('aad',))
927
self.assertIsInstance(chkmap._root_node._items['aaa'], LeafNode)
928
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
929
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
931
def test_unmap_pages_in_from_page_cache(self):
932
store = self.get_chk_bytes()
933
chkmap = CHKMap(store, None)
934
# Should fit 2 keys per LeafNode
935
chkmap._root_node.set_maximum_size(30)
936
chkmap.map(('aaa',), 'val')
937
chkmap.map(('aab',), 'val')
938
chkmap.map(('aac',), 'val')
939
root_key = chkmap._save()
940
# Save everything to the map, and start over
941
chkmap = CHKMap(store, root_key)
942
chkmap.map(('aad',), 'val')
943
self.assertEqualDiff("'' InternalNode\n"
953
# Save everything to the map, start over after _dump_tree
954
chkmap = CHKMap(store, root_key)
955
chkmap.map(('aad',), 'v')
956
# At this point, the previous nodes should not be paged in, but the
957
# newly added node would be
958
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
959
self.assertIsInstance(chkmap._root_node._items['aab'], StaticTuple)
960
self.assertIsInstance(chkmap._root_node._items['aac'], StaticTuple)
961
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
962
# Now clear the page cache, and only include 2 of the children in the
964
aab_key = chkmap._root_node._items['aab']
965
aab_bytes = chk_map._get_cache()[aab_key]
966
aac_key = chkmap._root_node._items['aac']
967
aac_bytes = chk_map._get_cache()[aac_key]
968
chk_map.clear_cache()
969
chk_map._get_cache()[aab_key] = aab_bytes
970
chk_map._get_cache()[aac_key] = aac_bytes
972
# Unmapping the new node will check the nodes from the page cache
973
# first, and not have to read in 'aaa'
974
chkmap.unmap(('aad',))
975
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
976
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
977
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
979
def test_unmap_uses_existing_items(self):
980
store = self.get_chk_bytes()
981
chkmap = CHKMap(store, None)
982
# Should fit 2 keys per LeafNode
983
chkmap._root_node.set_maximum_size(30)
984
chkmap.map(('aaa',), 'val')
985
chkmap.map(('aab',), 'val')
986
chkmap.map(('aac',), 'val')
987
root_key = chkmap._save()
988
# Save everything to the map, and start over
989
chkmap = CHKMap(store, root_key)
990
chkmap.map(('aad',), 'val')
991
chkmap.map(('aae',), 'val')
992
chkmap.map(('aaf',), 'val')
993
# At this point, the previous nodes should not be paged in, but the
994
# newly added node would be
995
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
996
self.assertIsInstance(chkmap._root_node._items['aab'], StaticTuple)
997
self.assertIsInstance(chkmap._root_node._items['aac'], StaticTuple)
998
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
999
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
1000
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
1002
# Unmapping a new node will see the other nodes that are already in
1003
# memory, and not need to page in anything else
1004
chkmap.unmap(('aad',))
1005
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
1006
self.assertIsInstance(chkmap._root_node._items['aab'], StaticTuple)
1007
self.assertIsInstance(chkmap._root_node._items['aac'], StaticTuple)
1008
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
1009
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
1011
def test_iter_changes_empty_ab(self):
1012
# Asking for changes between an empty dict to a dict with keys returns
1014
basis = self._get_map({}, maximum_size=10)
1015
target = self._get_map(
1016
{('a',): 'content here', ('b',): 'more content'},
1017
chk_bytes=basis._store, maximum_size=10)
1018
self.assertEqual([(('a',), None, 'content here'),
1019
(('b',), None, 'more content')],
1020
sorted(list(target.iter_changes(basis))))
1022
def test_iter_changes_ab_empty(self):
1023
# Asking for changes between a dict with keys to an empty dict returns
1025
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
1027
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
1028
self.assertEqual([(('a',), 'content here', None),
1029
(('b',), 'more content', None)],
1030
sorted(list(target.iter_changes(basis))))
1032
def test_iter_changes_empty_empty_is_empty(self):
1033
basis = self._get_map({}, maximum_size=10)
1034
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
1035
self.assertEqual([], sorted(list(target.iter_changes(basis))))
1037
def test_iter_changes_ab_ab_is_empty(self):
1038
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
1040
target = self._get_map(
1041
{('a',): 'content here', ('b',): 'more content'},
1042
chk_bytes=basis._store, maximum_size=10)
1043
self.assertEqual([], sorted(list(target.iter_changes(basis))))
1045
def test_iter_changes_ab_ab_nodes_not_loaded(self):
1046
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
1048
target = self._get_map(
1049
{('a',): 'content here', ('b',): 'more content'},
1050
chk_bytes=basis._store, maximum_size=10)
1051
list(target.iter_changes(basis))
1052
self.assertIsInstance(target._root_node, StaticTuple)
1053
self.assertIsInstance(basis._root_node, StaticTuple)
1055
def test_iter_changes_ab_ab_changed_values_shown(self):
1056
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
1058
target = self._get_map(
1059
{('a',): 'content here', ('b',): 'different content'},
1060
chk_bytes=basis._store, maximum_size=10)
1061
result = sorted(list(target.iter_changes(basis)))
1062
self.assertEqual([(('b',), 'more content', 'different content')],

    def test_iter_changes_mixed_node_length(self):
        # When one side has different node lengths than the other, common
        # but different keys still need to be shown, and new-and-old included
        # aaa - common unaltered
        # aab - common altered
        # aaa to be not loaded (later test)
        # aab, b, at to be returned.
        # basis splits at byte 0,1,2, aaa is common, b is basis only
        basis_dict = {('aaa',): 'foo bar',
                      ('aab',): 'common altered a', ('b',): 'foo bar b'}
        # target splits at byte 1,2, at is target only
        target_dict = {('aaa',): 'foo bar',
                       ('aab',): 'common altered b', ('at',): 'foo bar t'}
        changes = [
            (('aab',), 'common altered a', 'common altered b'),
            (('at',), None, 'foo bar t'),
            (('b',), 'foo bar b', None),
            ]
        basis = self._get_map(basis_dict, maximum_size=10)
1088
target = self._get_map(target_dict, maximum_size=10,
1089
chk_bytes=basis._store)
1090
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
1092
def test_iter_changes_common_pages_not_loaded(self):
1093
# aaa - common unaltered
1094
# aab - common altered
1098
# aaa to be not loaded
1099
# aaa not to be in result.
1100
basis_dict = {('aaa',): 'foo bar',
1101
('aab',): 'common altered a', ('b',): 'foo bar b'}
1102
# target splits at byte 1, at is target only
1103
target_dict = {('aaa',): 'foo bar',
1104
('aab',): 'common altered b', ('at',): 'foo bar t'}
1105
basis = self._get_map(basis_dict, maximum_size=10)
1106
target = self._get_map(target_dict, maximum_size=10,
1107
chk_bytes=basis._store)
1108
basis_get = basis._store.get_record_stream
1109
def get_record_stream(keys, order, fulltext):
1110
if ('sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys:
1111
raise AssertionError("'aaa' pointer was followed %r" % keys)
1112
return basis_get(keys, order, fulltext)
1113
basis._store.get_record_stream = get_record_stream
1114
result = sorted(list(target.iter_changes(basis)))
1115
for change in result:
1116
if change[0] == ('aaa',):
1117
self.fail("Found unexpected change: %s" % change)

    def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self):
        # Within a leaf there are no hashes to exclude keys, make sure multi
        # value leaf nodes are handled well.
        basis_dict = {('aaa',): 'foo bar',
                      ('aab',): 'common altered a', ('b',): 'foo bar b'}
        target_dict = {('aaa',): 'foo bar',
                       ('aab',): 'common altered b', ('at',): 'foo bar t'}
        changes = [
            (('aab',), 'common altered a', 'common altered b'),
            (('at',), None, 'foo bar t'),
            (('b',), 'foo bar b', None),
            ]
        basis = self._get_map(basis_dict)
1132
target = self._get_map(target_dict, chk_bytes=basis._store)
1133
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
1135
def test_iteritems_empty(self):
1136
chk_bytes = self.get_chk_bytes()
1137
root_key = CHKMap.from_dict(chk_bytes, {})
1138
chkmap = CHKMap(chk_bytes, root_key)
1139
self.assertEqual([], list(chkmap.iteritems()))
1141
def test_iteritems_two_items(self):
1142
chk_bytes = self.get_chk_bytes()
1143
root_key = CHKMap.from_dict(chk_bytes,
1144
{"a":"content here", "b":"more content"})
1145
chkmap = CHKMap(chk_bytes, root_key)
1146
self.assertEqual([(("a",), "content here"), (("b",), "more content")],
1147
sorted(list(chkmap.iteritems())))
1149
def test_iteritems_selected_one_of_two_items(self):
1150
chkmap = self._get_map( {("a",):"content here", ("b",):"more content"})
1151
self.assertEqual({("a",): "content here"},
1152
self.to_dict(chkmap, [("a",)]))
1154
def test_iteritems_keys_prefixed_by_2_width_nodes(self):
1155
chkmap = self._get_map(
1156
{("a","a"):"content here", ("a", "b",):"more content",
1157
("b", ""): 'boring content'},
1158
maximum_size=10, key_width=2)
1160
{("a", "a"): "content here", ("a", "b"): 'more content'},
1161
self.to_dict(chkmap, [("a",)]))
1163
def test_iteritems_keys_prefixed_by_2_width_nodes_hashed(self):
1164
search_key_func = chk_map.search_key_registry.get('hash-16-way')
1165
self.assertEqual('E8B7BE43\x00E8B7BE43',
1166
search_key_func(StaticTuple('a', 'a')))
1167
self.assertEqual('E8B7BE43\x0071BEEFF9',
1168
search_key_func(StaticTuple('a', 'b')))
1169
self.assertEqual('71BEEFF9\x0000000000',
1170
search_key_func(StaticTuple('b', '')))
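        # (The 'hash-16-way' search key hashes each key element separately
        # and joins the 8-digit uppercase hex digests with \x00; 0xE8B7BE43
        # is the CRC32 of 'a', which is why both elements of ('a', 'a') map
        # to the same digest above.)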
1171
chkmap = self._get_map(
1172
{("a","a"):"content here", ("a", "b",):"more content",
1173
("b", ""): 'boring content'},
1174
maximum_size=10, key_width=2, search_key_func=search_key_func)
1176
{("a", "a"): "content here", ("a", "b"): 'more content'},
1177
self.to_dict(chkmap, [("a",)]))
1179
def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
1180
chkmap = self._get_map(
1181
{("a","a"):"content here", ("a", "b",):"more content",
1182
("b", ""): 'boring content'}, key_width=2)
1184
{("a", "a"): "content here", ("a", "b"): 'more content'},
1185
self.to_dict(chkmap, [("a",)]))
1187
def test___len__empty(self):
1188
chkmap = self._get_map({})
1189
self.assertEqual(0, len(chkmap))
1191
def test___len__2(self):
1192
chkmap = self._get_map({("foo",):"bar", ("gam",):"quux"})
1193
self.assertEqual(2, len(chkmap))
1195
def test_max_size_100_bytes_new(self):
1196
# When there is a 100 byte upper node limit, a tree is formed.
1197
chkmap = self._get_map({("k1"*50,):"v1", ("k2"*50,):"v2"}, maximum_size=100)
1198
# We expect three nodes:
1199
# A root, with two children, and with two key prefixes - k1 to one, and
1200
# k2 to the other as our node splitting is only just being developed.
1201
# The maximum size should be embedded
1202
chkmap._ensure_root()
1203
self.assertEqual(100, chkmap._root_node.maximum_size)
1204
self.assertEqual(1, chkmap._root_node._key_width)
1205
# There should be two child nodes, and prefix of 2(bytes):
1206
self.assertEqual(2, len(chkmap._root_node._items))
1207
self.assertEqual("k", chkmap._root_node._compute_search_prefix())
1208
# The actual nodes pointed at will change as serialisers change; so
1209
# here we test that the key prefix is correct; then load the nodes and
1210
# check they have the right pointed at key; whether they have the
1211
# pointed at value inline or not is also unrelated to this test so we
1212
# don't check that in detail - rather we just check the aggregate
1214
nodes = sorted(chkmap._root_node._items.items())
1217
self.assertEqual('k1', ptr1[0])
1218
self.assertEqual('k2', ptr2[0])
1219
node1 = chk_map._deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1], None)
1220
self.assertIsInstance(node1, LeafNode)
1221
self.assertEqual(1, len(node1))
1222
self.assertEqual({('k1'*50,): 'v1'}, self.to_dict(node1, chkmap._store))
1223
node2 = chk_map._deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1], None)
1224
self.assertIsInstance(node2, LeafNode)
1225
self.assertEqual(1, len(node2))
1226
self.assertEqual({('k2'*50,): 'v2'}, self.to_dict(node2, chkmap._store))
1227
# Having checked we have a good structure, check that the content is
1229
self.assertEqual(2, len(chkmap))
1230
self.assertEqual({("k1"*50,): "v1", ("k2"*50,): "v2"},
1231
self.to_dict(chkmap))
1233
def test_init_root_is_LeafNode_new(self):
1234
chk_bytes = self.get_chk_bytes()
1235
chkmap = CHKMap(chk_bytes, None)
1236
self.assertIsInstance(chkmap._root_node, LeafNode)
1237
self.assertEqual({}, self.to_dict(chkmap))
1238
self.assertEqual(0, len(chkmap))
1240
def test_init_and_save_new(self):
1241
chk_bytes = self.get_chk_bytes()
1242
chkmap = CHKMap(chk_bytes, None)
1243
key = chkmap._save()
1244
leaf_node = LeafNode()
1245
self.assertEqual([key], leaf_node.serialise(chk_bytes))
1247
def test_map_first_item_new(self):
1248
chk_bytes = self.get_chk_bytes()
1249
chkmap = CHKMap(chk_bytes, None)
1250
chkmap.map(("foo,",), "bar")
1251
self.assertEqual({('foo,',): 'bar'}, self.to_dict(chkmap))
1252
self.assertEqual(1, len(chkmap))
1253
key = chkmap._save()
1254
leaf_node = LeafNode()
1255
leaf_node.map(chk_bytes, ("foo,",), "bar")
1256
self.assertEqual([key], leaf_node.serialise(chk_bytes))
1258
def test_unmap_last_item_root_is_leaf_new(self):
1259
chkmap = self._get_map({("k1"*50,): "v1", ("k2"*50,): "v2"})
1260
chkmap.unmap(("k1"*50,))
1261
chkmap.unmap(("k2"*50,))
1262
self.assertEqual(0, len(chkmap))
1263
self.assertEqual({}, self.to_dict(chkmap))
1264
key = chkmap._save()
1265
leaf_node = LeafNode()
1266
self.assertEqual([key], leaf_node.serialise(chkmap._store))
1268
def test__dump_tree(self):
1269
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2",
1270
("bbb",): "value3",},
1272
self.assertEqualDiff("'' InternalNode\n"
1273
" 'a' InternalNode\n"
1275
" ('aaa',) 'value1'\n"
1277
" ('aab',) 'value2'\n"
1279
" ('bbb',) 'value3'\n",
1280
chkmap._dump_tree())
1281
self.assertEqualDiff("'' InternalNode\n"
1282
" 'a' InternalNode\n"
1284
" ('aaa',) 'value1'\n"
1286
" ('aab',) 'value2'\n"
1288
" ('bbb',) 'value3'\n",
1289
chkmap._dump_tree())
1290
self.assertEqualDiff(
1291
"'' InternalNode sha1:0690d471eb0a624f359797d0ee4672bd68f4e236\n"
1292
" 'a' InternalNode sha1:1514c35503da9418d8fd90c1bed553077cb53673\n"
1293
" 'aaa' LeafNode sha1:4cc5970454d40b4ce297a7f13ddb76f63b88fefb\n"
1294
" ('aaa',) 'value1'\n"
1295
" 'aab' LeafNode sha1:1d68bc90914ef8a3edbcc8bb28b00cb4fea4b5e2\n"
1296
" ('aab',) 'value2'\n"
1297
" 'b' LeafNode sha1:3686831435b5596515353364eab0399dc45d49e7\n"
1298
" ('bbb',) 'value3'\n",
1299
chkmap._dump_tree(include_keys=True))
1301
def test__dump_tree_in_progress(self):
1302
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2"},
1304
chkmap.map(('bbb',), 'value3')
1305
self.assertEqualDiff("'' InternalNode\n"
1306
" 'a' InternalNode\n"
1308
" ('aaa',) 'value1'\n"
1310
" ('aab',) 'value2'\n"
1312
" ('bbb',) 'value3'\n",
1313
chkmap._dump_tree())
1314
# For things that are updated by adding 'bbb', we don't have a sha key
1315
# for them yet, so they are listed as None
1316
self.assertEqualDiff(
1317
"'' InternalNode None\n"
1318
" 'a' InternalNode sha1:6b0d881dd739a66f733c178b24da64395edfaafd\n"
1319
" 'aaa' LeafNode sha1:40b39a08d895babce17b20ae5f62d187eaa4f63a\n"
1320
" ('aaa',) 'value1'\n"
1321
" 'aab' LeafNode sha1:ad1dc7c4e801302c95bf1ba7b20bc45e548cd51a\n"
1322
" ('aab',) 'value2'\n"
1323
" 'b' LeafNode None\n"
1324
" ('bbb',) 'value3'\n",
1325
chkmap._dump_tree(include_keys=True))


def _search_key_single(key):
    """A search key function that maps all nodes to the same value"""
    return 'x'


def _test_search_key(key):
    return 'test:' + '\x00'.join(key)
1336
class TestMapSearchKeys(TestCaseWithStore):
1338
def test_default_chk_map_uses_flat_search_key(self):
1339
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None)
1340
self.assertEqual('1',
1341
chkmap._search_key_func(('1',)))
1342
self.assertEqual('1\x002',
1343
chkmap._search_key_func(('1', '2')))
1344
self.assertEqual('1\x002\x003',
1345
chkmap._search_key_func(('1', '2', '3')))
1347
def test_search_key_is_passed_to_root_node(self):
1348
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1349
search_key_func=_test_search_key)
1350
self.assertIs(_test_search_key, chkmap._search_key_func)
1351
self.assertEqual('test:1\x002\x003',
1352
chkmap._search_key_func(('1', '2', '3')))
1353
self.assertEqual('test:1\x002\x003',
1354
chkmap._root_node._search_key(('1', '2', '3')))
1356
def test_search_key_passed_via__ensure_root(self):
1357
chk_bytes = self.get_chk_bytes()
1358
chkmap = chk_map.CHKMap(chk_bytes, None,
1359
search_key_func=_test_search_key)
1360
root_key = chkmap._save()
1361
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1362
search_key_func=_test_search_key)
1363
chkmap._ensure_root()
1364
self.assertEqual('test:1\x002\x003',
1365
chkmap._root_node._search_key(('1', '2', '3')))
1367
def test_search_key_with_internal_node(self):
1368
chk_bytes = self.get_chk_bytes()
1369
chkmap = chk_map.CHKMap(chk_bytes, None,
1370
search_key_func=_test_search_key)
1371
chkmap._root_node.set_maximum_size(10)
1372
chkmap.map(('1',), 'foo')
1373
chkmap.map(('2',), 'bar')
1374
chkmap.map(('3',), 'baz')
1375
self.assertEqualDiff("'' InternalNode\n"
1376
" 'test:1' LeafNode\n"
1378
" 'test:2' LeafNode\n"
1380
" 'test:3' LeafNode\n"
1382
, chkmap._dump_tree())
1383
root_key = chkmap._save()
1384
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1385
search_key_func=_test_search_key)
1386
self.assertEqualDiff("'' InternalNode\n"
1387
" 'test:1' LeafNode\n"
1389
" 'test:2' LeafNode\n"
1391
" 'test:3' LeafNode\n"
1393
, chkmap._dump_tree())
1395
def test_search_key_16(self):
1396
chk_bytes = self.get_chk_bytes()
1397
chkmap = chk_map.CHKMap(chk_bytes, None,
1398
search_key_func=chk_map._search_key_16)
1399
chkmap._root_node.set_maximum_size(10)
1400
chkmap.map(('1',), 'foo')
1401
chkmap.map(('2',), 'bar')
1402
chkmap.map(('3',), 'baz')
1403
self.assertEqualDiff("'' InternalNode\n"
1410
, chkmap._dump_tree())
1411
root_key = chkmap._save()
1412
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1413
search_key_func=chk_map._search_key_16)
1414
# We can get the values back correctly
1415
self.assertEqual([(('1',), 'foo')],
1416
list(chkmap.iteritems([('1',)])))
1417
self.assertEqualDiff("'' InternalNode\n"
1424
, chkmap._dump_tree())
1426
def test_search_key_255(self):
1427
chk_bytes = self.get_chk_bytes()
1428
chkmap = chk_map.CHKMap(chk_bytes, None,
1429
search_key_func=chk_map._search_key_255)
1430
chkmap._root_node.set_maximum_size(10)
1431
chkmap.map(('1',), 'foo')
1432
chkmap.map(('2',), 'bar')
1433
chkmap.map(('3',), 'baz')
1434
self.assertEqualDiff("'' InternalNode\n"
1435
" '\\x1a' LeafNode\n"
1439
" '\\x83' LeafNode\n"
1441
, chkmap._dump_tree())
1442
root_key = chkmap._save()
1443
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1444
search_key_func=chk_map._search_key_255)
1445
# We can get the values back correctly
1446
self.assertEqual([(('1',), 'foo')],
1447
list(chkmap.iteritems([('1',)])))
1448
self.assertEqualDiff("'' InternalNode\n"
1449
" '\\x1a' LeafNode\n"
1453
" '\\x83' LeafNode\n"
1455
, chkmap._dump_tree())
1457
def test_search_key_collisions(self):
1458
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1459
search_key_func=_search_key_single)
1460
# The node will want to expand, but it cannot, because it knows that
1461
# all the keys must map to this node
1462
chkmap._root_node.set_maximum_size(20)
1463
chkmap.map(('1',), 'foo')
1464
chkmap.map(('2',), 'bar')
1465
chkmap.map(('3',), 'baz')
1466
self.assertEqualDiff("'' LeafNode\n"
1470
, chkmap._dump_tree())
1473
class TestLeafNode(TestCaseWithStore):
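
    # Note on the size assertions below: an empty LeafNode serialises to
    # 'chkleaf:\n0\n1\n0\n\n' (header, maximum_size, key_width, item count,
    # empty common prefix), which is 16 bytes, so _current_size() of a fresh
    # node is 16; bumping maximum_size or _key_width to a two-digit value
    # adds one byte.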

    def test_current_size_empty(self):
        node = LeafNode()
        self.assertEqual(16, node._current_size())

    def test_current_size_size_changed(self):
        node = LeafNode()
        node.set_maximum_size(10)
        self.assertEqual(17, node._current_size())

    def test_current_size_width_changed(self):
        node = LeafNode()
        node._key_width = 10
        self.assertEqual(17, node._current_size())

    def test_current_size_items(self):
        node = LeafNode()
        base_size = node._current_size()
        node.map(None, ("foo bar",), "baz")
        self.assertEqual(base_size + 14, node._current_size())
1495
def test_deserialise_empty(self):
1496
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
1497
self.assertEqual(0, len(node))
1498
self.assertEqual(10, node.maximum_size)
1499
self.assertEqual(("sha1:1234",), node.key())
1500
self.assertIs(None, node._search_prefix)
1501
self.assertIs(None, node._common_serialised_prefix)
1503
def test_deserialise_items(self):
1504
node = LeafNode.deserialise(
1505
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1507
self.assertEqual(2, len(node))
1508
self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")],
1509
sorted(node.iteritems(None)))
1511
def test_deserialise_item_with_null_width_1(self):
1512
node = LeafNode.deserialise(
1513
"chkleaf:\n0\n1\n2\n\nfoo\x001\nbar\x00baz\nquux\x001\nblarh\n",
1515
self.assertEqual(2, len(node))
1516
self.assertEqual([(("foo",), "bar\x00baz"), (("quux",), "blarh")],
1517
sorted(node.iteritems(None)))
1519
def test_deserialise_item_with_null_width_2(self):
1520
node = LeafNode.deserialise(
1521
"chkleaf:\n0\n2\n2\n\nfoo\x001\x001\nbar\x00baz\n"
1522
"quux\x00\x001\nblarh\n",
1524
self.assertEqual(2, len(node))
1525
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("quux", ""), "blarh")],
1526
sorted(node.iteritems(None)))
1528
def test_iteritems_selected_one_of_two_items(self):
1529
node = LeafNode.deserialise(
1530
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1532
self.assertEqual(2, len(node))
1533
self.assertEqual([(("quux",), "blarh")],
1534
sorted(node.iteritems(None, [("quux",), ("qaz",)])))
1536
def test_deserialise_item_with_common_prefix(self):
1537
node = LeafNode.deserialise(
1538
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x001\nbar\x00baz\n2\x001\nblarh\n",
1540
self.assertEqual(2, len(node))
1541
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("foo", "2"), "blarh")],
1542
sorted(node.iteritems(None)))
1543
self.assertIs(chk_map._unknown, node._search_prefix)
1544
self.assertEqual('foo\x00', node._common_serialised_prefix)
1546
def test_deserialise_multi_line(self):
1547
node = LeafNode.deserialise(
1548
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x002\nbar\nbaz\n2\x002\nblarh\n\n",
1550
self.assertEqual(2, len(node))
1551
self.assertEqual([(("foo", "1"), "bar\nbaz"),
1552
(("foo", "2"), "blarh\n"),
1553
], sorted(node.iteritems(None)))
1554
self.assertIs(chk_map._unknown, node._search_prefix)
1555
self.assertEqual('foo\x00', node._common_serialised_prefix)
1557
def test_key_new(self):
1559
self.assertEqual(None, node.key())
1561
def test_key_after_map(self):
1562
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
1563
node.map(None, ("foo bar",), "baz quux")
1564
self.assertEqual(None, node.key())
1566
def test_key_after_unmap(self):
1567
node = LeafNode.deserialise(
1568
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1570
node.unmap(None, ("foo bar",))
1571
self.assertEqual(None, node.key())
1573
def test_map_exceeding_max_size_only_entry_new(self):
1575
node.set_maximum_size(10)
1576
result = node.map(None, ("foo bar",), "baz quux")
1577
self.assertEqual(("foo bar", [("", node)]), result)
1578
self.assertTrue(10 < node._current_size())
1580
def test_map_exceeding_max_size_second_entry_early_difference_new(self):
1582
node.set_maximum_size(10)
1583
node.map(None, ("foo bar",), "baz quux")
1584
prefix, result = list(node.map(None, ("blue",), "red"))
1585
self.assertEqual("", prefix)
1586
self.assertEqual(2, len(result))
1587
split_chars = set([result[0][0], result[1][0]])
1588
self.assertEqual(set(["f", "b"]), split_chars)
1589
nodes = dict(result)
1591
self.assertEqual({("foo bar",): "baz quux"}, self.to_dict(node, None))
1592
self.assertEqual(10, node.maximum_size)
1593
self.assertEqual(1, node._key_width)
1595
self.assertEqual({("blue",): "red"}, self.to_dict(node, None))
1596
self.assertEqual(10, node.maximum_size)
1597
self.assertEqual(1, node._key_width)
1599
def test_map_first(self):
1601
result = node.map(None, ("foo bar",), "baz quux")
1602
self.assertEqual(("foo bar", [("", node)]), result)
1603
self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node, None))
1604
self.assertEqual(1, len(node))
1606
def test_map_second(self):
1608
node.map(None, ("foo bar",), "baz quux")
1609
result = node.map(None, ("bingo",), "bango")
1610
self.assertEqual(("", [("", node)]), result)
1611
self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"},
1612
self.to_dict(node, None))
1613
self.assertEqual(2, len(node))
1615
def test_map_replacement(self):
1617
node.map(None, ("foo bar",), "baz quux")
1618
result = node.map(None, ("foo bar",), "bango")
1619
self.assertEqual(("foo bar", [("", node)]), result)
1620
self.assertEqual({("foo bar",): "bango"},
1621
self.to_dict(node, None))
1622
self.assertEqual(1, len(node))
1624
def test_serialise_empty(self):
1625
store = self.get_chk_bytes()
1627
node.set_maximum_size(10)
1628
expected_key = ("sha1:f34c3f0634ea3f85953dffa887620c0a5b1f4a51",)
1629
self.assertEqual([expected_key],
1630
list(node.serialise(store)))
1631
self.assertEqual("chkleaf:\n10\n1\n0\n\n", self.read_bytes(store, expected_key))
1632
self.assertEqual(expected_key, node.key())
1634
def test_serialise_items(self):
1635
store = self.get_chk_bytes()
1637
node.set_maximum_size(10)
1638
node.map(None, ("foo bar",), "baz quux")
1639
expected_key = ("sha1:f89fac7edfc6bdb1b1b54a556012ff0c646ef5e0",)
1640
self.assertEqual('foo bar', node._common_serialised_prefix)
1641
self.assertEqual([expected_key],
1642
list(node.serialise(store)))
1643
self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\n\x001\nbaz quux\n",
1644
self.read_bytes(store, expected_key))
1645
self.assertEqual(expected_key, node.key())
1647
def test_unique_serialised_prefix_empty_new(self):
1649
self.assertIs(None, node._compute_search_prefix())
1651
def test_unique_serialised_prefix_one_item_new(self):
1653
node.map(None, ("foo bar", "baz"), "baz quux")
1654
self.assertEqual("foo bar\x00baz", node._compute_search_prefix())
1656
def test_unmap_missing(self):
1658
self.assertRaises(KeyError, node.unmap, None, ("foo bar",))
1660
def test_unmap_present(self):
1662
node.map(None, ("foo bar",), "baz quux")
1663
result = node.unmap(None, ("foo bar",))
1664
self.assertEqual(node, result)
1665
self.assertEqual({}, self.to_dict(node, None))
1666
self.assertEqual(0, len(node))
1668
def test_map_maintains_common_prefixes(self):
1671
node.map(None, ("foo bar", "baz"), "baz quux")
1672
self.assertEqual('foo bar\x00baz', node._search_prefix)
1673
self.assertEqual('foo bar\x00baz', node._common_serialised_prefix)
1674
node.map(None, ("foo bar", "bing"), "baz quux")
1675
self.assertEqual('foo bar\x00b', node._search_prefix)
1676
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1677
node.map(None, ("fool", "baby"), "baz quux")
1678
self.assertEqual('foo', node._search_prefix)
1679
self.assertEqual('foo', node._common_serialised_prefix)
1680
node.map(None, ("foo bar", "baz"), "replaced")
1681
self.assertEqual('foo', node._search_prefix)
1682
self.assertEqual('foo', node._common_serialised_prefix)
1683
node.map(None, ("very", "different"), "value")
1684
self.assertEqual('', node._search_prefix)
1685
self.assertEqual('', node._common_serialised_prefix)
1687
def test_unmap_maintains_common_prefixes(self):
1690
node.map(None, ("foo bar", "baz"), "baz quux")
1691
node.map(None, ("foo bar", "bing"), "baz quux")
1692
node.map(None, ("fool", "baby"), "baz quux")
1693
node.map(None, ("very", "different"), "value")
1694
self.assertEqual('', node._search_prefix)
1695
self.assertEqual('', node._common_serialised_prefix)
1696
node.unmap(None, ("very", "different"))
1697
self.assertEqual("foo", node._search_prefix)
1698
self.assertEqual("foo", node._common_serialised_prefix)
1699
node.unmap(None, ("fool", "baby"))
1700
self.assertEqual('foo bar\x00b', node._search_prefix)
1701
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1702
node.unmap(None, ("foo bar", "baz"))
1703
self.assertEqual('foo bar\x00bing', node._search_prefix)
1704
self.assertEqual('foo bar\x00bing', node._common_serialised_prefix)
1705
node.unmap(None, ("foo bar", "bing"))
1706
self.assertEqual(None, node._search_prefix)
1707
self.assertEqual(None, node._common_serialised_prefix)


class TestInternalNode(TestCaseWithStore):

    def test_add_node_empty_new(self):
        node = InternalNode('fo')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("foo", child)
        # Note that node isn't strictly valid now as a tree (only one child),
        # but that's ok for this test.
        # The first child defines the node's width:
        self.assertEqual(3, node._node_width)
        # We should be able to iterate over the contents without doing IO.
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, None))
        # The length should be known:
        self.assertEqual(1, len(node))
        # serialising the node should serialise the child and the node.
        chk_bytes = self.get_chk_bytes()
        keys = list(node.serialise(chk_bytes))
        child_key = child.serialise(chk_bytes)[0]
        self.assertEqual(
            [child_key, ('sha1:cf67e9997d8228a907c1f5bfb25a8bd9cd916fac',)],
            keys)
        # We should be able to access deserialised content.
        bytes = self.read_bytes(chk_bytes, keys[1])
        node = chk_map._deserialise(bytes, keys[1], None)
        self.assertEqual(1, len(node))
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, chk_bytes))
        self.assertEqual(3, node._node_width)

    def test_add_node_resets_key_new(self):
        node = InternalNode('fo')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("foo", child)
        chk_bytes = self.get_chk_bytes()
        keys = list(node.serialise(chk_bytes))
        self.assertEqual(keys[1], node._key)
        node.add_node("fos", child)
        self.assertEqual(None, node._key)

    # def test_add_node_empty_oversized_one_ok_new(self):
    # def test_add_node_one_oversized_second_kept_minimum_fan(self):
    # def test_add_node_two_oversized_third_kept_minimum_fan(self):
    # def test_add_node_one_oversized_second_splits_errors(self):

    def test__iter_nodes_no_key_filter(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "baz")
        node.add_node("b", child)

        for child, node_key_filter in node._iter_nodes(None, key_filter=None):
            self.assertEqual(None, node_key_filter)

    def test__iter_nodes_splits_key_filter(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "baz")
        node.add_node("b", child)

        # foo and bar both match exactly one leaf node, but 'cat' should not
        # match any, and should not be placed in one.
        key_filter = (('foo',), ('bar',), ('cat',))
        for child, node_key_filter in node._iter_nodes(None,
                                                       key_filter=key_filter):
            # each child could only match one key filter, so make sure it was
            # narrowed down to just that one.
            self.assertEqual(1, len(node_key_filter))

    def test__iter_nodes_with_multiple_matches(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "val")
        child.map(None, ("fob",), "val")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "val")
        child.map(None, ("baz",), "val")
        node.add_node("b", child)

        # Note that 'ram' doesn't match anything, so it should be freely
        # ignored.
        key_filter = (('foo',), ('fob',), ('bar',), ('baz',), ('ram',))
        for child, node_key_filter in node._iter_nodes(None,
                                                       key_filter=key_filter):
            # each child could match two key filters, so make sure they were
            # both preserved.
            self.assertEqual(2, len(node_key_filter))

    def make_fo_fa_node(self):
        node = InternalNode('f')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "val")
        child.map(None, ("fob",), "val")
        node.add_node('fo', child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("far",), "val")
        child.map(None, ("faz",), "val")
        node.add_node("fa", child)
        return node
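
    # The fixture returned by make_fo_fa_node() (given the setup above) is an
    # InternalNode with prefix 'f' and two leaf children: 'fo' holding
    # ('foo',) and ('fob',), and 'fa' holding ('far',) and ('faz',).  The
    # key-filter tests below rely on that shape.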

    def test__iter_nodes_single_entry(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        self.assertEqual(key_filter, nodes[0][1])

    def test__iter_nodes_single_entry_misses(self):
        node = self.make_fo_fa_node()
        key_filter = [('bar',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(0, len(nodes))

    def test__iter_nodes_mixed_key_width(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('b',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        matches = key_filter[:]
        matches.remove(('b',))
        self.assertEqual(sorted(matches), sorted(nodes[0][1]))

    def test__iter_nodes_match_all(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('f',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))

    def test__iter_nodes_fixed_widths_and_misses(self):
        node = self.make_fo_fa_node()
        # foo and faa should both match one child, baz should miss
        key_filter = [('foo',), ('faa',), ('baz',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))
        for node, matches in nodes:
            self.assertEqual(1, len(matches))

    def test_iteritems_empty_new(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems(None)))

    def test_iteritems_two_children(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode()
        leaf2.map(None, ('strange',), 'beast')
        node.add_node("f", leaf1)
        node.add_node("s", leaf2)
        self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')],
                         sorted(node.iteritems(None)))

    def test_iteritems_two_children_partial(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode()
        leaf2.map(None, ('strange',), 'beast')
        node.add_node("f", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items['f'] = None
        node.add_node("s", leaf2)
        self.assertEqual([(('strange',), 'beast')],
                         sorted(node.iteritems(None, [('strange',), ('weird',)])))

    def test_iteritems_two_children_with_hash(self):
        search_key_func = chk_map.search_key_registry.get('hash-255-way')
        node = InternalNode(search_key_func=search_key_func)
        leaf1 = LeafNode(search_key_func=search_key_func)
        leaf1.map(None, StaticTuple('foo bar',), 'quux')
        leaf2 = LeafNode(search_key_func=search_key_func)
        leaf2.map(None, StaticTuple('strange',), 'beast')
        self.assertEqual('\xbeF\x014', search_key_func(StaticTuple('foo bar',)))
        self.assertEqual('\x85\xfa\xf7K', search_key_func(StaticTuple('strange',)))
        node.add_node("\xbe", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items['\xbe'] = None
        node.add_node("\x85", leaf2)
        self.assertEqual([(('strange',), 'beast')],
                         sorted(node.iteritems(None, [StaticTuple('strange',),
                                                      StaticTuple('weird',)])))

    def test_iteritems_partial_empty(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems([('missing',)])))

    def test_map_to_new_child_new(self):
        chkmap = self._get_map({('k1',):'foo', ('k2',):'bar'}, maximum_size=10)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
            len([value for value in node._items.values()
                 if type(value) is StaticTuple]))
        # now, mapping to k3 should add a k3 leaf
        prefix, nodes = node.map(None, ('k3',), 'quux')
        self.assertEqual("k", prefix)
        self.assertEqual([("", node)], nodes)
        # check new child details
        child = node._items['k3']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({('k3',): 'quux'}, self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k2',): 'bar', ('k3',): 'quux'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - k3 and the internal
        # node.
        keys = list(node.serialise(chkmap._store))
        child_key = child.serialise(chkmap._store)[0]
        self.assertEqual([child_key, keys[1]], keys)
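
    # As exercised above, map() hands back a search prefix and a list of
    # (prefix, node) pairs: here the root absorbs the new key, so we get the
    # prefix 'k' and a single ("", node) entry rather than a set of split-out
    # children (contrast with the split case in the next test).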

    def test_map_to_child_child_splits_new(self):
        chkmap = self._get_map({('k1',):'foo', ('k22',):'bar'}, maximum_size=10)
        # Check for the canonical root value for this tree:
        self.assertEqualDiff("'' InternalNode\n"
                             , chkmap._dump_tree())
        # _dump_tree pages everything in, so reload using just the root
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
            len([value for value in node._items.values()
                 if type(value) is StaticTuple]))
        # now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
        # k23, which for simplicity in the current implementation generates
        # a new internal node between node, and k22/k23.
        prefix, nodes = node.map(chkmap._store, ('k23',), 'quux')
        self.assertEqual("k", prefix)
        self.assertEqual([("", node)], nodes)
        # check new child details
        child = node._items['k2']
        self.assertIsInstance(child, InternalNode)
        self.assertEqual(2, len(child))
        self.assertEqual({('k22',): 'bar', ('k23',): 'quux'},
                         self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        self.assertEqual(3, child._node_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar', ('k23',): 'quux'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - although k22 hasn't
        # changed because it's a special corner case (splitting a node with
        # only one key leaves that node unaltered), in general k22 is
        # serialised, so we expect k22, k23, the new internal node, and node,
        # to be serialised.
        keys = list(node.serialise(chkmap._store))
        child_key = child._key
        k22_key = child._items['k22']._key
        k23_key = child._items['k23']._key
        self.assertEqual([k22_key, k23_key, child_key, node.key()], keys)
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())

    def test__search_prefix_filter_with_hash(self):
        search_key_func = chk_map.search_key_registry.get('hash-16-way')
        node = InternalNode(search_key_func=search_key_func)
        node._key_width = 2
        node._node_width = 4
        self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(
            StaticTuple('a', 'b')))
        self.assertEqual('E8B7', node._search_prefix_filter(
            StaticTuple('a', 'b')))
        self.assertEqual('E8B7', node._search_prefix_filter(
            ('a', 'b')))

    def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
        chkmap = self._get_map(
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
        # Check we have the expected tree.
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # unmapping k23 should give us a root, with k1 and k22 as direct
        # children.
        result = node.unmap(chkmap._store, ('k23',))
        # check the pointed-at object within node - k2 should now point at the
        # k22 leaf (which has been paged in to see if we can collapse the tree)
        child = node._items['k2']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({('k22',): 'bar'},
                         self.to_dict(child, None))
        # Check overall structure is intact:
        self.assertEqual(2, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - the root node.
        keys = list(node.serialise(chkmap._store))
        self.assertEqual([keys[-1]], keys)
        chkmap = CHKMap(chkmap._store, keys[-1])
        self.assertEqualDiff("'' InternalNode\n"
                             , chkmap._dump_tree())

    def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self):
        chkmap = self._get_map(
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())
        orig_root = chkmap._root_node
        chkmap = CHKMap(chkmap._store, orig_root)
        chkmap._ensure_root()
        node = chkmap._root_node
        k2_ptr = node._items['k2']
        # unmapping k1 should give us a root, with k22 and k23 as direct
        # children, and should not have needed to page in the subtree.
        result = node.unmap(chkmap._store, ('k1',))
        self.assertEqual(k2_ptr, result)
        chkmap = CHKMap(chkmap._store, orig_root)
        # Unmapping at the CHKMap level should switch to the new root
        chkmap.unmap(('k1',))
        self.assertEqual(k2_ptr, chkmap._root_node)
        self.assertEqualDiff("'' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())


# map -> fits - done
# map -> doesn't fit - shrink from left till fits
# key data to return: the common prefix, new nodes.

# unmap -> how to tell if siblings can be combined.
# combining leaf nodes means expanding the prefix to the left; so gather the size of
# all the leaf nodes addressed by expanding the prefix by 1; if any adjacent node
# is an internal node, we know that that is a dense subtree - can't combine.
# otherwise as soon as the sum of serialised values exceeds the split threshold
# we know we can't combine - stop.
# unmap -> key return data - space in node, common prefix length? and key count

# variable length prefixes? -> later start with fixed width to get something going
# map -> fits - update pointer to leaf
# return [prefix and node] - seems sound.
# map -> doesn't fit - find unique prefix and shift right
# create internal nodes for all the partitions, return list of unique
# prefixes and nodes.
# map -> new prefix - create a leaf
# unmap -> if child key count 0, remove
# unmap -> return space in node, common prefix length? (why?), key count
# map, if 1 node returned, use it, otherwise make an internal and populate.
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
# code)
# map inits as empty leafnode.

# AA, AB, AC, AD, BA
# packed internal node - ideal:
# AA, AB, AC, AD, BA
# single byte fanout - A,B, AA,AB,AC,AD, BA

# AB - split, but we want to end up with AB, BA, in one node, with
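
# An illustrative sketch of the prefix arithmetic the notes above lean on,
# assuming the plain search-key function (where a search key is just the raw
# key bytes):
#
#   >>> Node.common_prefix_for_keys(['AA', 'AB', 'AC', 'AD'])
#   'A'
#   >>> Node.common_prefix_for_keys(['AA', 'AB', 'AC', 'AD', 'BA'])
#   ''
#
# so a single-byte fanout at the root splits those keys into the 'A' and 'B'
# partitions sketched above.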


class TestCHKMapDifference(TestCaseWithExampleMaps):

    def get_difference(self, new_roots, old_roots,
                       search_key_func=None):
        if search_key_func is None:
            search_key_func = chk_map._search_key_plain
        return chk_map.CHKMapDifference(self.get_chk_bytes(),
                                        new_roots, old_roots, search_key_func)
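
    # The tests below poke directly at CHKMapDifference internals: as the
    # assertions show, _read_all_roots() yields the records for new root
    # pages that are not already known old, while loading child references
    # into _old_queue/_new_queue and leaf items into _new_item_queue for the
    # later passes.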

    def test__init__(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map.map(('aaa',), 'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1])
        self.assertEqual(set([key1]), diff._all_old_chks)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([], diff._new_queue)

    def help__read_all_roots(self, search_key_func):
        c_map = self.make_root_only_map(search_key_func=search_key_func)
        key1 = c_map.key()
        c_map.map(('aaa',), 'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1], search_key_func)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # We should have queued up only items that aren't in the old
        # set.
        self.assertEqual([(('aaa',), 'new aaa content')],
                         diff._new_item_queue)
        self.assertEqual([], diff._new_queue)
        # And there are no old references, so that queue should be
        # empty.
        self.assertEqual([], diff._old_queue)

    def test__read_all_roots_plain(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_plain)

    def test__read_all_roots_16(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_16)

    def test__read_all_roots_skips_known_old(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map2 = self.make_root_only_map(chk_map._search_key_plain)
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        # We should have no results. key2 is completely contained within key1,
        # and we should have seen that in the first pass.
        self.assertEqual([], root_results)

    def test__read_all_roots_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        c_map.map(('abb',), 'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # At this point, we should have queued up only the 'a' leaf on both
        # sides; both 'c' and 'd' are known not to have changed on either side.
        self.assertEqual([key2_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        self.assertEqual([key1_a], diff._old_queue)

    def test__read_all_roots_multi_new_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        key1_c = c_map._root_node._items['c'].key()
        c_map.map(('abb',), 'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        key2_c = c_map._root_node._items['c'].key()
        c_map = chk_map.CHKMap(self.get_chk_bytes(), key1,
                               chk_map._search_key_plain)
        c_map.map(('ccc',), 'new ccc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        key3_c = c_map._root_node._items['c'].key()
        diff = self.get_difference([key2, key3], [key1],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        # We should have queued up key2_a and key3_c, but not key2_c or key3_a
        self.assertEqual([key2_a, key3_c], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        # And we should have queued up both a and c for the old set
        self.assertEqual([key1_a, key1_c], diff._old_queue)

    def test__read_all_roots_different_depths(self):
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_a = c_map._root_node._items['a'].key()
        key1_c = c_map._root_node._items['c'].key()
        key1_d = c_map._root_node._items['d'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_aa = c_map2._root_node._items['aa'].key()
        key2_ad = c_map2._root_node._items['ad'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the 'a' subset should be queued up, since 'c' and 'd' cannot be
        # present in the new map.
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([key2_aa, key2_ad], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual([key2_aa, key2_ad], diff._old_queue)
        self.assertEqual([key1_a, key1_c, key1_d], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_different_depths_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_2 = c_map._root_node._items['2'].key()
        key1_4 = c_map._root_node._items['4'].key()
        key1_C = c_map._root_node._items['C'].key()
        key1_F = c_map._root_node._items['F'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_16)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_F0 = c_map2._root_node._items['F0'].key()
        key2_F3 = c_map2._root_node._items['F3'].key()
        key2_F4 = c_map2._root_node._items['F4'].key()
        key2_FD = c_map2._root_node._items['FD'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the subset of keys that may be present should be queued up.
        self.assertEqual([key1_F], diff._old_queue)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
                         sorted(diff._old_queue))
        self.assertEqual(sorted([key1_2, key1_4, key1_C, key1_F]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_mixed_depth(self):
        c_map = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_aa = c_map._root_node._items['aa'].key()
        key1_ad = c_map._root_node._items['ad'].key()

        c_map2 = self.make_one_deep_one_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_a = c_map2._root_node._items['a'].key()
        key2_b = c_map2._root_node._items['b'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # 'ad' matches exactly 'a' on the other side, so it should be removed,
        # and neither side should have it queued for walking
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key2_b], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        # Note: This is technically not the 'true minimal' set that we could
        # use. The reason is that 'a' was matched exactly to 'ad' (by sha
        # sum). However, the code gets complicated in the case of more
        # than one interesting key, so for now we live with this.
        # Consider revising, though benchmarking showing it to be a
        # real-world issue should be done first.
        self.assertEqual([key2_a], diff._old_queue)
        # self.assertEqual([], diff._old_queue)
        self.assertEqual([key1_aa], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_yields_extra_deep_records(self):
        # This is slightly controversial, as we will yield a chk page that we
        # might later on find out could be filtered out. (If a root node is
        # referenced deeper in the old set.)
        # However, even with stacking, we always have all chk pages that we
        # will need. So as long as we filter out the referenced keys, we'll
        # never run into problems.
        # This allows us to yield a root node record immediately, without any
        # buffering.
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree() # load all keys
        key1 = c_map.key()
        key1_a = c_map._root_node._items['a'].key()
        c_map2 = self.get_map({
            ('acc',): 'initial acc content',
            ('ace',): 'initial ace content',
            }, maximum_size=100)
        self.assertEqualDiff(
            " ('acc',) 'initial acc content'\n"
            " ('ace',) 'initial ace content'\n",
            c_map2._dump_tree())
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # However, even though we have yielded the root node to be fetched,
        # we should have enqueued all of the chk pages to be walked, so that
        # we can find the keys if they are present.
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([(('acc',), 'initial acc content'),
                          (('ace',), 'initial ace content'),
                         ], diff._new_item_queue)

    def test__read_all_roots_multiple_targets(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map = self.make_one_deep_map()
        key2 = c_map.key()
        c_map._dump_tree() # load everything
        key2_c = c_map._root_node._items['c'].key()
        key2_d = c_map._root_node._items['d'].key()
        c_map.map(('ccc',), 'new ccc value')
        key3 = c_map._save()
        key3_c = c_map._root_node._items['c'].key()
        diff = self.get_difference([key2, key3], [key1],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        self.assertEqual([], diff._old_queue)
        # the key 'd' is interesting from key2 and key3, but should only be
        # entered into the queue once
        self.assertEqual(sorted([key2_c, key3_c, key2_d]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old(self):
        # This is the 'initial branch' case. With nothing in the old set, we
        # can just queue up all root nodes into the interesting queue, and
        # then have them fast-path flushed via _flush_new_queue
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map()
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]), sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map(chk_map._search_key_16)
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [],
                                   chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_multiple_old(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        c_map.map(('ccc',), 'new ccc value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        c_map.map(('add',), 'new add value')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        diff = self.get_difference([key3], [key1, key2],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        # the 'a' keys should not be queued up 2 times, since they are
        # identical
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__process_next_old_batched_no_dupes(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        key1_aa = c_map._root_node._items['a']._items['aa'].key()
        key1_ab = c_map._root_node._items['a']._items['ab'].key()
        key1_ac = c_map._root_node._items['a']._items['ac'].key()
        key1_ad = c_map._root_node._items['a']._items['ad'].key()
        c_map.map(('aaa',), 'new aaa value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        key2_aa = c_map._root_node._items['a']._items['aa'].key()
        c_map.map(('acc',), 'new acc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        key3_ac = c_map._root_node._items['a']._items['ac'].key()
        diff = self.get_difference([key3], [key1, key2],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        self.assertEqual(sorted([key1_a, key2_a]),
                         sorted(diff._old_queue))
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        diff._process_next_old()
        # All of the old records should be brought in and queued up,
        # but we should not have any duplicates
        self.assertEqual(sorted([key1_aa, key1_ab, key1_ac, key1_ad, key2_aa]),
                         sorted(diff._old_queue))
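
    # _process_next_old() drains the pending _old_queue as a single batch,
    # reads those pages, and re-queues their child references; as asserted
    # above, a child shared by both old roots is only queued once, and only
    # the 'aa' leaf differs between the two old sides.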


class TestIterInterestingNodes(TestCaseWithExampleMaps):

    def get_map_key(self, a_dict, maximum_size=10):
        c_map = self.get_map(a_dict, maximum_size=maximum_size)
        return c_map.key()

    def assertIterInteresting(self, records, items, interesting_keys,
                              old_keys):
        """Check the result of iter_interesting_nodes.

        Note that we no longer care how many steps are taken, etc, just that
        the right contents are returned.

        :param records: A list of record keys that should be yielded
        :param items: A list of items (key,value) that should be yielded.
        """
        store = self.get_chk_bytes()
        store._search_key_func = chk_map._search_key_plain
        iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys,
                                                    old_keys)
        record_keys = []
        all_items = []
        for record, new_items in iter_nodes:
            if record is not None:
                record_keys.append(record.key)
            if new_items:
                all_items.extend(new_items)
        self.assertEqual(sorted(records), sorted(record_keys))
        self.assertEqual(sorted(items), sorted(all_items))
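
    # In the tests below, 'records' lists the chk pages we expect
    # iter_interesting_nodes to yield (the new roots plus any pages unique to
    # the new maps), while 'items' lists the (key, value) pairs that are not
    # present in any of the basis maps.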

    def test_empty_to_one_keys(self):
        target = self.get_map_key({('a',): 'content'})
        self.assertIterInteresting([target],
                                   [(('a',), 'content')],
                                   [target], [])

    def test_none_to_one_key(self):
        basis = self.get_map_key({})
        target = self.get_map_key({('a',): 'content'})
        self.assertIterInteresting([target],
                                   [(('a',), 'content')],
                                   [target], [basis])

    def test_one_to_none_key(self):
        basis = self.get_map_key({('a',): 'content'})
        target = self.get_map_key({})
        self.assertIterInteresting([target],
                                   [],
                                   [target], [basis])

    def test_common_pages(self):
        basis = self.get_map_key({('a',): 'content',
                                  ('b',): 'content',
                                  ('c',): 'content',
                                  })
        target = self.get_map_key({('a',): 'content',
                                   ('b',): 'other content',
                                   ('c',): 'content',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " ('a',) 'content'\n"
            " ('b',) 'other content'\n"
            " ('c',) 'content'\n",
            target_map._dump_tree())
        b_key = target_map._root_node._items['b'].key()
        # This should return the root node, and the node for the 'b' key
        self.assertIterInteresting([target, b_key],
                                   [(('b',), 'other content')],
                                   [target], [basis])

    def test_common_sub_page(self):
        basis = self.get_map_key({('aaa',): 'common',
                                  ('c',): 'common',
                                  })
        target = self.get_map_key({('aaa',): 'common',
                                   ('aab',): 'new',
                                   ('c',): 'common',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('c',) 'common'\n",
            target_map._dump_tree())
        # The key for the internal 'a' node
        a_key = target_map._root_node._items['a'].key()
        # The key for the leaf aab node
        # aaa_key = target_map._root_node._items['a']._items['aaa'].key()
        aab_key = target_map._root_node._items['a']._items['aab'].key()
        self.assertIterInteresting([target, a_key, aab_key],
                                   [(('aab',), 'new')],
                                   [target], [basis])

    def test_common_leaf(self):
        basis = self.get_map_key({})
        target1 = self.get_map_key({('aaa',): 'common'})
        target2 = self.get_map_key({('aaa',): 'common',
                                    ('bbb',): 'new',
                                    })
        target3 = self.get_map_key({('aaa',): 'common',
                                    ('aac',): 'other',
                                    ('bbb',): 'new',
                                    })
        # The LeafNode containing 'aaa': 'common' occurs at 3 different levels.
        # Once as a root node, once as a second layer, and once as a third
        # layer. It should only be returned one time regardless.
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n",
            target1_map._dump_tree())
        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " ('bbb',) 'new'\n",
            target2_map._dump_tree())
        target3_map = CHKMap(self.get_chk_bytes(), target3)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'other'\n"
            " ('bbb',) 'new'\n",
            target3_map._dump_tree())
        aaa_key = target1_map._root_node.key()
        b_key = target2_map._root_node._items['b'].key()
        a_key = target3_map._root_node._items['a'].key()
        aac_key = target3_map._root_node._items['a']._items['aac'].key()
        self.assertIterInteresting(
            [target1, target2, target3, a_key, aac_key, b_key],
            [(('aaa',), 'common'), (('bbb',), 'new'), (('aac',), 'other')],
            [target1, target2, target3], [basis])

        self.assertIterInteresting(
            [target2, target3, a_key, aac_key, b_key],
            [(('bbb',), 'new'), (('aac',), 'other')],
            [target2, target3], [target1])

        # Technically, target1 could be filtered out, but since it is a root
        # node, we yield it immediately, rather than waiting to find out much
        # later whether it was needed.
        self.assertIterInteresting(
            [target1],
            [],
            [target1], [target3])

    def test_multiple_maps(self):
        basis1 = self.get_map_key({('aaa',): 'common',
                                   })
        basis2 = self.get_map_key({('bbb',): 'common',
                                   })
        target1 = self.get_map_key({('aaa',): 'common',
                                    ('aac',): 'target1',
                                    ('bbb',): 'common',
                                    })
        target2 = self.get_map_key({('aaa',): 'common',
                                    ('bba',): 'target2',
                                    ('bbb',): 'common',
                                    })
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'target1'\n"
            " ('bbb',) 'common'\n",
            target1_map._dump_tree())
        # The key for the target1 internal 'a' node
        a_key = target1_map._root_node._items['a'].key()
        # The key for the leaf aac node
        aac_key = target1_map._root_node._items['a']._items['aac'].key()

        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " 'b' InternalNode\n"
            " ('bba',) 'target2'\n"
            " ('bbb',) 'common'\n",
            target2_map._dump_tree())
        # The key for the target2 internal 'b' node
        b_key = target2_map._root_node._items['b'].key()
        # The key for the leaf bba node
        bba_key = target2_map._root_node._items['b']._items['bba'].key()
        self.assertIterInteresting(
            [target1, target2, a_key, aac_key, b_key, bba_key],
            [(('aac',), 'target1'), (('bba',), 'target2')],
            [target1, target2], [basis1, basis2])

    def test_multiple_maps_overlapping_common_new(self):
        # Test that when a node is found through the interesting_keys
        # iteration for *some roots* and also via the old keys iteration,
        # it is still scanned for old refs and items, because it's
        # not truly new. This requires 2 levels of InternalNodes to expose,
        # because of the way the bootstrap in _find_children_info works.
        # This suggests that the code is probably amenable to/benefits from
        # simplification.
        # How does this test work?
        # 1) We need a second level InternalNode present in a basis tree.
        # 2) We need a left side new tree that uses that InternalNode
        # 3) We need a right side new tree that does not use that InternalNode
        #    at all but that has an unchanged *value* that was reachable
        #    inside that InternalNode
        basis = self.get_map_key({
            # InternalNode, unchanged in left:
            ('aaa',): 'left',
            ('abb',): 'right',
            # Forces an internalNode at 'a'
            ('ccc',): 'common',
            })
        left = self.get_map_key({
            # All of basis unchanged
            ('aaa',): 'left',
            ('abb',): 'right',
            ('ccc',): 'common',
            # And a new top level node so the root key is different
            ('ddd',): 'change',
            })
        right = self.get_map_key({
            # A value that is unchanged from basis and thus should be filtered
            # out.
            ('abb',): 'right',
            })
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n"
            " ('ddd',) 'change'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_d_key = left_map._root_node._items['d'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('abb',) 'right'\n",
            right_map._dump_tree())
        # Keys from the right side target - none, the root is enough.
        self.assertIterInteresting(
            [right, left, l_d_key],
            [(('ddd',), 'change')],
            [left, right], [basis])

    def test_multiple_maps_similar(self):
        # We want to have a depth=2 tree, with multiple entries in each leaf
        # node.
        basis = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'will change left',
            ('caa',): 'unchanged',
            ('cbb',): 'will change right',
            }, maximum_size=60)
        left = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'changed left',
            ('caa',): 'unchanged',
            ('cbb',): 'will change right',
            }, maximum_size=60)
        right = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'will change left',
            ('caa',): 'unchanged',
            ('cbb',): 'changed right',
            }, maximum_size=60)
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'changed left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_a_key = left_map._root_node._items['a'].key()
        l_c_key = left_map._root_node._items['c'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'changed right'\n",
            right_map._dump_tree())
        r_a_key = right_map._root_node._items['a'].key()
        r_c_key = right_map._root_node._items['c'].key()
        self.assertIterInteresting(
            [right, left, l_a_key, r_c_key],
            [(('abb',), 'changed left'), (('cbb',), 'changed right')],
            [left, right], [basis])