# Copyright (C) 2008, 2009, 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for maps built on a CHK versionedfiles facility."""

from itertools import izip

from bzrlib import (
    chk_map,
    errors,
    groupcompress,
    osutils,
    tests,
    )
from bzrlib.chk_map import (
    CHKMap,
    InternalNode,
    LeafNode,
    Node,
    )
from bzrlib.static_tuple import StaticTuple

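# A CHKMap stores key->value pairs as a trie of content-addressed nodes
# (LeafNode/InternalNode), each persisted as a 'sha1:...' record in a
# VersionedFiles store; the tests below exercise building, mutating and
# comparing such maps.
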
class TestNode(tests.TestCase):

    def assertCommonPrefix(self, expected_common, prefix, key):
        common = Node.common_prefix(prefix, key)
        self.assertTrue(len(common) <= len(prefix))
        self.assertTrue(len(common) <= len(key))
        self.assertStartsWith(prefix, common)
        self.assertStartsWith(key, common)
        self.assertEquals(expected_common, common)

    def test_common_prefix(self):
        self.assertCommonPrefix('beg', 'beg', 'begin')

    def test_no_common_prefix(self):
        self.assertCommonPrefix('', 'begin', 'end')

    def test_equal(self):
        self.assertCommonPrefix('begin', 'begin', 'begin')

    def test_not_a_prefix(self):
        self.assertCommonPrefix('b', 'begin', 'b')

    def test_empty(self):
        self.assertCommonPrefix('', '', 'end')
        self.assertCommonPrefix('', 'begin', '')
        self.assertCommonPrefix('', '', '')

class TestCaseWithStore(tests.TestCaseWithMemoryTransport):

    def get_chk_bytes(self):
        # This creates a standalone CHK store.
        factory = groupcompress.make_pack_factory(False, False, 1)
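        # make_pack_factory's arguments are (graph, delta, keylength), so this
        # store keeps no revision graph, applies no delta compression, and
        # uses 1-element keys.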
        self.chk_bytes = factory(self.get_transport())
        return self.chk_bytes

    def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,
                 search_key_func=None):
        if chk_bytes is None:
            chk_bytes = self.get_chk_bytes()
        root_key = CHKMap.from_dict(chk_bytes, a_dict,
            maximum_size=maximum_size, key_width=key_width,
            search_key_func=search_key_func)
        root_key2 = CHKMap._create_via_map(chk_bytes, a_dict,
            maximum_size=maximum_size, key_width=key_width,
            search_key_func=search_key_func)
        self.assertEqual(root_key, root_key2, "CHKMap.from_dict() did not"
                         " match CHKMap._create_via_map")
        chkmap = CHKMap(chk_bytes, root_key, search_key_func=search_key_func)
        return chkmap

    def read_bytes(self, chk_bytes, key):
        stream = chk_bytes.get_record_stream([key], 'unordered', True)
        record = stream.next()
        if record.storage_kind == 'absent':
            self.fail('Store does not contain the key %s' % (key,))
        return record.get_bytes_as("fulltext")

    def to_dict(self, node, *args):
        return dict(node.iteritems(*args))

class TestCaseWithExampleMaps(TestCaseWithStore):

    def get_chk_bytes(self):
        if getattr(self, '_chk_bytes', None) is None:
            self._chk_bytes = super(TestCaseWithExampleMaps,
                                    self).get_chk_bytes()
        return self._chk_bytes

    def get_map(self, a_dict, maximum_size=100, search_key_func=None):
        c_map = self._get_map(a_dict, maximum_size=maximum_size,
                              chk_bytes=self.get_chk_bytes(),
                              search_key_func=search_key_func)
        return c_map

    def make_root_only_map(self, search_key_func=None):
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('abb',): 'initial abb content',
        }, search_key_func=search_key_func)

    def make_root_only_aaa_ddd_map(self, search_key_func=None):
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('ddd',): 'initial ddd content',
        }, search_key_func=search_key_func)

    def make_one_deep_map(self, search_key_func=None):
        # Same as root_only_map, except it forces an InternalNode at the root
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('abb',): 'initial abb content',
            ('ccc',): 'initial ccc content',
            ('ddd',): 'initial ddd content',
        }, search_key_func=search_key_func)

    def make_two_deep_map(self, search_key_func=None):
        # Carefully chosen so that it creates a 2-deep map for both
        # _search_key_plain and for _search_key_16
        # Also so that things line up with make_one_deep_two_prefix_map
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('abb',): 'initial abb content',
            ('acc',): 'initial acc content',
            ('ace',): 'initial ace content',
            ('add',): 'initial add content',
            ('adh',): 'initial adh content',
            ('adl',): 'initial adl content',
            ('ccc',): 'initial ccc content',
            ('ddd',): 'initial ddd content',
        }, search_key_func=search_key_func)

    def make_one_deep_two_prefix_map(self, search_key_func=None):
        """Create a map with one internal node, but references are extra long.

        Otherwise has similar content to make_two_deep_map.
        """
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('add',): 'initial add content',
            ('adh',): 'initial adh content',
            ('adl',): 'initial adl content',
        }, search_key_func=search_key_func)

    def make_one_deep_one_prefix_map(self, search_key_func=None):
        """Create a map with one internal node, but references are extra long.

        Similar to make_one_deep_two_prefix_map, except the split is at the
        first char, rather than the second.
        """
        return self.get_map({
            ('add',): 'initial add content',
            ('adh',): 'initial adh content',
            ('adl',): 'initial adl content',
            ('bbb',): 'initial bbb content',
        }, search_key_func=search_key_func)

class TestTestCaseWithExampleMaps(TestCaseWithExampleMaps):
177
"""Actual tests for the provided examples."""
179
def test_root_only_map_plain(self):
180
c_map = self.make_root_only_map()
181
self.assertEqualDiff(
183
" ('aaa',) 'initial aaa content'\n"
184
" ('abb',) 'initial abb content'\n",
187
def test_root_only_map_16(self):
188
c_map = self.make_root_only_map(search_key_func=chk_map._search_key_16)
189
self.assertEqualDiff(
191
" ('aaa',) 'initial aaa content'\n"
192
" ('abb',) 'initial abb content'\n",
195
def test_one_deep_map_plain(self):
196
c_map = self.make_one_deep_map()
197
self.assertEqualDiff(
200
" ('aaa',) 'initial aaa content'\n"
201
" ('abb',) 'initial abb content'\n"
203
" ('ccc',) 'initial ccc content'\n"
205
" ('ddd',) 'initial ddd content'\n",
208
def test_one_deep_map_16(self):
209
c_map = self.make_one_deep_map(search_key_func=chk_map._search_key_16)
210
self.assertEqualDiff(
213
" ('ccc',) 'initial ccc content'\n"
215
" ('abb',) 'initial abb content'\n"
217
" ('aaa',) 'initial aaa content'\n"
218
" ('ddd',) 'initial ddd content'\n",
221
def test_root_only_aaa_ddd_plain(self):
222
c_map = self.make_root_only_aaa_ddd_map()
223
self.assertEqualDiff(
225
" ('aaa',) 'initial aaa content'\n"
226
" ('ddd',) 'initial ddd content'\n",
229
def test_one_deep_map_16(self):
230
c_map = self.make_root_only_aaa_ddd_map(
231
search_key_func=chk_map._search_key_16)
232
# We use 'aaa' and 'ddd' because they happen to map to 'F' when using
234
self.assertEqualDiff(
236
" ('aaa',) 'initial aaa content'\n"
237
" ('ddd',) 'initial ddd content'\n",
240
def test_two_deep_map_plain(self):
241
c_map = self.make_two_deep_map()
242
self.assertEqualDiff(
244
" 'a' InternalNode\n"
246
" ('aaa',) 'initial aaa content'\n"
248
" ('abb',) 'initial abb content'\n"
250
" ('acc',) 'initial acc content'\n"
251
" ('ace',) 'initial ace content'\n"
253
" ('add',) 'initial add content'\n"
254
" ('adh',) 'initial adh content'\n"
255
" ('adl',) 'initial adl content'\n"
257
" ('ccc',) 'initial ccc content'\n"
259
" ('ddd',) 'initial ddd content'\n",
262
def test_two_deep_map_16(self):
263
c_map = self.make_two_deep_map(search_key_func=chk_map._search_key_16)
264
self.assertEqualDiff(
267
" ('acc',) 'initial acc content'\n"
268
" ('ccc',) 'initial ccc content'\n"
270
" ('abb',) 'initial abb content'\n"
272
" ('ace',) 'initial ace content'\n"
273
" 'F' InternalNode\n"
275
" ('aaa',) 'initial aaa content'\n"
277
" ('adl',) 'initial adl content'\n"
279
" ('adh',) 'initial adh content'\n"
281
" ('ddd',) 'initial ddd content'\n"
283
" ('add',) 'initial add content'\n",
286
def test_one_deep_two_prefix_map_plain(self):
287
c_map = self.make_one_deep_two_prefix_map()
288
self.assertEqualDiff(
291
" ('aaa',) 'initial aaa content'\n"
293
" ('add',) 'initial add content'\n"
294
" ('adh',) 'initial adh content'\n"
295
" ('adl',) 'initial adl content'\n",
298
def test_one_deep_two_prefix_map_16(self):
299
c_map = self.make_one_deep_two_prefix_map(
300
search_key_func=chk_map._search_key_16)
301
self.assertEqualDiff(
304
" ('aaa',) 'initial aaa content'\n"
306
" ('adl',) 'initial adl content'\n"
308
" ('adh',) 'initial adh content'\n"
310
" ('add',) 'initial add content'\n",
313
def test_one_deep_one_prefix_map_plain(self):
314
c_map = self.make_one_deep_one_prefix_map()
315
self.assertEqualDiff(
318
" ('add',) 'initial add content'\n"
319
" ('adh',) 'initial adh content'\n"
320
" ('adl',) 'initial adl content'\n"
322
" ('bbb',) 'initial bbb content'\n",
325
def test_one_deep_one_prefix_map_16(self):
326
c_map = self.make_one_deep_one_prefix_map(
327
search_key_func=chk_map._search_key_16)
328
self.assertEqualDiff(
331
" ('bbb',) 'initial bbb content'\n"
333
" ('add',) 'initial add content'\n"
334
" ('adh',) 'initial adh content'\n"
335
" ('adl',) 'initial adl content'\n",
339
class TestMap(TestCaseWithStore):
341
def assertHasABMap(self, chk_bytes):
342
ab_leaf_bytes = 'chkleaf:\n0\n1\n1\na\n\x001\nb\n'
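        # The 'chkleaf' serialisation used above is: header line, maximum
        # size, key width, item count, common serialised prefix, then one
        # "<key remainder>\x00<value line count>\n<value>\n" entry per item.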
343
ab_sha1 = osutils.sha_string(ab_leaf_bytes)
344
self.assertEqual('90986195696b177c8895d48fdb4b7f2366f798a0', ab_sha1)
345
root_key = ('sha1:' + ab_sha1,)
346
self.assertEqual(ab_leaf_bytes, self.read_bytes(chk_bytes, root_key))
        return root_key

def assertHasEmptyMap(self, chk_bytes):
350
empty_leaf_bytes = 'chkleaf:\n0\n1\n0\n\n'
351
empty_sha1 = osutils.sha_string(empty_leaf_bytes)
352
self.assertEqual('8571e09bf1bcc5b9621ce31b3d4c93d6e9a1ed26', empty_sha1)
353
root_key = ('sha1:' + empty_sha1,)
354
self.assertEqual(empty_leaf_bytes, self.read_bytes(chk_bytes, root_key))
        return root_key

def assertMapLayoutEqual(self, map_one, map_two):
358
"""Assert that the internal structure is identical between the maps."""
359
map_one._ensure_root()
360
node_one_stack = [map_one._root_node]
361
map_two._ensure_root()
362
node_two_stack = [map_two._root_node]
363
while node_one_stack:
364
node_one = node_one_stack.pop()
365
node_two = node_two_stack.pop()
366
if node_one.__class__ != node_two.__class__:
367
self.assertEqualDiff(map_one._dump_tree(include_keys=True),
368
map_two._dump_tree(include_keys=True))
369
self.assertEqual(node_one._search_prefix,
370
node_two._search_prefix)
371
if isinstance(node_one, InternalNode):
372
# Internal nodes must have identical references
373
self.assertEqual(sorted(node_one._items.keys()),
374
sorted(node_two._items.keys()))
375
node_one_stack.extend([n for n, _ in
376
node_one._iter_nodes(map_one._store)])
377
node_two_stack.extend([n for n, _ in
378
node_two._iter_nodes(map_two._store)])
380
# Leaf nodes must have identical contents
381
self.assertEqual(node_one._items, node_two._items)
382
self.assertEquals([], node_two_stack)
384
def assertCanonicalForm(self, chkmap):
        """Assert that the chkmap is in 'canonical' form.

        We do this by adding all of the key value pairs from scratch, both in
        forward order and reverse order, and assert that the final tree layout
        is identical.
        """
items = list(chkmap.iteritems())
392
map_forward = chk_map.CHKMap(None, None)
393
map_forward._root_node.set_maximum_size(chkmap._root_node.maximum_size)
394
for key, value in items:
395
map_forward.map(key, value)
396
self.assertMapLayoutEqual(map_forward, chkmap)
397
map_reverse = chk_map.CHKMap(None, None)
398
map_reverse._root_node.set_maximum_size(chkmap._root_node.maximum_size)
399
for key, value in reversed(items):
400
map_reverse.map(key, value)
401
self.assertMapLayoutEqual(map_reverse, chkmap)
403
def test_assert_map_layout_equal(self):
404
store = self.get_chk_bytes()
405
map_one = CHKMap(store, None)
406
map_one._root_node.set_maximum_size(20)
407
map_two = CHKMap(store, None)
408
map_two._root_node.set_maximum_size(20)
409
self.assertMapLayoutEqual(map_one, map_two)
410
map_one.map('aaa', 'value')
411
self.assertRaises(AssertionError,
412
self.assertMapLayoutEqual, map_one, map_two)
413
map_two.map('aaa', 'value')
414
self.assertMapLayoutEqual(map_one, map_two)
415
# Split the tree, so we ensure that internal nodes and leaf nodes are
417
map_one.map('aab', 'value')
418
self.assertIsInstance(map_one._root_node, InternalNode)
419
self.assertRaises(AssertionError,
420
self.assertMapLayoutEqual, map_one, map_two)
421
map_two.map('aab', 'value')
422
self.assertMapLayoutEqual(map_one, map_two)
423
map_one.map('aac', 'value')
424
self.assertRaises(AssertionError,
425
self.assertMapLayoutEqual, map_one, map_two)
426
self.assertCanonicalForm(map_one)
428
def test_from_dict_empty(self):
429
chk_bytes = self.get_chk_bytes()
430
root_key = CHKMap.from_dict(chk_bytes, {})
431
# Check the data was saved and inserted correctly.
432
expected_root_key = self.assertHasEmptyMap(chk_bytes)
433
self.assertEqual(expected_root_key, root_key)
435
def test_from_dict_ab(self):
436
chk_bytes = self.get_chk_bytes()
437
root_key = CHKMap.from_dict(chk_bytes, {"a": "b"})
438
# Check the data was saved and inserted correctly.
439
expected_root_key = self.assertHasABMap(chk_bytes)
440
self.assertEqual(expected_root_key, root_key)
442
def test_apply_empty_ab(self):
443
# applying a delta (None, "a", "b") to an empty chkmap generates the
444
# same map as from_dict_ab.
445
chk_bytes = self.get_chk_bytes()
446
root_key = CHKMap.from_dict(chk_bytes, {})
447
chkmap = CHKMap(chk_bytes, root_key)
448
new_root = chkmap.apply_delta([(None, "a", "b")])
449
# Check the data was saved and inserted correctly.
450
expected_root_key = self.assertHasABMap(chk_bytes)
451
self.assertEqual(expected_root_key, new_root)
452
# The update should have left us with an in memory root node, with an
454
self.assertEqual(new_root, chkmap._root_node._key)
456
def test_apply_ab_empty(self):
457
# applying a delta ("a", None, None) to a map with 'a' in it generates
459
chk_bytes = self.get_chk_bytes()
460
root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
461
chkmap = CHKMap(chk_bytes, root_key)
462
new_root = chkmap.apply_delta([(("a",), None, None)])
463
# Check the data was saved and inserted correctly.
464
expected_root_key = self.assertHasEmptyMap(chk_bytes)
465
self.assertEqual(expected_root_key, new_root)
466
# The update should have left us with an in memory root node, with an
468
self.assertEqual(new_root, chkmap._root_node._key)
470
def test_apply_delete_to_internal_node(self):
471
        # applying a delta should convert an internal root node to a leaf
472
# node if the delta shrinks the map enough.
473
store = self.get_chk_bytes()
474
chkmap = CHKMap(store, None)
475
# Add three items: 2 small enough to fit in one node, and one huge to
476
# force multiple nodes.
477
chkmap._root_node.set_maximum_size(100)
478
chkmap.map(('small',), 'value')
479
chkmap.map(('little',), 'value')
480
chkmap.map(('very-big',), 'x' * 100)
481
# (Check that we have constructed the scenario we want to test)
482
self.assertIsInstance(chkmap._root_node, InternalNode)
483
# Delete the huge item so that the map fits in one node again.
484
delta = [(('very-big',), None, None)]
485
chkmap.apply_delta(delta)
486
self.assertCanonicalForm(chkmap)
487
self.assertIsInstance(chkmap._root_node, LeafNode)
489
def test_apply_new_keys_must_be_new(self):
490
# applying a delta (None, "a", "b") to a map with 'a' in it generates
492
chk_bytes = self.get_chk_bytes()
493
root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
494
chkmap = CHKMap(chk_bytes, root_key)
495
self.assertRaises(errors.InconsistentDelta, chkmap.apply_delta,
496
[(None, ("a",), "b")])
497
        # As an error occurred, the update should have left us without changing
498
# anything (the root should be unchanged).
499
self.assertEqual(root_key, chkmap._root_node._key)
501
def test_apply_delta_is_deterministic(self):
502
chk_bytes = self.get_chk_bytes()
503
chkmap1 = CHKMap(chk_bytes, None)
504
chkmap1._root_node.set_maximum_size(10)
505
chkmap1.apply_delta([(None, ('aaa',), 'common'),
506
(None, ('bba',), 'target2'),
507
(None, ('bbb',), 'common')])
508
root_key1 = chkmap1._save()
509
self.assertCanonicalForm(chkmap1)
511
chkmap2 = CHKMap(chk_bytes, None)
512
chkmap2._root_node.set_maximum_size(10)
513
chkmap2.apply_delta([(None, ('bbb',), 'common'),
514
(None, ('bba',), 'target2'),
515
(None, ('aaa',), 'common')])
516
root_key2 = chkmap2._save()
517
self.assertEqualDiff(chkmap1._dump_tree(include_keys=True),
518
chkmap2._dump_tree(include_keys=True))
519
self.assertEqual(root_key1, root_key2)
520
self.assertCanonicalForm(chkmap2)
522
def test_stable_splitting(self):
523
store = self.get_chk_bytes()
524
chkmap = CHKMap(store, None)
525
# Should fit 2 keys per LeafNode
526
chkmap._root_node.set_maximum_size(35)
527
chkmap.map(('aaa',), 'v')
528
self.assertEqualDiff("'' LeafNode\n"
531
chkmap.map(('aab',), 'v')
532
self.assertEqualDiff("'' LeafNode\n"
536
self.assertCanonicalForm(chkmap)
538
# Creates a new internal node, and splits the others into leaves
539
chkmap.map(('aac',), 'v')
540
self.assertEqualDiff("'' InternalNode\n"
548
self.assertCanonicalForm(chkmap)
550
# Splits again, because it can't fit in the current structure
551
chkmap.map(('bbb',), 'v')
552
self.assertEqualDiff("'' InternalNode\n"
553
" 'a' InternalNode\n"
563
self.assertCanonicalForm(chkmap)
565
def test_map_splits_with_longer_key(self):
566
store = self.get_chk_bytes()
567
chkmap = CHKMap(store, None)
568
# Should fit 1 key per LeafNode
569
chkmap._root_node.set_maximum_size(10)
570
chkmap.map(('aaa',), 'v')
571
chkmap.map(('aaaa',), 'v')
572
self.assertCanonicalForm(chkmap)
573
self.assertIsInstance(chkmap._root_node, InternalNode)
575
def test_with_linefeed_in_key(self):
576
store = self.get_chk_bytes()
577
chkmap = CHKMap(store, None)
578
# Should fit 1 key per LeafNode
579
chkmap._root_node.set_maximum_size(10)
580
chkmap.map(('a\ra',), 'val1')
581
chkmap.map(('a\rb',), 'val2')
582
chkmap.map(('ac',), 'val3')
583
self.assertCanonicalForm(chkmap)
584
self.assertEqualDiff("'' InternalNode\n"
585
" 'a\\r' InternalNode\n"
586
" 'a\\ra' LeafNode\n"
587
" ('a\\ra',) 'val1'\n"
588
" 'a\\rb' LeafNode\n"
589
" ('a\\rb',) 'val2'\n"
593
# We should also successfully serialise and deserialise these items
594
root_key = chkmap._save()
595
chkmap = CHKMap(store, root_key)
596
self.assertEqualDiff("'' InternalNode\n"
597
" 'a\\r' InternalNode\n"
598
" 'a\\ra' LeafNode\n"
599
" ('a\\ra',) 'val1'\n"
600
" 'a\\rb' LeafNode\n"
601
" ('a\\rb',) 'val2'\n"
606
def test_deep_splitting(self):
607
store = self.get_chk_bytes()
608
chkmap = CHKMap(store, None)
609
# Should fit 2 keys per LeafNode
610
chkmap._root_node.set_maximum_size(40)
611
chkmap.map(('aaaaaaaa',), 'v')
612
chkmap.map(('aaaaabaa',), 'v')
613
self.assertEqualDiff("'' LeafNode\n"
614
" ('aaaaaaaa',) 'v'\n"
615
" ('aaaaabaa',) 'v'\n",
617
chkmap.map(('aaabaaaa',), 'v')
618
chkmap.map(('aaababaa',), 'v')
619
self.assertEqualDiff("'' InternalNode\n"
621
" ('aaaaaaaa',) 'v'\n"
622
" ('aaaaabaa',) 'v'\n"
624
" ('aaabaaaa',) 'v'\n"
625
" ('aaababaa',) 'v'\n",
627
chkmap.map(('aaabacaa',), 'v')
628
chkmap.map(('aaabadaa',), 'v')
629
self.assertEqualDiff("'' InternalNode\n"
631
" ('aaaaaaaa',) 'v'\n"
632
" ('aaaaabaa',) 'v'\n"
633
" 'aaab' InternalNode\n"
634
" 'aaabaa' LeafNode\n"
635
" ('aaabaaaa',) 'v'\n"
636
" 'aaabab' LeafNode\n"
637
" ('aaababaa',) 'v'\n"
638
" 'aaabac' LeafNode\n"
639
" ('aaabacaa',) 'v'\n"
640
" 'aaabad' LeafNode\n"
641
" ('aaabadaa',) 'v'\n",
643
chkmap.map(('aaababba',), 'val')
644
chkmap.map(('aaababca',), 'val')
645
self.assertEqualDiff("'' InternalNode\n"
647
" ('aaaaaaaa',) 'v'\n"
648
" ('aaaaabaa',) 'v'\n"
649
" 'aaab' InternalNode\n"
650
" 'aaabaa' LeafNode\n"
651
" ('aaabaaaa',) 'v'\n"
652
" 'aaabab' InternalNode\n"
653
" 'aaababa' LeafNode\n"
654
" ('aaababaa',) 'v'\n"
655
" 'aaababb' LeafNode\n"
656
" ('aaababba',) 'val'\n"
657
" 'aaababc' LeafNode\n"
658
" ('aaababca',) 'val'\n"
659
" 'aaabac' LeafNode\n"
660
" ('aaabacaa',) 'v'\n"
661
" 'aaabad' LeafNode\n"
662
" ('aaabadaa',) 'v'\n",
664
# Now we add a node that should fit around an existing InternalNode,
665
# but has a slightly different key prefix, which causes a new
667
chkmap.map(('aaabDaaa',), 'v')
668
self.assertEqualDiff("'' InternalNode\n"
670
" ('aaaaaaaa',) 'v'\n"
671
" ('aaaaabaa',) 'v'\n"
672
" 'aaab' InternalNode\n"
673
" 'aaabD' LeafNode\n"
674
" ('aaabDaaa',) 'v'\n"
675
" 'aaaba' InternalNode\n"
676
" 'aaabaa' LeafNode\n"
677
" ('aaabaaaa',) 'v'\n"
678
" 'aaabab' InternalNode\n"
679
" 'aaababa' LeafNode\n"
680
" ('aaababaa',) 'v'\n"
681
" 'aaababb' LeafNode\n"
682
" ('aaababba',) 'val'\n"
683
" 'aaababc' LeafNode\n"
684
" ('aaababca',) 'val'\n"
685
" 'aaabac' LeafNode\n"
686
" ('aaabacaa',) 'v'\n"
687
" 'aaabad' LeafNode\n"
688
" ('aaabadaa',) 'v'\n",
691
def test_map_collapses_if_size_changes(self):
692
store = self.get_chk_bytes()
693
chkmap = CHKMap(store, None)
694
# Should fit 2 keys per LeafNode
695
chkmap._root_node.set_maximum_size(35)
696
chkmap.map(('aaa',), 'v')
697
chkmap.map(('aab',), 'very long value that splits')
698
self.assertEqualDiff("'' InternalNode\n"
702
" ('aab',) 'very long value that splits'\n",
704
self.assertCanonicalForm(chkmap)
705
# Now changing the value to something small should cause a rebuild
706
chkmap.map(('aab',), 'v')
707
self.assertEqualDiff("'' LeafNode\n"
711
self.assertCanonicalForm(chkmap)
713
def test_map_double_deep_collapses(self):
714
store = self.get_chk_bytes()
715
chkmap = CHKMap(store, None)
716
# Should fit 3 small keys per LeafNode
717
chkmap._root_node.set_maximum_size(40)
718
chkmap.map(('aaa',), 'v')
719
chkmap.map(('aab',), 'very long value that splits')
720
chkmap.map(('abc',), 'v')
721
self.assertEqualDiff("'' InternalNode\n"
722
" 'aa' InternalNode\n"
726
" ('aab',) 'very long value that splits'\n"
730
chkmap.map(('aab',), 'v')
731
self.assertCanonicalForm(chkmap)
732
self.assertEqualDiff("'' LeafNode\n"
738
def test_stable_unmap(self):
739
store = self.get_chk_bytes()
740
chkmap = CHKMap(store, None)
741
# Should fit 2 keys per LeafNode
742
chkmap._root_node.set_maximum_size(35)
743
chkmap.map(('aaa',), 'v')
744
chkmap.map(('aab',), 'v')
745
self.assertEqualDiff("'' LeafNode\n"
749
# Creates a new internal node, and splits the others into leaves
750
chkmap.map(('aac',), 'v')
751
self.assertEqualDiff("'' InternalNode\n"
759
self.assertCanonicalForm(chkmap)
760
# Now lets unmap one of the keys, and assert that we collapse the
762
chkmap.unmap(('aac',))
763
self.assertEqualDiff("'' LeafNode\n"
767
self.assertCanonicalForm(chkmap)
769
def test_unmap_double_deep(self):
770
store = self.get_chk_bytes()
771
chkmap = CHKMap(store, None)
772
# Should fit 3 keys per LeafNode
773
chkmap._root_node.set_maximum_size(40)
774
chkmap.map(('aaa',), 'v')
775
chkmap.map(('aaab',), 'v')
776
chkmap.map(('aab',), 'very long value')
777
chkmap.map(('abc',), 'v')
778
self.assertEqualDiff("'' InternalNode\n"
779
" 'aa' InternalNode\n"
784
" ('aab',) 'very long value'\n"
788
# Removing the 'aab' key should cause everything to collapse back to a
790
chkmap.unmap(('aab',))
791
self.assertEqualDiff("'' LeafNode\n"
797
def test_unmap_double_deep_non_empty_leaf(self):
798
store = self.get_chk_bytes()
799
chkmap = CHKMap(store, None)
800
# Should fit 3 keys per LeafNode
801
chkmap._root_node.set_maximum_size(40)
802
chkmap.map(('aaa',), 'v')
803
chkmap.map(('aab',), 'long value')
804
chkmap.map(('aabb',), 'v')
805
chkmap.map(('abc',), 'v')
806
self.assertEqualDiff("'' InternalNode\n"
807
" 'aa' InternalNode\n"
811
" ('aab',) 'long value'\n"
816
# Removing the 'aab' key should cause everything to collapse back to a
818
chkmap.unmap(('aab',))
819
self.assertEqualDiff("'' LeafNode\n"
825
def test_unmap_with_known_internal_node_doesnt_page(self):
826
store = self.get_chk_bytes()
827
chkmap = CHKMap(store, None)
828
# Should fit 3 keys per LeafNode
829
chkmap._root_node.set_maximum_size(30)
830
chkmap.map(('aaa',), 'v')
831
chkmap.map(('aab',), 'v')
832
chkmap.map(('aac',), 'v')
833
chkmap.map(('abc',), 'v')
834
chkmap.map(('acd',), 'v')
835
self.assertEqualDiff("'' InternalNode\n"
836
" 'aa' InternalNode\n"
848
# Save everything to the map, and start over
849
chkmap = CHKMap(store, chkmap._save())
850
# Mapping an 'aa' key loads the internal node, but should not map the
851
# 'ab' and 'ac' nodes
852
chkmap.map(('aad',), 'v')
853
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
854
self.assertIsInstance(chkmap._root_node._items['ab'], StaticTuple)
855
self.assertIsInstance(chkmap._root_node._items['ac'], StaticTuple)
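        # (A child that has not been paged in is represented by its
        # StaticTuple key; once read, it is replaced by the node object.)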
856
# Unmapping 'acd' can notice that 'aa' is an InternalNode and not have
858
chkmap.unmap(('acd',))
859
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
860
self.assertIsInstance(chkmap._root_node._items['ab'], StaticTuple)
862
def test_unmap_without_fitting_doesnt_page_in(self):
863
store = self.get_chk_bytes()
864
chkmap = CHKMap(store, None)
865
# Should fit 2 keys per LeafNode
866
chkmap._root_node.set_maximum_size(20)
867
chkmap.map(('aaa',), 'v')
868
chkmap.map(('aab',), 'v')
869
self.assertEqualDiff("'' InternalNode\n"
875
# Save everything to the map, and start over
876
chkmap = CHKMap(store, chkmap._save())
877
chkmap.map(('aac',), 'v')
878
chkmap.map(('aad',), 'v')
879
chkmap.map(('aae',), 'v')
880
chkmap.map(('aaf',), 'v')
881
# At this point, the previous nodes should not be paged in, but the
882
# newly added nodes would be
883
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
884
self.assertIsInstance(chkmap._root_node._items['aab'], StaticTuple)
885
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
886
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
887
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
888
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
889
# Now unmapping one of the new nodes will use only the already-paged-in
890
# nodes to determine that we don't need to do more.
891
chkmap.unmap(('aaf',))
892
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
893
self.assertIsInstance(chkmap._root_node._items['aab'], StaticTuple)
894
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
895
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
896
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
898
def test_unmap_pages_in_if_necessary(self):
899
store = self.get_chk_bytes()
900
chkmap = CHKMap(store, None)
901
# Should fit 2 keys per LeafNode
902
chkmap._root_node.set_maximum_size(30)
903
chkmap.map(('aaa',), 'val')
904
chkmap.map(('aab',), 'val')
905
chkmap.map(('aac',), 'val')
906
self.assertEqualDiff("'' InternalNode\n"
914
root_key = chkmap._save()
915
# Save everything to the map, and start over
916
chkmap = CHKMap(store, root_key)
917
chkmap.map(('aad',), 'v')
918
# At this point, the previous nodes should not be paged in, but the
919
# newly added node would be
920
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
921
self.assertIsInstance(chkmap._root_node._items['aab'], StaticTuple)
922
self.assertIsInstance(chkmap._root_node._items['aac'], StaticTuple)
923
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
924
# Unmapping the new node will check the existing nodes to see if they
926
# Clear the page cache so we ensure we have to read all the children
927
chk_map.clear_cache()
928
chkmap.unmap(('aad',))
929
self.assertIsInstance(chkmap._root_node._items['aaa'], LeafNode)
930
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
931
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
933
def test_unmap_pages_in_from_page_cache(self):
934
store = self.get_chk_bytes()
935
chkmap = CHKMap(store, None)
936
# Should fit 2 keys per LeafNode
937
chkmap._root_node.set_maximum_size(30)
938
chkmap.map(('aaa',), 'val')
939
chkmap.map(('aab',), 'val')
940
chkmap.map(('aac',), 'val')
941
root_key = chkmap._save()
942
# Save everything to the map, and start over
943
chkmap = CHKMap(store, root_key)
944
chkmap.map(('aad',), 'val')
945
self.assertEqualDiff("'' InternalNode\n"
955
# Save everything to the map, start over after _dump_tree
956
chkmap = CHKMap(store, root_key)
957
chkmap.map(('aad',), 'v')
958
# At this point, the previous nodes should not be paged in, but the
959
# newly added node would be
960
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
961
self.assertIsInstance(chkmap._root_node._items['aab'], StaticTuple)
962
self.assertIsInstance(chkmap._root_node._items['aac'], StaticTuple)
963
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
964
# Now clear the page cache, and only include 2 of the children in the
966
aab_key = chkmap._root_node._items['aab']
967
aab_bytes = chk_map._get_cache()[aab_key]
968
aac_key = chkmap._root_node._items['aac']
969
aac_bytes = chk_map._get_cache()[aac_key]
970
chk_map.clear_cache()
971
chk_map._get_cache()[aab_key] = aab_bytes
972
chk_map._get_cache()[aac_key] = aac_bytes
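        # chk_map._get_cache() is the module-level page cache mapping node
        # keys to their serialised bytes; priming only 'aab' and 'aac' lets
        # them be rebuilt without a store read while 'aaa' stays unread.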
974
# Unmapping the new node will check the nodes from the page cache
975
# first, and not have to read in 'aaa'
976
chkmap.unmap(('aad',))
977
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
978
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
979
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
981
def test_unmap_uses_existing_items(self):
982
store = self.get_chk_bytes()
983
chkmap = CHKMap(store, None)
984
# Should fit 2 keys per LeafNode
985
chkmap._root_node.set_maximum_size(30)
986
chkmap.map(('aaa',), 'val')
987
chkmap.map(('aab',), 'val')
988
chkmap.map(('aac',), 'val')
989
root_key = chkmap._save()
990
# Save everything to the map, and start over
991
chkmap = CHKMap(store, root_key)
992
chkmap.map(('aad',), 'val')
993
chkmap.map(('aae',), 'val')
994
chkmap.map(('aaf',), 'val')
995
# At this point, the previous nodes should not be paged in, but the
996
# newly added node would be
997
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
998
self.assertIsInstance(chkmap._root_node._items['aab'], StaticTuple)
999
self.assertIsInstance(chkmap._root_node._items['aac'], StaticTuple)
1000
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
1001
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
1002
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
1004
# Unmapping a new node will see the other nodes that are already in
1005
# memory, and not need to page in anything else
1006
chkmap.unmap(('aad',))
1007
self.assertIsInstance(chkmap._root_node._items['aaa'], StaticTuple)
1008
self.assertIsInstance(chkmap._root_node._items['aab'], StaticTuple)
1009
self.assertIsInstance(chkmap._root_node._items['aac'], StaticTuple)
1010
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
1011
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
1013
def test_iter_changes_empty_ab(self):
1014
# Asking for changes between an empty dict to a dict with keys returns
1016
basis = self._get_map({}, maximum_size=10)
1017
target = self._get_map(
1018
{('a',): 'content here', ('b',): 'more content'},
1019
chk_bytes=basis._store, maximum_size=10)
1020
self.assertEqual([(('a',), None, 'content here'),
1021
(('b',), None, 'more content')],
1022
sorted(list(target.iter_changes(basis))))
1024
def test_iter_changes_ab_empty(self):
1025
# Asking for changes between a dict with keys to an empty dict returns
1027
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
1029
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
1030
self.assertEqual([(('a',), 'content here', None),
1031
(('b',), 'more content', None)],
1032
sorted(list(target.iter_changes(basis))))
1034
def test_iter_changes_empty_empty_is_empty(self):
1035
basis = self._get_map({}, maximum_size=10)
1036
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
1037
self.assertEqual([], sorted(list(target.iter_changes(basis))))
1039
def test_iter_changes_ab_ab_is_empty(self):
1040
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
1042
target = self._get_map(
1043
{('a',): 'content here', ('b',): 'more content'},
1044
chk_bytes=basis._store, maximum_size=10)
1045
self.assertEqual([], sorted(list(target.iter_changes(basis))))
1047
def test_iter_changes_ab_ab_nodes_not_loaded(self):
1048
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
1050
target = self._get_map(
1051
{('a',): 'content here', ('b',): 'more content'},
1052
chk_bytes=basis._store, maximum_size=10)
1053
list(target.iter_changes(basis))
1054
self.assertIsInstance(target._root_node, StaticTuple)
1055
self.assertIsInstance(basis._root_node, StaticTuple)
1057
def test_iter_changes_ab_ab_changed_values_shown(self):
1058
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
1060
target = self._get_map(
1061
{('a',): 'content here', ('b',): 'different content'},
1062
chk_bytes=basis._store, maximum_size=10)
1063
result = sorted(list(target.iter_changes(basis)))
1064
self.assertEqual([(('b',), 'more content', 'different content')],
1067
def test_iter_changes_mixed_node_length(self):
1068
# When one side has different node lengths than the other, common
1069
        # but different keys still need to be shown, and new-and-old included
1071
# aaa - common unaltered
1072
# aab - common altered
1076
# aaa to be not loaded (later test)
1077
# aab, b, at to be returned.
1078
        # basis splits at byte 0,1,2; aaa is common, b is basis only
1079
basis_dict = {('aaa',): 'foo bar',
1080
('aab',): 'common altered a', ('b',): 'foo bar b'}
1081
# target splits at byte 1,2, at is target only
1082
target_dict = {('aaa',): 'foo bar',
1083
('aab',): 'common altered b', ('at',): 'foo bar t'}
1085
(('aab',), 'common altered a', 'common altered b'),
1086
(('at',), None, 'foo bar t'),
1087
(('b',), 'foo bar b', None),
1089
basis = self._get_map(basis_dict, maximum_size=10)
1090
target = self._get_map(target_dict, maximum_size=10,
1091
chk_bytes=basis._store)
1092
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
1094
def test_iter_changes_common_pages_not_loaded(self):
1095
# aaa - common unaltered
1096
# aab - common altered
1100
# aaa to be not loaded
1101
# aaa not to be in result.
1102
basis_dict = {('aaa',): 'foo bar',
1103
('aab',): 'common altered a', ('b',): 'foo bar b'}
1104
# target splits at byte 1, at is target only
1105
target_dict = {('aaa',): 'foo bar',
1106
('aab',): 'common altered b', ('at',): 'foo bar t'}
1107
basis = self._get_map(basis_dict, maximum_size=10)
1108
target = self._get_map(target_dict, maximum_size=10,
1109
chk_bytes=basis._store)
1110
basis_get = basis._store.get_record_stream
1111
def get_record_stream(keys, order, fulltext):
1112
if ('sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys:
1113
self.fail("'aaa' pointer was followed %r" % keys)
1114
return basis_get(keys, order, fulltext)
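        # Install the stub so that any attempt to read the record holding the
        # unchanged 'aaa' key fails the test immediately.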
1115
basis._store.get_record_stream = get_record_stream
1116
result = sorted(list(target.iter_changes(basis)))
1117
for change in result:
1118
if change[0] == ('aaa',):
1119
self.fail("Found unexpected change: %s" % change)
1121
def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self):
1122
        # Within a leaf there are no hashes to exclude keys, make sure multi
1123
# value leaf nodes are handled well.
1124
basis_dict = {('aaa',): 'foo bar',
1125
('aab',): 'common altered a', ('b',): 'foo bar b'}
1126
target_dict = {('aaa',): 'foo bar',
1127
('aab',): 'common altered b', ('at',): 'foo bar t'}
1129
(('aab',), 'common altered a', 'common altered b'),
1130
(('at',), None, 'foo bar t'),
1131
(('b',), 'foo bar b', None),
1133
basis = self._get_map(basis_dict)
1134
target = self._get_map(target_dict, chk_bytes=basis._store)
1135
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
1137
def test_iteritems_empty(self):
1138
chk_bytes = self.get_chk_bytes()
1139
root_key = CHKMap.from_dict(chk_bytes, {})
1140
chkmap = CHKMap(chk_bytes, root_key)
1141
self.assertEqual([], list(chkmap.iteritems()))
1143
def test_iteritems_two_items(self):
1144
chk_bytes = self.get_chk_bytes()
1145
root_key = CHKMap.from_dict(chk_bytes,
1146
{"a":"content here", "b":"more content"})
1147
chkmap = CHKMap(chk_bytes, root_key)
1148
self.assertEqual([(("a",), "content here"), (("b",), "more content")],
1149
sorted(list(chkmap.iteritems())))
1151
def test_iteritems_selected_one_of_two_items(self):
1152
chkmap = self._get_map( {("a",):"content here", ("b",):"more content"})
1153
self.assertEqual({("a",): "content here"},
1154
self.to_dict(chkmap, [("a",)]))
1156
def test_iteritems_keys_prefixed_by_2_width_nodes(self):
1157
chkmap = self._get_map(
1158
{("a","a"):"content here", ("a", "b",):"more content",
1159
("b", ""): 'boring content'},
1160
maximum_size=10, key_width=2)
1162
{("a", "a"): "content here", ("a", "b"): 'more content'},
1163
self.to_dict(chkmap, [("a",)]))
1165
def test_iteritems_keys_prefixed_by_2_width_nodes_hashed(self):
1166
search_key_func = chk_map.search_key_registry.get('hash-16-way')
1167
self.assertEqual('E8B7BE43\x00E8B7BE43',
1168
search_key_func(StaticTuple('a', 'a')))
1169
self.assertEqual('E8B7BE43\x0071BEEFF9',
1170
search_key_func(StaticTuple('a', 'b')))
1171
self.assertEqual('71BEEFF9\x0000000000',
1172
search_key_func(StaticTuple('b', '')))
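        # The 16-way search key is the zlib.crc32 of each key element,
        # formatted as 8 uppercase hex digits and joined with NULs; that is
        # where the 'E8B7BE43' (crc32 of 'a') style strings above come from.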
1173
chkmap = self._get_map(
1174
{("a","a"):"content here", ("a", "b",):"more content",
1175
("b", ""): 'boring content'},
1176
maximum_size=10, key_width=2, search_key_func=search_key_func)
1178
{("a", "a"): "content here", ("a", "b"): 'more content'},
1179
self.to_dict(chkmap, [("a",)]))
1181
def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
1182
chkmap = self._get_map(
1183
{("a","a"):"content here", ("a", "b",):"more content",
1184
("b", ""): 'boring content'}, key_width=2)
1186
{("a", "a"): "content here", ("a", "b"): 'more content'},
1187
self.to_dict(chkmap, [("a",)]))
1189
def test___len__empty(self):
1190
chkmap = self._get_map({})
1191
self.assertEqual(0, len(chkmap))
1193
def test___len__2(self):
1194
chkmap = self._get_map({("foo",):"bar", ("gam",):"quux"})
1195
self.assertEqual(2, len(chkmap))
1197
def test_max_size_100_bytes_new(self):
1198
# When there is a 100 byte upper node limit, a tree is formed.
1199
chkmap = self._get_map({("k1"*50,):"v1", ("k2"*50,):"v2"}, maximum_size=100)
1200
# We expect three nodes:
1201
# A root, with two children, and with two key prefixes - k1 to one, and
1202
# k2 to the other as our node splitting is only just being developed.
1203
# The maximum size should be embedded
1204
chkmap._ensure_root()
1205
self.assertEqual(100, chkmap._root_node.maximum_size)
1206
self.assertEqual(1, chkmap._root_node._key_width)
1207
# There should be two child nodes, and prefix of 2(bytes):
1208
self.assertEqual(2, len(chkmap._root_node._items))
1209
self.assertEqual("k", chkmap._root_node._compute_search_prefix())
1210
# The actual nodes pointed at will change as serialisers change; so
1211
# here we test that the key prefix is correct; then load the nodes and
1212
# check they have the right pointed at key; whether they have the
1213
# pointed at value inline or not is also unrelated to this test so we
1214
# don't check that in detail - rather we just check the aggregate
1216
nodes = sorted(chkmap._root_node._items.items())
        ptr1 = nodes[0]
        ptr2 = nodes[1]
        self.assertEqual('k1', ptr1[0])
1220
self.assertEqual('k2', ptr2[0])
1221
node1 = chk_map._deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1], None)
1222
self.assertIsInstance(node1, LeafNode)
1223
self.assertEqual(1, len(node1))
1224
self.assertEqual({('k1'*50,): 'v1'}, self.to_dict(node1, chkmap._store))
1225
node2 = chk_map._deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1], None)
1226
self.assertIsInstance(node2, LeafNode)
1227
self.assertEqual(1, len(node2))
1228
self.assertEqual({('k2'*50,): 'v2'}, self.to_dict(node2, chkmap._store))
1229
# Having checked we have a good structure, check that the content is
1231
self.assertEqual(2, len(chkmap))
1232
self.assertEqual({("k1"*50,): "v1", ("k2"*50,): "v2"},
1233
self.to_dict(chkmap))
1235
def test_init_root_is_LeafNode_new(self):
1236
chk_bytes = self.get_chk_bytes()
1237
chkmap = CHKMap(chk_bytes, None)
1238
self.assertIsInstance(chkmap._root_node, LeafNode)
1239
self.assertEqual({}, self.to_dict(chkmap))
1240
self.assertEqual(0, len(chkmap))
1242
def test_init_and_save_new(self):
1243
chk_bytes = self.get_chk_bytes()
1244
chkmap = CHKMap(chk_bytes, None)
1245
key = chkmap._save()
1246
leaf_node = LeafNode()
1247
self.assertEqual([key], leaf_node.serialise(chk_bytes))
1249
def test_map_first_item_new(self):
1250
chk_bytes = self.get_chk_bytes()
1251
chkmap = CHKMap(chk_bytes, None)
1252
chkmap.map(("foo,",), "bar")
1253
self.assertEqual({('foo,',): 'bar'}, self.to_dict(chkmap))
1254
self.assertEqual(1, len(chkmap))
1255
key = chkmap._save()
1256
leaf_node = LeafNode()
1257
leaf_node.map(chk_bytes, ("foo,",), "bar")
1258
self.assertEqual([key], leaf_node.serialise(chk_bytes))
1260
def test_unmap_last_item_root_is_leaf_new(self):
1261
chkmap = self._get_map({("k1"*50,): "v1", ("k2"*50,): "v2"})
1262
chkmap.unmap(("k1"*50,))
1263
chkmap.unmap(("k2"*50,))
1264
self.assertEqual(0, len(chkmap))
1265
self.assertEqual({}, self.to_dict(chkmap))
1266
key = chkmap._save()
1267
leaf_node = LeafNode()
1268
self.assertEqual([key], leaf_node.serialise(chkmap._store))
1270
def test__dump_tree(self):
1271
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2",
1272
("bbb",): "value3",},
1274
self.assertEqualDiff("'' InternalNode\n"
1275
" 'a' InternalNode\n"
1277
" ('aaa',) 'value1'\n"
1279
" ('aab',) 'value2'\n"
1281
" ('bbb',) 'value3'\n",
1282
chkmap._dump_tree())
1283
self.assertEqualDiff("'' InternalNode\n"
1284
" 'a' InternalNode\n"
1286
" ('aaa',) 'value1'\n"
1288
" ('aab',) 'value2'\n"
1290
" ('bbb',) 'value3'\n",
1291
chkmap._dump_tree())
1292
self.assertEqualDiff(
1293
"'' InternalNode sha1:0690d471eb0a624f359797d0ee4672bd68f4e236\n"
1294
" 'a' InternalNode sha1:1514c35503da9418d8fd90c1bed553077cb53673\n"
1295
" 'aaa' LeafNode sha1:4cc5970454d40b4ce297a7f13ddb76f63b88fefb\n"
1296
" ('aaa',) 'value1'\n"
1297
" 'aab' LeafNode sha1:1d68bc90914ef8a3edbcc8bb28b00cb4fea4b5e2\n"
1298
" ('aab',) 'value2'\n"
1299
" 'b' LeafNode sha1:3686831435b5596515353364eab0399dc45d49e7\n"
1300
" ('bbb',) 'value3'\n",
1301
chkmap._dump_tree(include_keys=True))
1303
def test__dump_tree_in_progress(self):
1304
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2"},
1306
chkmap.map(('bbb',), 'value3')
1307
self.assertEqualDiff("'' InternalNode\n"
1308
" 'a' InternalNode\n"
1310
" ('aaa',) 'value1'\n"
1312
" ('aab',) 'value2'\n"
1314
" ('bbb',) 'value3'\n",
1315
chkmap._dump_tree())
1316
# For things that are updated by adding 'bbb', we don't have a sha key
1317
# for them yet, so they are listed as None
1318
self.assertEqualDiff(
1319
"'' InternalNode None\n"
1320
" 'a' InternalNode sha1:6b0d881dd739a66f733c178b24da64395edfaafd\n"
1321
" 'aaa' LeafNode sha1:40b39a08d895babce17b20ae5f62d187eaa4f63a\n"
1322
" ('aaa',) 'value1'\n"
1323
" 'aab' LeafNode sha1:ad1dc7c4e801302c95bf1ba7b20bc45e548cd51a\n"
1324
" ('aab',) 'value2'\n"
1325
" 'b' LeafNode None\n"
1326
" ('bbb',) 'value3'\n",
1327
chkmap._dump_tree(include_keys=True))


def _search_key_single(key):
    """A search key function that maps all nodes to the same value"""
    return 'value'


def _test_search_key(key):
    return 'test:' + '\x00'.join(key)

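# A search_key_func maps a key tuple to the string used to navigate the trie.
# The default flattens the tuple with NUL separators; the registered
# 'hash-16-way' and 'hash-255-way' functions hash each element first, which
# spreads otherwise-similar keys across different subtrees.
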
class TestMapSearchKeys(TestCaseWithStore):
1340
def test_default_chk_map_uses_flat_search_key(self):
1341
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None)
1342
self.assertEqual('1',
1343
chkmap._search_key_func(('1',)))
1344
self.assertEqual('1\x002',
1345
chkmap._search_key_func(('1', '2')))
1346
self.assertEqual('1\x002\x003',
1347
chkmap._search_key_func(('1', '2', '3')))
1349
def test_search_key_is_passed_to_root_node(self):
1350
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1351
search_key_func=_test_search_key)
1352
self.assertIs(_test_search_key, chkmap._search_key_func)
1353
self.assertEqual('test:1\x002\x003',
1354
chkmap._search_key_func(('1', '2', '3')))
1355
self.assertEqual('test:1\x002\x003',
1356
chkmap._root_node._search_key(('1', '2', '3')))
1358
def test_search_key_passed_via__ensure_root(self):
1359
chk_bytes = self.get_chk_bytes()
1360
chkmap = chk_map.CHKMap(chk_bytes, None,
1361
search_key_func=_test_search_key)
1362
root_key = chkmap._save()
1363
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1364
search_key_func=_test_search_key)
1365
chkmap._ensure_root()
1366
self.assertEqual('test:1\x002\x003',
1367
chkmap._root_node._search_key(('1', '2', '3')))
1369
def test_search_key_with_internal_node(self):
1370
chk_bytes = self.get_chk_bytes()
1371
chkmap = chk_map.CHKMap(chk_bytes, None,
1372
search_key_func=_test_search_key)
1373
chkmap._root_node.set_maximum_size(10)
1374
chkmap.map(('1',), 'foo')
1375
chkmap.map(('2',), 'bar')
1376
chkmap.map(('3',), 'baz')
1377
self.assertEqualDiff("'' InternalNode\n"
1378
" 'test:1' LeafNode\n"
1380
" 'test:2' LeafNode\n"
1382
" 'test:3' LeafNode\n"
1384
, chkmap._dump_tree())
1385
root_key = chkmap._save()
1386
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1387
search_key_func=_test_search_key)
1388
self.assertEqualDiff("'' InternalNode\n"
1389
" 'test:1' LeafNode\n"
1391
" 'test:2' LeafNode\n"
1393
" 'test:3' LeafNode\n"
1395
, chkmap._dump_tree())
1397
def test_search_key_16(self):
1398
chk_bytes = self.get_chk_bytes()
1399
chkmap = chk_map.CHKMap(chk_bytes, None,
1400
search_key_func=chk_map._search_key_16)
1401
chkmap._root_node.set_maximum_size(10)
1402
chkmap.map(('1',), 'foo')
1403
chkmap.map(('2',), 'bar')
1404
chkmap.map(('3',), 'baz')
1405
self.assertEqualDiff("'' InternalNode\n"
1412
, chkmap._dump_tree())
1413
root_key = chkmap._save()
1414
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1415
search_key_func=chk_map._search_key_16)
1416
# We can get the values back correctly
1417
self.assertEqual([(('1',), 'foo')],
1418
list(chkmap.iteritems([('1',)])))
1419
self.assertEqualDiff("'' InternalNode\n"
1426
, chkmap._dump_tree())
1428
def test_search_key_255(self):
1429
chk_bytes = self.get_chk_bytes()
1430
chkmap = chk_map.CHKMap(chk_bytes, None,
1431
search_key_func=chk_map._search_key_255)
1432
chkmap._root_node.set_maximum_size(10)
1433
chkmap.map(('1',), 'foo')
1434
chkmap.map(('2',), 'bar')
1435
chkmap.map(('3',), 'baz')
1436
self.assertEqualDiff("'' InternalNode\n"
1437
" '\\x1a' LeafNode\n"
1441
" '\\x83' LeafNode\n"
1443
, chkmap._dump_tree())
1444
root_key = chkmap._save()
1445
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1446
search_key_func=chk_map._search_key_255)
1447
# We can get the values back correctly
1448
self.assertEqual([(('1',), 'foo')],
1449
list(chkmap.iteritems([('1',)])))
1450
self.assertEqualDiff("'' InternalNode\n"
1451
" '\\x1a' LeafNode\n"
1455
" '\\x83' LeafNode\n"
1457
, chkmap._dump_tree())
1459
def test_search_key_collisions(self):
1460
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1461
search_key_func=_search_key_single)
1462
# The node will want to expand, but it cannot, because it knows that
1463
# all the keys must map to this node
1464
chkmap._root_node.set_maximum_size(20)
1465
chkmap.map(('1',), 'foo')
1466
chkmap.map(('2',), 'bar')
1467
chkmap.map(('3',), 'baz')
1468
self.assertEqualDiff("'' LeafNode\n"
1472
, chkmap._dump_tree())


class TestLeafNode(TestCaseWithStore):

    def test_current_size_empty(self):
        node = LeafNode()
        self.assertEqual(16, node._current_size())

    def test_current_size_size_changed(self):
        node = LeafNode()
        node.set_maximum_size(10)
        self.assertEqual(17, node._current_size())

    def test_current_size_width_changed(self):
        node = LeafNode()
        node._key_width = 10
        self.assertEqual(17, node._current_size())

    def test_current_size_items(self):
        node = LeafNode()
        base_size = node._current_size()
        node.map(None, ("foo bar",), "baz")
        self.assertEqual(base_size + 14, node._current_size())

def test_deserialise_empty(self):
1498
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
1499
self.assertEqual(0, len(node))
1500
self.assertEqual(10, node.maximum_size)
1501
self.assertEqual(("sha1:1234",), node.key())
1502
self.assertIs(None, node._search_prefix)
1503
self.assertIs(None, node._common_serialised_prefix)
1505
def test_deserialise_items(self):
1506
node = LeafNode.deserialise(
1507
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1509
self.assertEqual(2, len(node))
1510
self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")],
1511
sorted(node.iteritems(None)))
1513
def test_deserialise_item_with_null_width_1(self):
1514
node = LeafNode.deserialise(
1515
"chkleaf:\n0\n1\n2\n\nfoo\x001\nbar\x00baz\nquux\x001\nblarh\n",
1517
self.assertEqual(2, len(node))
1518
self.assertEqual([(("foo",), "bar\x00baz"), (("quux",), "blarh")],
1519
sorted(node.iteritems(None)))
1521
def test_deserialise_item_with_null_width_2(self):
1522
node = LeafNode.deserialise(
1523
"chkleaf:\n0\n2\n2\n\nfoo\x001\x001\nbar\x00baz\n"
1524
"quux\x00\x001\nblarh\n",
1526
self.assertEqual(2, len(node))
1527
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("quux", ""), "blarh")],
1528
sorted(node.iteritems(None)))
1530
def test_iteritems_selected_one_of_two_items(self):
1531
node = LeafNode.deserialise(
1532
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1534
self.assertEqual(2, len(node))
1535
self.assertEqual([(("quux",), "blarh")],
1536
sorted(node.iteritems(None, [("quux",), ("qaz",)])))
1538
def test_deserialise_item_with_common_prefix(self):
1539
node = LeafNode.deserialise(
1540
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x001\nbar\x00baz\n2\x001\nblarh\n",
1542
self.assertEqual(2, len(node))
1543
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("foo", "2"), "blarh")],
1544
sorted(node.iteritems(None)))
1545
self.assertIs(chk_map._unknown, node._search_prefix)
1546
self.assertEqual('foo\x00', node._common_serialised_prefix)
1548
def test_deserialise_multi_line(self):
1549
node = LeafNode.deserialise(
1550
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x002\nbar\nbaz\n2\x002\nblarh\n\n",
1552
self.assertEqual(2, len(node))
1553
self.assertEqual([(("foo", "1"), "bar\nbaz"),
1554
(("foo", "2"), "blarh\n"),
1555
], sorted(node.iteritems(None)))
1556
self.assertIs(chk_map._unknown, node._search_prefix)
1557
self.assertEqual('foo\x00', node._common_serialised_prefix)
1559
def test_key_new(self):
        node = LeafNode()
        self.assertEqual(None, node.key())
1563
def test_key_after_map(self):
1564
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
1565
node.map(None, ("foo bar",), "baz quux")
1566
self.assertEqual(None, node.key())
1568
def test_key_after_unmap(self):
1569
node = LeafNode.deserialise(
1570
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1572
node.unmap(None, ("foo bar",))
1573
self.assertEqual(None, node.key())
1575
def test_map_exceeding_max_size_only_entry_new(self):
        node = LeafNode()
        node.set_maximum_size(10)
1578
result = node.map(None, ("foo bar",), "baz quux")
1579
self.assertEqual(("foo bar", [("", node)]), result)
1580
self.assertTrue(10 < node._current_size())
1582
def test_map_exceeding_max_size_second_entry_early_difference_new(self):
        node = LeafNode()
        node.set_maximum_size(10)
1585
node.map(None, ("foo bar",), "baz quux")
1586
prefix, result = list(node.map(None, ("blue",), "red"))
1587
self.assertEqual("", prefix)
1588
self.assertEqual(2, len(result))
1589
split_chars = set([result[0][0], result[1][0]])
1590
self.assertEqual(set(["f", "b"]), split_chars)
1591
nodes = dict(result)
        node = nodes["f"]
        self.assertEqual({("foo bar",): "baz quux"}, self.to_dict(node, None))
1594
self.assertEqual(10, node.maximum_size)
1595
self.assertEqual(1, node._key_width)
        node = nodes["b"]
        self.assertEqual({("blue",): "red"}, self.to_dict(node, None))
1598
self.assertEqual(10, node.maximum_size)
1599
self.assertEqual(1, node._key_width)
1601
def test_map_first(self):
        node = LeafNode()
        result = node.map(None, ("foo bar",), "baz quux")
1604
self.assertEqual(("foo bar", [("", node)]), result)
1605
self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node, None))
1606
self.assertEqual(1, len(node))
1608
def test_map_second(self):
        node = LeafNode()
        node.map(None, ("foo bar",), "baz quux")
1611
result = node.map(None, ("bingo",), "bango")
1612
self.assertEqual(("", [("", node)]), result)
1613
self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"},
1614
self.to_dict(node, None))
1615
self.assertEqual(2, len(node))
1617
def test_map_replacement(self):
        node = LeafNode()
        node.map(None, ("foo bar",), "baz quux")
1620
result = node.map(None, ("foo bar",), "bango")
1621
self.assertEqual(("foo bar", [("", node)]), result)
1622
self.assertEqual({("foo bar",): "bango"},
1623
self.to_dict(node, None))
1624
self.assertEqual(1, len(node))
1626
def test_serialise_empty(self):
1627
store = self.get_chk_bytes()
        node = LeafNode()
        node.set_maximum_size(10)
1630
expected_key = ("sha1:f34c3f0634ea3f85953dffa887620c0a5b1f4a51",)
1631
self.assertEqual([expected_key],
1632
list(node.serialise(store)))
1633
self.assertEqual("chkleaf:\n10\n1\n0\n\n", self.read_bytes(store, expected_key))
1634
self.assertEqual(expected_key, node.key())
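        # serialise() both writes the record into the store and records the
        # resulting 'sha1:...' key on the node itself.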
1636
def test_serialise_items(self):
1637
store = self.get_chk_bytes()
        node = LeafNode()
        node.set_maximum_size(10)
1640
node.map(None, ("foo bar",), "baz quux")
1641
expected_key = ("sha1:f89fac7edfc6bdb1b1b54a556012ff0c646ef5e0",)
1642
self.assertEqual('foo bar', node._common_serialised_prefix)
1643
self.assertEqual([expected_key],
1644
list(node.serialise(store)))
1645
self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\n\x001\nbaz quux\n",
1646
self.read_bytes(store, expected_key))
1647
self.assertEqual(expected_key, node.key())
1649
def test_unique_serialised_prefix_empty_new(self):
        node = LeafNode()
        self.assertIs(None, node._compute_search_prefix())
1653
def test_unique_serialised_prefix_one_item_new(self):
        node = LeafNode()
        node.map(None, ("foo bar", "baz"), "baz quux")
1656
self.assertEqual("foo bar\x00baz", node._compute_search_prefix())
1658
def test_unmap_missing(self):
        node = LeafNode()
        self.assertRaises(KeyError, node.unmap, None, ("foo bar",))
1662
def test_unmap_present(self):
        node = LeafNode()
        node.map(None, ("foo bar",), "baz quux")
1665
result = node.unmap(None, ("foo bar",))
1666
self.assertEqual(node, result)
1667
self.assertEqual({}, self.to_dict(node, None))
1668
self.assertEqual(0, len(node))
1670
def test_map_maintains_common_prefixes(self):
        node = LeafNode()
        node._key_width = 2
        node.map(None, ("foo bar", "baz"), "baz quux")
1674
self.assertEqual('foo bar\x00baz', node._search_prefix)
1675
self.assertEqual('foo bar\x00baz', node._common_serialised_prefix)
1676
node.map(None, ("foo bar", "bing"), "baz quux")
1677
self.assertEqual('foo bar\x00b', node._search_prefix)
1678
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1679
node.map(None, ("fool", "baby"), "baz quux")
1680
self.assertEqual('foo', node._search_prefix)
1681
self.assertEqual('foo', node._common_serialised_prefix)
1682
node.map(None, ("foo bar", "baz"), "replaced")
1683
self.assertEqual('foo', node._search_prefix)
1684
self.assertEqual('foo', node._common_serialised_prefix)
1685
node.map(None, ("very", "different"), "value")
1686
self.assertEqual('', node._search_prefix)
1687
self.assertEqual('', node._common_serialised_prefix)
1689
def test_unmap_maintains_common_prefixes(self):
        node = LeafNode()
        node._key_width = 2
        node.map(None, ("foo bar", "baz"), "baz quux")
1693
node.map(None, ("foo bar", "bing"), "baz quux")
1694
node.map(None, ("fool", "baby"), "baz quux")
1695
node.map(None, ("very", "different"), "value")
1696
self.assertEqual('', node._search_prefix)
1697
self.assertEqual('', node._common_serialised_prefix)
1698
node.unmap(None, ("very", "different"))
1699
self.assertEqual("foo", node._search_prefix)
1700
self.assertEqual("foo", node._common_serialised_prefix)
1701
node.unmap(None, ("fool", "baby"))
1702
self.assertEqual('foo bar\x00b', node._search_prefix)
1703
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1704
node.unmap(None, ("foo bar", "baz"))
1705
self.assertEqual('foo bar\x00bing', node._search_prefix)
1706
self.assertEqual('foo bar\x00bing', node._common_serialised_prefix)
1707
node.unmap(None, ("foo bar", "bing"))
1708
self.assertEqual(None, node._search_prefix)
1709
self.assertEqual(None, node._common_serialised_prefix)
1712
class TestInternalNode(TestCaseWithStore):
1714
def test_add_node_empty_new(self):
1715
node = InternalNode('fo')
        child = LeafNode()
        child.set_maximum_size(100)
1718
child.map(None, ("foo",), "bar")
1719
node.add_node("foo", child)
1720
# Note that node isn't strictly valid now as a tree (only one child),
1721
        # but that's ok for this test.
1722
# The first child defines the node's width:
1723
self.assertEqual(3, node._node_width)
1724
# We should be able to iterate over the contents without doing IO.
1725
self.assertEqual({('foo',): 'bar'}, self.to_dict(node, None))
1726
# The length should be known:
1727
self.assertEqual(1, len(node))
1728
# serialising the node should serialise the child and the node.
1729
chk_bytes = self.get_chk_bytes()
1730
keys = list(node.serialise(chk_bytes))
1731
child_key = child.serialise(chk_bytes)[0]
1733
[child_key, ('sha1:cf67e9997d8228a907c1f5bfb25a8bd9cd916fac',)],
1735
# We should be able to access deserialised content.
1736
bytes = self.read_bytes(chk_bytes, keys[1])
1737
node = chk_map._deserialise(bytes, keys[1], None)
1738
self.assertEqual(1, len(node))
1739
self.assertEqual({('foo',): 'bar'}, self.to_dict(node, chk_bytes))
1740
self.assertEqual(3, node._node_width)

    def test_add_node_resets_key_new(self):
        node = InternalNode('fo')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("foo", child)
        chk_bytes = self.get_chk_bytes()
        keys = list(node.serialise(chk_bytes))
        self.assertEqual(keys[1], node._key)
        node.add_node("fos", child)
        self.assertEqual(None, node._key)

    # def test_add_node_empty_oversized_one_ok_new(self):
    # def test_add_node_one_oversized_second_kept_minimum_fan(self):
    # def test_add_node_two_oversized_third_kept_minimum_fan(self):
    # def test_add_node_one_oversized_second_splits_errors(self):

    def test__iter_nodes_no_key_filter(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "baz")
        node.add_node("b", child)

        for child, node_key_filter in node._iter_nodes(None, key_filter=None):
            self.assertEqual(None, node_key_filter)

    def test__iter_nodes_splits_key_filter(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "baz")
        node.add_node("b", child)

        # foo and bar both match exactly one leaf node, but 'cat' should not
        # match any, and should not be placed in one.
        key_filter = (('foo',), ('bar',), ('cat',))
        for child, node_key_filter in node._iter_nodes(None,
                key_filter=key_filter):
            # each child could only match one key filter, so make sure it was
            # filtered down to exactly that one.
            self.assertEqual(1, len(node_key_filter))

    def test__iter_nodes_with_multiple_matches(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "val")
        child.map(None, ("fob",), "val")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "val")
        child.map(None, ("baz",), "val")
        node.add_node("b", child)

        # Note that 'ram' doesn't match anything, so it should be freely
        # dropped from the filters.
        key_filter = (('foo',), ('fob',), ('bar',), ('baz',), ('ram',))
        for child, node_key_filter in node._iter_nodes(None,
                key_filter=key_filter):
            # each child could match two key filters, so make sure they were
            # both preserved.
            self.assertEqual(2, len(node_key_filter))

    def make_fo_fa_node(self):
        node = InternalNode('f')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "val")
        child.map(None, ("fob",), "val")
        node.add_node('fo', child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("far",), "val")
        child.map(None, ("faz",), "val")
        node.add_node("fa", child)
        return node

    def test__iter_nodes_single_entry(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        self.assertEqual(key_filter, nodes[0][1])

    def test__iter_nodes_single_entry_misses(self):
        node = self.make_fo_fa_node()
        key_filter = [('bar',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(0, len(nodes))

    def test__iter_nodes_mixed_key_width(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('b',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        matches = key_filter[:]
        matches.remove(('b',))
        self.assertEqual(sorted(matches), sorted(nodes[0][1]))

    def test__iter_nodes_match_all(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('f',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))

    def test__iter_nodes_fixed_widths_and_misses(self):
        node = self.make_fo_fa_node()
        # foo and faa should both match one child, baz should miss
        key_filter = [('foo',), ('faa',), ('baz',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))
        for node, matches in nodes:
            self.assertEqual(1, len(matches))

    def test_iteritems_empty_new(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems(None)))

    def test_iteritems_two_children(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode()
        leaf2.map(None, ('strange',), 'beast')
        node.add_node("f", leaf1)
        node.add_node("s", leaf2)
        self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')],
            sorted(node.iteritems(None)))

    def test_iteritems_two_children_partial(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode()
        leaf2.map(None, ('strange',), 'beast')
        node.add_node("f", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items['f'] = None
        node.add_node("s", leaf2)
        self.assertEqual([(('strange',), 'beast')],
            sorted(node.iteritems(None, [('strange',), ('weird',)])))

    def test_iteritems_two_children_with_hash(self):
        search_key_func = chk_map.search_key_registry.get('hash-255-way')
        node = InternalNode(search_key_func=search_key_func)
        leaf1 = LeafNode(search_key_func=search_key_func)
        leaf1.map(None, StaticTuple('foo bar',), 'quux')
        leaf2 = LeafNode(search_key_func=search_key_func)
        leaf2.map(None, StaticTuple('strange',), 'beast')
        self.assertEqual('\xbeF\x014', search_key_func(StaticTuple('foo bar',)))
        self.assertEqual('\x85\xfa\xf7K', search_key_func(StaticTuple('strange',)))
        node.add_node("\xbe", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items['\xbe'] = None
        node.add_node("\x85", leaf2)
        self.assertEqual([(('strange',), 'beast')],
            sorted(node.iteritems(None, [StaticTuple('strange',),
                                         StaticTuple('weird',)])))

    def test_iteritems_partial_empty(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems([('missing',)])))

    def test_map_to_new_child_new(self):
        chkmap = self._get_map({('k1',):'foo', ('k2',):'bar'}, maximum_size=10)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
            len([value for value in node._items.values()
                 if type(value) is StaticTuple]))
        # now, mapping to k3 should add a k3 leaf
        prefix, nodes = node.map(None, ('k3',), 'quux')
        self.assertEqual("k", prefix)
        self.assertEqual([("", node)], nodes)
        # check new child details
        child = node._items['k3']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({('k3',): 'quux'}, self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k2',): 'bar', ('k3',): 'quux'},
            self.to_dict(chkmap))
        # serialising should only serialise the new data - k3 and the internal
        # node.
        keys = list(node.serialise(chkmap._store))
        child_key = child.serialise(chkmap._store)[0]
        self.assertEqual([child_key, keys[1]], keys)

    def test_map_to_child_child_splits_new(self):
        chkmap = self._get_map({('k1',):'foo', ('k22',):'bar'}, maximum_size=10)
        # Check for the canonical root value for this tree:
        self.assertEqualDiff("'' InternalNode\n"
            , chkmap._dump_tree())
        # _dump_tree pages everything in, so reload using just the root
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
            len([value for value in node._items.values()
                 if type(value) is StaticTuple]))
        # now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
        # k23, which for simplicity in the current implementation generates
        # a new internal node between node, and k22/k23.
        prefix, nodes = node.map(chkmap._store, ('k23',), 'quux')
        self.assertEqual("k", prefix)
        self.assertEqual([("", node)], nodes)
        # check new child details
        child = node._items['k2']
        self.assertIsInstance(child, InternalNode)
        self.assertEqual(2, len(child))
        self.assertEqual({('k22',): 'bar', ('k23',): 'quux'},
            self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        self.assertEqual(3, child._node_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar', ('k23',): 'quux'},
            self.to_dict(chkmap))
        # serialising should only serialise the new data - although k22 hasn't
        # changed because it's a special corner case (splitting with only one
        # key leaves one node unaltered), in general k22 is serialised, so we
        # expect k22, k23, the new internal node, and node, to be serialised.
        keys = list(node.serialise(chkmap._store))
        child_key = child._key
        k22_key = child._items['k22']._key
        k23_key = child._items['k23']._key
        self.assertEqual([k22_key, k23_key, child_key, node.key()], keys)
        self.assertEqualDiff("'' InternalNode\n"
            " 'k2' InternalNode\n"
            " ('k23',) 'quux'\n"
            , chkmap._dump_tree())

    def test__search_prefix_filter_with_hash(self):
        search_key_func = chk_map.search_key_registry.get('hash-16-way')
        node = InternalNode(search_key_func=search_key_func)
        node._key_width = 2
        node._node_width = 4
        self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(
            StaticTuple('a', 'b')))
        self.assertEqual('E8B7', node._search_prefix_filter(
            StaticTuple('a', 'b')))
        self.assertEqual('E8B7', node._search_prefix_filter(
            StaticTuple('a',)))

    def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
        chkmap = self._get_map(
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
        # Check we have the expected tree.
        self.assertEqualDiff("'' InternalNode\n"
            " 'k2' InternalNode\n"
            " ('k23',) 'quux'\n"
            , chkmap._dump_tree())
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # unmapping k23 should give us a root, with k1 and k22 as direct
        # children.
        result = node.unmap(chkmap._store, ('k23',))
        # check the pointed-at object within node - k2 should now point at the
        # k22 leaf (which has been paged in to see if we can collapse the tree)
        child = node._items['k2']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({('k22',): 'bar'},
            self.to_dict(child, None))
        # Check overall structure is intact:
        self.assertEqual(2, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar'},
            self.to_dict(chkmap))
        # serialising should only serialise the new data - the root node.
        keys = list(node.serialise(chkmap._store))
        self.assertEqual([keys[-1]], keys)
        chkmap = CHKMap(chkmap._store, keys[-1])
        self.assertEqualDiff("'' InternalNode\n"
            , chkmap._dump_tree())

    def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self):
        chkmap = self._get_map(
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
        self.assertEqualDiff("'' InternalNode\n"
            " 'k2' InternalNode\n"
            " ('k23',) 'quux'\n"
            , chkmap._dump_tree())
        orig_root = chkmap._root_node
        chkmap = CHKMap(chkmap._store, orig_root)
        chkmap._ensure_root()
        node = chkmap._root_node
        k2_ptr = node._items['k2']
        # unmapping k1 should give us a root, with k22 and k23 as direct
        # children, and should not have needed to page in the subtree.
        result = node.unmap(chkmap._store, ('k1',))
        self.assertEqual(k2_ptr, result)
        chkmap = CHKMap(chkmap._store, orig_root)
        # Unmapping at the CHKMap level should switch to the new root
        chkmap.unmap(('k1',))
        self.assertEqual(k2_ptr, chkmap._root_node)
        self.assertEqualDiff("'' InternalNode\n"
            " ('k23',) 'quux'\n"
            , chkmap._dump_tree())

# map -> fits - done
# map -> doesn't fit - shrink from left till fits
#     key data to return: the common prefix, new nodes.
# unmap -> how to tell if siblings can be combined.
#     combining leaf nodes means expanding the prefix to the left; so gather
#     the size of all the leaf nodes addressed by expanding the prefix by 1;
#     if any adjacent node is an internal node, we know that that is a dense
#     subtree - can't combine.
#     otherwise as soon as the sum of serialised values exceeds the split
#     threshold we know we can't combine - stop.
# unmap -> key return data - space in node, common prefix length? and key count
# variable length prefixes? -> later; start with fixed width to get something
#     going.
# map -> fits - update pointer to leaf
#     return [prefix and node] - seems sound.
# map -> doesn't fit - find unique prefix and shift right
#     create internal nodes for all the partitions, return list of unique
#     prefixes and nodes.
# map -> new prefix - create a leaf
# unmap -> if child key count 0, remove
# unmap -> return space in node, common prefix length? (why?), key count
# map, if 1 node returned, use it, otherwise make an internal and populate.
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
#     code).
# map inits as empty leafnode.
#
# AA, AB, AC, AD, BA
# packed internal node - ideal:
#     AA, AB, AC, AD, BA
# single byte fanout - A,B, AA,AB,AC,AD, BA
# AB - split, but we want to end up with AB, BA, in one node, with
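
# Illustrative sketch (added for clarity, not part of the original notes): the
# "single byte fanout" shape described above, built explicitly the same way
# the TestInternalNode tests build trees.  The helper name and the key values
# are our own examples, not values asserted anywhere in bzrlib.
def _example_single_byte_fanout():
    a_leaf = LeafNode()
    a_leaf.set_maximum_size(100)
    a_leaf.map(None, ("AA",), "value")
    a_leaf.map(None, ("AB",), "value")
    b_leaf = LeafNode()
    b_leaf.set_maximum_size(100)
    b_leaf.map(None, ("BA",), "value")
    root = InternalNode('')
    # The root fans out on the first byte of the serialised keys.
    root.add_node("A", a_leaf)
    root.add_node("B", b_leaf)
    assert sorted(root._items) == ["A", "B"]
    assert len(root) == 3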


class TestCHKMapDifference(TestCaseWithExampleMaps):

    def get_difference(self, new_roots, old_roots,
                       search_key_func=None):
        if search_key_func is None:
            search_key_func = chk_map._search_key_plain
        return chk_map.CHKMapDifference(self.get_chk_bytes(),
            new_roots, old_roots, search_key_func)
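
    # Illustrative sketch (added for clarity, not part of the original tests):
    # the lifecycle the tests below exercise.  _read_all_roots() yields the
    # root records that are definitely new and, as a side effect, primes
    # _old_queue, _new_queue and _new_item_queue for the rest of the walk.
    # The method name and its return shape are ours, purely for illustration.
    def _example_difference_walk(self, new_root, old_root):
        diff = self.get_difference([new_root], [old_root])
        new_root_records = [record.key for record in diff._read_all_roots()]
        return (new_root_records, diff._old_queue, diff._new_queue,
                diff._new_item_queue)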

    def test__init__(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map.map(('aaa',), 'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1])
        self.assertEqual(set([key1]), diff._all_old_chks)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([], diff._new_queue)

    def help__read_all_roots(self, search_key_func):
        c_map = self.make_root_only_map(search_key_func=search_key_func)
        key1 = c_map.key()
        c_map.map(('aaa',), 'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1], search_key_func)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # We should have queued up only items that aren't in the old
        # set.
        self.assertEqual([(('aaa',), 'new aaa content')],
            diff._new_item_queue)
        self.assertEqual([], diff._new_queue)
        # And there are no old references, so that queue should be
        # empty.
        self.assertEqual([], diff._old_queue)

    def test__read_all_roots_plain(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_plain)

    def test__read_all_roots_16(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_16)

    def test__read_all_roots_skips_known_old(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map2 = self.make_root_only_map(chk_map._search_key_plain)
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        # We should have no results. key2 is completely contained within key1,
        # and we should have seen that in the first pass
        self.assertEqual([], root_results)

    def test__read_all_roots_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        c_map.map(('abb',), 'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # At this point, we should have queued up only the 'a' Leaf on both
        # sides, both 'c' and 'd' are known to not have changed on both sides
        self.assertEqual([key2_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        self.assertEqual([key1_a], diff._old_queue)

    def test__read_all_roots_multi_new_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        key1_c = c_map._root_node._items['c'].key()
        c_map.map(('abb',), 'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        key2_c = c_map._root_node._items['c'].key()
        c_map = chk_map.CHKMap(self.get_chk_bytes(), key1,
            chk_map._search_key_plain)
        c_map.map(('ccc',), 'new ccc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        key3_c = c_map._root_node._items['c'].key()
        diff = self.get_difference([key2, key3], [key1],
            chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        # We should have queued up key2_a and key3_c, but not key2_c or key3_a
        self.assertEqual([key2_a, key3_c], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        # And we should have queued up both a and c for the old set
        self.assertEqual([key1_a, key1_c], diff._old_queue)

    def test__read_all_roots_different_depths(self):
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_a = c_map._root_node._items['a'].key()
        key1_c = c_map._root_node._items['c'].key()
        key1_d = c_map._root_node._items['d'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_aa = c_map2._root_node._items['aa'].key()
        key2_ad = c_map2._root_node._items['ad'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the 'a' subset should be queued up, since 'c' and 'd' cannot be
        # present.
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([key2_aa, key2_ad], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual([key2_aa, key2_ad], diff._old_queue)
        self.assertEqual([key1_a, key1_c, key1_d], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_different_depths_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_2 = c_map._root_node._items['2'].key()
        key1_4 = c_map._root_node._items['4'].key()
        key1_C = c_map._root_node._items['C'].key()
        key1_F = c_map._root_node._items['F'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_16)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_F0 = c_map2._root_node._items['F0'].key()
        key2_F3 = c_map2._root_node._items['F3'].key()
        key2_F4 = c_map2._root_node._items['F4'].key()
        key2_FD = c_map2._root_node._items['FD'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the subset of keys that may be present should be queued up.
        self.assertEqual([key1_F], diff._old_queue)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
            sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
            sorted(diff._old_queue))
        self.assertEqual(sorted([key1_2, key1_4, key1_C, key1_F]),
            sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_mixed_depth(self):
        c_map = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_aa = c_map._root_node._items['aa'].key()
        key1_ad = c_map._root_node._items['ad'].key()

        c_map2 = self.make_one_deep_one_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_a = c_map2._root_node._items['a'].key()
        key2_b = c_map2._root_node._items['b'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # 'ad' matches exactly 'a' on the other side, so it should be removed,
        # and neither side should have it queued for walking
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key2_b], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        # Note: This is technically not the 'true minimal' set that we could
        # use. The reason is that 'a' was matched exactly to 'ad' (by sha
        # sum). However, the code gets complicated in the case of more
        # than one interesting key, so for now, we live with this.
        # Consider revising, though benchmarking showing it to be a
        # real-world issue should be done.
        self.assertEqual([key2_a], diff._old_queue)
        # self.assertEqual([], diff._old_queue)
        self.assertEqual([key1_aa], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_yields_extra_deep_records(self):
        # This is slightly controversial, as we will yield a chk page that we
        # might later on find out could be filtered out. (If a root node is
        # referenced deeper in the old set.)
        # However, even with stacking, we always have all chk pages that we
        # will need. So as long as we filter out the referenced keys, we'll
        # never run into problems.
        # This allows us to yield a root node record immediately, without any
        # buffering.
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree() # load all keys
        key1 = c_map.key()
        key1_a = c_map._root_node._items['a'].key()
        c_map2 = self.get_map({
            ('acc',): 'initial acc content',
            ('ace',): 'initial ace content',
        }, maximum_size=100)
        self.assertEqualDiff(
            " ('acc',) 'initial acc content'\n"
            " ('ace',) 'initial ace content'\n",
            c_map2._dump_tree())
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # However, even though we have yielded the root node to be fetched,
        # we should have enqueued all of the chk pages to be walked, so that
        # we can find the keys if they are present
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([(('acc',), 'initial acc content'),
                          (('ace',), 'initial ace content'),
                         ], diff._new_item_queue)

    def test__read_all_roots_multiple_targets(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map = self.make_one_deep_map()
        key2 = c_map.key()
        c_map._dump_tree() # load everything
        key2_c = c_map._root_node._items['c'].key()
        key2_d = c_map._root_node._items['d'].key()
        c_map.map(('ccc',), 'new ccc value')
        key3 = c_map._save()
        key3_c = c_map._root_node._items['c'].key()
        diff = self.get_difference([key2, key3], [key1],
            chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        self.assertEqual([], diff._old_queue)
        # the key 'd' is interesting from key2 and key3, but should only be
        # entered into the queue 1 time
        self.assertEqual(sorted([key2_c, key3_c, key2_d]),
            sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old(self):
        # This is the 'initial branch' case. With nothing in the old
        # set, we can just queue up all root nodes into the interesting queue,
        # and then have them fast-path flushed via _flush_new_queue
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map()
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]), sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map(chk_map._search_key_16)
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [],
            chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]),
            sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_multiple_old(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        c_map.map(('ccc',), 'new ccc value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        c_map.map(('add',), 'new add value')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        diff = self.get_difference([key3], [key1, key2],
            chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        # the 'a' keys should not be queued up 2 times, since they are
        # identical in both old maps
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__process_next_old_batched_no_dupes(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        key1_aa = c_map._root_node._items['a']._items['aa'].key()
        key1_ab = c_map._root_node._items['a']._items['ab'].key()
        key1_ac = c_map._root_node._items['a']._items['ac'].key()
        key1_ad = c_map._root_node._items['a']._items['ad'].key()
        c_map.map(('aaa',), 'new aaa value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        key2_aa = c_map._root_node._items['a']._items['aa'].key()
        c_map.map(('acc',), 'new acc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        key3_ac = c_map._root_node._items['a']._items['ac'].key()
        diff = self.get_difference([key3], [key1, key2],
            chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        self.assertEqual(sorted([key1_a, key2_a]),
            sorted(diff._old_queue))
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        diff._process_next_old()
        # All of the old records should be brought in and queued up,
        # but we should not have any duplicates
        self.assertEqual(sorted([key1_aa, key1_ab, key1_ac, key1_ad, key2_aa]),
            sorted(diff._old_queue))


class TestIterInterestingNodes(TestCaseWithExampleMaps):

    def get_map_key(self, a_dict, maximum_size=10):
        c_map = self.get_map(a_dict, maximum_size=maximum_size)
        return c_map.key()

    def assertIterInteresting(self, records, items, interesting_keys,
                              old_keys):
        """Check the result of iter_interesting_nodes.

        Note that we no longer care how many steps are taken, etc, just that
        the right contents are returned.

        :param records: A list of record keys that should be yielded
        :param items: A list of items (key,value) that should be yielded.
        """
        store = self.get_chk_bytes()
        store._search_key_func = chk_map._search_key_plain
        iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys,
            old_keys)
        record_keys = []
        all_items = []
        for record, new_items in iter_nodes:
            if record is not None:
                record_keys.append(record.key)
            if new_items:
                all_items.extend(new_items)
        self.assertEqual(sorted(records), sorted(record_keys))
        self.assertEqual(sorted(items), sorted(all_items))
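
    # Illustrative usage sketch (added for clarity, not part of the original
    # tests): consuming iter_interesting_nodes directly, mirroring the helper
    # above.  The generator yields (record, items) pairs: chk records that
    # only exist in the new maps, plus (key, value) items only present there.
    # The method name and return shape are ours, for illustration only.
    def _example_iter_interesting(self, new_root_keys, old_root_keys):
        store = self.get_chk_bytes()
        found_records = []
        found_items = []
        for record, items in chk_map.iter_interesting_nodes(
                store, new_root_keys, old_root_keys):
            if record is not None:
                found_records.append(record.key)
            found_items.extend(items)
        return found_records, found_items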

    def test_empty_to_one_keys(self):
        target = self.get_map_key({('a',): 'content'})
        self.assertIterInteresting([target],
                                   [(('a',), 'content')],
                                   [target], [])

    def test_none_to_one_key(self):
        basis = self.get_map_key({})
        target = self.get_map_key({('a',): 'content'})
        self.assertIterInteresting([target],
                                   [(('a',), 'content')],
                                   [target], [basis])

    def test_one_to_none_key(self):
        basis = self.get_map_key({('a',): 'content'})
        target = self.get_map_key({})
        self.assertIterInteresting([target],
                                   [],
                                   [target], [basis])

    def test_common_pages(self):
        basis = self.get_map_key({('a',): 'content',
                                  ('b',): 'content',
                                  ('c',): 'content',
                                  })
        target = self.get_map_key({('a',): 'content',
                                   ('b',): 'other content',
                                   ('c',): 'content',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " ('a',) 'content'\n"
            " ('b',) 'other content'\n"
            " ('c',) 'content'\n",
            target_map._dump_tree())
        b_key = target_map._root_node._items['b'].key()
        # This should return the root node, and the node for the 'b' key
        self.assertIterInteresting([target, b_key],
                                   [(('b',), 'other content')],
                                   [target], [basis])

    def test_common_sub_page(self):
        basis = self.get_map_key({('aaa',): 'common',
                                  ('c',): 'common',
                                  })
        target = self.get_map_key({('aaa',): 'common',
                                   ('aab',): 'new',
                                   ('c',): 'common',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('c',) 'common'\n",
            target_map._dump_tree())
        # The key for the internal aa node
        a_key = target_map._root_node._items['a'].key()
        # The key for the leaf aab node
        # aaa_key = target_map._root_node._items['a']._items['aaa'].key()
        aab_key = target_map._root_node._items['a']._items['aab'].key()
        self.assertIterInteresting([target, a_key, aab_key],
                                   [(('aab',), 'new')],
                                   [target], [basis])

    def test_common_leaf(self):
        basis = self.get_map_key({})
        target1 = self.get_map_key({('aaa',): 'common'})
        target2 = self.get_map_key({('aaa',): 'common',
                                    ('bbb',): 'new',
                                    })
        target3 = self.get_map_key({('aaa',): 'common',
                                    ('aac',): 'other',
                                    ('bbb',): 'new',
                                    })
        # The LeafNode containing 'aaa': 'common' occurs at 3 different levels.
        # Once as a root node, once as a second layer, and once as a third
        # layer. It should only be returned one time regardless
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n",
            target1_map._dump_tree())
        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " ('bbb',) 'new'\n",
            target2_map._dump_tree())
        target3_map = CHKMap(self.get_chk_bytes(), target3)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'other'\n"
            " ('bbb',) 'new'\n",
            target3_map._dump_tree())
        aaa_key = target1_map._root_node.key()
        b_key = target2_map._root_node._items['b'].key()
        a_key = target3_map._root_node._items['a'].key()
        aac_key = target3_map._root_node._items['a']._items['aac'].key()
        self.assertIterInteresting(
            [target1, target2, target3, a_key, aac_key, b_key],
            [(('aaa',), 'common'), (('bbb',), 'new'), (('aac',), 'other')],
            [target1, target2, target3], [basis])

        self.assertIterInteresting(
            [target2, target3, a_key, aac_key, b_key],
            [(('bbb',), 'new'), (('aac',), 'other')],
            [target2, target3], [target1])

        # Technically, target1 could be filtered out, but since it is a root
        # node, we yield it immediately, rather than waiting to find out much
        # later on.
        self.assertIterInteresting(
            [target1],
            [],
            [target1], [target3])

    def test_multiple_maps(self):
        basis1 = self.get_map_key({('aaa',): 'common',
                                   ('aab',): 'basis1',
                                   })
        basis2 = self.get_map_key({('bbb',): 'common',
                                   ('bba',): 'basis2',
                                   })
        target1 = self.get_map_key({('aaa',): 'common',
                                    ('aac',): 'target1',
                                    ('bbb',): 'common',
                                    })
        target2 = self.get_map_key({('aaa',): 'common',
                                    ('bba',): 'target2',
                                    ('bbb',): 'common',
                                    })
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'target1'\n"
            " ('bbb',) 'common'\n",
            target1_map._dump_tree())
        # The key for the target1 internal a node
        a_key = target1_map._root_node._items['a'].key()
        # The key for the leaf aac node
        aac_key = target1_map._root_node._items['a']._items['aac'].key()

        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " 'b' InternalNode\n"
            " ('bba',) 'target2'\n"
            " ('bbb',) 'common'\n",
            target2_map._dump_tree())
        # The key for the target2 internal bb node
        b_key = target2_map._root_node._items['b'].key()
        # The key for the leaf bba node
        bba_key = target2_map._root_node._items['b']._items['bba'].key()
        self.assertIterInteresting(
            [target1, target2, a_key, aac_key, b_key, bba_key],
            [(('aac',), 'target1'), (('bba',), 'target2')],
            [target1, target2], [basis1, basis2])

    def test_multiple_maps_overlapping_common_new(self):
        # Test that when a node is found through the interesting_keys
        # iteration for *some roots* and also via the old keys iteration, it
        # is still scanned for old refs and items, because it's not truly new.
        # This requires 2 levels of InternalNodes to expose, because of the
        # way the bootstrap in _find_children_info works.
        # This suggests that the code is probably amenable to/benefit from
        # How does this test work?
        # 1) We need a second level InternalNode present in a basis tree.
        # 2) We need a left side new tree that uses that InternalNode.
        # 3) We need a right side new tree that does not use that InternalNode
        #    at all but that has an unchanged *value* that was reachable inside
        #    that InternalNode.
        basis = self.get_map_key({
            # InternalNode, unchanged in left:
            ('aaa',): 'left',
            ('abb',): 'right',
            # Forces an internalNode at 'a'
            ('ccc',): 'common',
            })
        left = self.get_map_key({
            # All of basis unchanged
            ('aaa',): 'left',
            ('abb',): 'right',
            ('ccc',): 'common',
            # And a new top level node so the root key is different
            ('ddd',): 'change',
            })
        right = self.get_map_key({
            # A value that is unchanged from basis and thus should be filtered
            # out.
            ('abb',): 'right',
            })
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n"
            " ('ddd',) 'change'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_d_key = left_map._root_node._items['d'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('abb',) 'right'\n",
            right_map._dump_tree())
        # Keys from the right side target - none, the root is enough.

        self.assertIterInteresting(
            [right, left, l_d_key],
            [(('ddd',), 'change')],
            [left, right], [basis])

    def test_multiple_maps_similar(self):
        # We want to have a depth=2 tree, with multiple entries in each leaf
        # node.
        basis = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'will change left',
            ('caa',): 'unchanged',
            ('cbb',): 'will change right',
            }, maximum_size=60)
        left = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'changed left',
            ('caa',): 'unchanged',
            ('cbb',): 'will change right',
            }, maximum_size=60)
        right = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'will change left',
            ('caa',): 'unchanged',
            ('cbb',): 'changed right',
            }, maximum_size=60)
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'changed left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_a_key = left_map._root_node._items['a'].key()
        l_c_key = left_map._root_node._items['c'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'changed right'\n",
            right_map._dump_tree())
        r_a_key = right_map._root_node._items['a'].key()
        r_c_key = right_map._root_node._items['c'].key()
        self.assertIterInteresting(
            [right, left, l_a_key, r_c_key],
            [(('abb',), 'changed left'), (('cbb',), 'changed right')],
            [left, right], [basis])