# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for maps built on a CHK versionedfiles facility."""

from itertools import izip

from bzrlib import (
    chk_map,
    errors,
    groupcompress,
    osutils,
    tests,
    )
from bzrlib.chk_map import (
    CHKMap,
    InternalNode,
    LeafNode,
    Node,
    )


class TestNode(tests.TestCase):

    def assertCommonPrefix(self, expected_common, prefix, key):
        common = Node.common_prefix(prefix, key)
        self.assertTrue(len(common) <= len(prefix))
        self.assertTrue(len(common) <= len(key))
        self.assertStartsWith(prefix, common)
        self.assertStartsWith(key, common)
        self.assertEquals(expected_common, common)
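        # For reference: the assertions above characterise Node.common_prefix
        # as the longest leading substring shared by 'prefix' and 'key'.  A
        # rough sketch of that behaviour (an illustration, not the bzrlib
        # implementation) would be:
        #     common = []
        #     for p_char, k_char in zip(prefix, key):
        #         if p_char != k_char:
        #             break
        #         common.append(p_char)
        #     return ''.join(common)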

    def test_common_prefix(self):
        self.assertCommonPrefix('beg', 'beg', 'begin')

    def test_no_common_prefix(self):
        self.assertCommonPrefix('', 'begin', 'end')

    def test_equal(self):
        self.assertCommonPrefix('begin', 'begin', 'begin')

    def test_not_a_prefix(self):
        self.assertCommonPrefix('b', 'begin', 'b')

    def test_empty(self):
        self.assertCommonPrefix('', '', 'end')
        self.assertCommonPrefix('', 'begin', '')
        self.assertCommonPrefix('', '', '')


class TestCaseWithStore(tests.TestCaseWithMemoryTransport):

    def get_chk_bytes(self):
        # This creates a standalone CHK store.
        factory = groupcompress.make_pack_factory(False, False, 1)
        self.chk_bytes = factory(self.get_transport())
        return self.chk_bytes

    def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,
                 search_key_func=None):
        if chk_bytes is None:
            chk_bytes = self.get_chk_bytes()
        root_key = CHKMap.from_dict(chk_bytes, a_dict,
            maximum_size=maximum_size, key_width=key_width,
            search_key_func=search_key_func)
        root_key2 = CHKMap._create_via_map(chk_bytes, a_dict,
            maximum_size=maximum_size, key_width=key_width,
            search_key_func=search_key_func)
        self.assertEqual(root_key, root_key2, "CHKMap.from_dict() did not"
                         " match CHKMap._create_via_map")
        chkmap = CHKMap(chk_bytes, root_key, search_key_func=search_key_func)
        return chkmap

    def read_bytes(self, chk_bytes, key):
        stream = chk_bytes.get_record_stream([key], 'unordered', True)
        record = stream.next()
        if record.storage_kind == 'absent':
            self.fail('Store does not contain the key %s' % (key,))
        return record.get_bytes_as("fulltext")

    def to_dict(self, node, *args):
        return dict(node.iteritems(*args))


class TestCaseWithExampleMaps(TestCaseWithStore):

    def get_chk_bytes(self):
        if getattr(self, '_chk_bytes', None) is None:
            self._chk_bytes = super(TestCaseWithExampleMaps,
                                    self).get_chk_bytes()
        return self._chk_bytes

    def get_map(self, a_dict, maximum_size=100, search_key_func=None):
        c_map = self._get_map(a_dict, maximum_size=maximum_size,
                              chk_bytes=self.get_chk_bytes(),
                              search_key_func=search_key_func)
        return c_map

    def make_root_only_map(self, search_key_func=None):
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('abb',): 'initial abb content',
        }, search_key_func=search_key_func)

    def make_root_only_aaa_ddd_map(self, search_key_func=None):
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('ddd',): 'initial ddd content',
        }, search_key_func=search_key_func)

    def make_one_deep_map(self, search_key_func=None):
        # Same as root_only_map, except it forces an InternalNode at the root
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('abb',): 'initial abb content',
            ('ccc',): 'initial ccc content',
            ('ddd',): 'initial ddd content',
        }, search_key_func=search_key_func)

    def make_two_deep_map(self, search_key_func=None):
        # Carefully chosen so that it creates a 2-deep map for both
        # _search_key_plain and for _search_key_16
        # Also so that things line up with make_one_deep_two_prefix_map
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('abb',): 'initial abb content',
            ('acc',): 'initial acc content',
            ('ace',): 'initial ace content',
            ('add',): 'initial add content',
            ('adh',): 'initial adh content',
            ('adl',): 'initial adl content',
            ('ccc',): 'initial ccc content',
            ('ddd',): 'initial ddd content',
        }, search_key_func=search_key_func)

    def make_one_deep_two_prefix_map(self, search_key_func=None):
        """Create a map with one internal node, but references are extra long.

        Otherwise has similar content to make_two_deep_map.
        """
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('add',): 'initial add content',
            ('adh',): 'initial adh content',
            ('adl',): 'initial adl content',
        }, search_key_func=search_key_func)

    def make_one_deep_one_prefix_map(self, search_key_func=None):
        """Create a map with one internal node, but references are extra long.

        Similar to make_one_deep_two_prefix_map, except the split is at the
        first char, rather than the second.
        """
        return self.get_map({
            ('add',): 'initial add content',
            ('adh',): 'initial adh content',
            ('adl',): 'initial adl content',
            ('bbb',): 'initial bbb content',
        }, search_key_func=search_key_func)


class TestTestCaseWithExampleMaps(TestCaseWithExampleMaps):
    """Actual tests for the provided examples."""

    def test_root_only_map_plain(self):
        c_map = self.make_root_only_map()
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('abb',) 'initial abb content'\n",

    def test_root_only_map_16(self):
        c_map = self.make_root_only_map(search_key_func=chk_map._search_key_16)
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('abb',) 'initial abb content'\n",

    def test_one_deep_map_plain(self):
        c_map = self.make_one_deep_map()
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('abb',) 'initial abb content'\n"
            " ('ccc',) 'initial ccc content'\n"
            " ('ddd',) 'initial ddd content'\n",

    def test_one_deep_map_16(self):
        c_map = self.make_one_deep_map(search_key_func=chk_map._search_key_16)
        self.assertEqualDiff(
            " ('ccc',) 'initial ccc content'\n"
            " ('abb',) 'initial abb content'\n"
            " ('aaa',) 'initial aaa content'\n"
            " ('ddd',) 'initial ddd content'\n",

    def test_root_only_aaa_ddd_plain(self):
        c_map = self.make_root_only_aaa_ddd_map()
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('ddd',) 'initial ddd content'\n",

    def test_root_only_aaa_ddd_16(self):
        c_map = self.make_root_only_aaa_ddd_map(
                search_key_func=chk_map._search_key_16)
        # We use 'aaa' and 'ddd' because they happen to map to 'F' when using
        # _search_key_16
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('ddd',) 'initial ddd content'\n",

    def test_two_deep_map_plain(self):
        c_map = self.make_two_deep_map()
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'initial aaa content'\n"
            " ('abb',) 'initial abb content'\n"
            " ('acc',) 'initial acc content'\n"
            " ('ace',) 'initial ace content'\n"
            " ('add',) 'initial add content'\n"
            " ('adh',) 'initial adh content'\n"
            " ('adl',) 'initial adl content'\n"
            " ('ccc',) 'initial ccc content'\n"
            " ('ddd',) 'initial ddd content'\n",

    def test_two_deep_map_16(self):
        c_map = self.make_two_deep_map(search_key_func=chk_map._search_key_16)
        self.assertEqualDiff(
            " ('acc',) 'initial acc content'\n"
            " ('ccc',) 'initial ccc content'\n"
            " ('abb',) 'initial abb content'\n"
            " ('ace',) 'initial ace content'\n"
            " 'F' InternalNode\n"
            " ('aaa',) 'initial aaa content'\n"
            " ('adl',) 'initial adl content'\n"
            " ('adh',) 'initial adh content'\n"
            " ('ddd',) 'initial ddd content'\n"
            " ('add',) 'initial add content'\n",

    def test_one_deep_two_prefix_map_plain(self):
        c_map = self.make_one_deep_two_prefix_map()
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('add',) 'initial add content'\n"
            " ('adh',) 'initial adh content'\n"
            " ('adl',) 'initial adl content'\n",

    def test_one_deep_two_prefix_map_16(self):
        c_map = self.make_one_deep_two_prefix_map(
                search_key_func=chk_map._search_key_16)
        self.assertEqualDiff(
            " ('aaa',) 'initial aaa content'\n"
            " ('adl',) 'initial adl content'\n"
            " ('adh',) 'initial adh content'\n"
            " ('add',) 'initial add content'\n",

    def test_one_deep_one_prefix_map_plain(self):
        c_map = self.make_one_deep_one_prefix_map()
        self.assertEqualDiff(
            " ('add',) 'initial add content'\n"
            " ('adh',) 'initial adh content'\n"
            " ('adl',) 'initial adl content'\n"
            " ('bbb',) 'initial bbb content'\n",

    def test_one_deep_one_prefix_map_16(self):
        c_map = self.make_one_deep_one_prefix_map(
                search_key_func=chk_map._search_key_16)
        self.assertEqualDiff(
            " ('bbb',) 'initial bbb content'\n"
            " ('add',) 'initial add content'\n"
            " ('adh',) 'initial adh content'\n"
            " ('adl',) 'initial adl content'\n",


class TestMap(TestCaseWithStore):

    def assertHasABMap(self, chk_bytes):
        ab_leaf_bytes = 'chkleaf:\n0\n1\n1\na\n\x001\nb\n'
        ab_sha1 = osutils.sha_string(ab_leaf_bytes)
        self.assertEqual('90986195696b177c8895d48fdb4b7f2366f798a0', ab_sha1)
        root_key = ('sha1:' + ab_sha1,)
        self.assertEqual(ab_leaf_bytes, self.read_bytes(chk_bytes, root_key))
        return root_key

    def assertHasEmptyMap(self, chk_bytes):
        empty_leaf_bytes = 'chkleaf:\n0\n1\n0\n\n'
        empty_sha1 = osutils.sha_string(empty_leaf_bytes)
        self.assertEqual('8571e09bf1bcc5b9621ce31b3d4c93d6e9a1ed26', empty_sha1)
        root_key = ('sha1:' + empty_sha1,)
        self.assertEqual(empty_leaf_bytes, self.read_bytes(chk_bytes, root_key))
        return root_key
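
    # For orientation, the 'chkleaf:' literals used above decompose as
    # follows (an informal reading of the test data in this file, not a
    # statement of the serialiser's spec):
    #   line 1: 'chkleaf:'  format header
    #   line 2: maximum_size ('0' means no limit was set)
    #   line 3: key width    ('1' above)
    #   line 4: item count
    #   line 5: common serialised key prefix ('' for the empty map, 'a' above)
    #   then, per item: key suffix + '\x00' + value line count, followed by
    #   the value lines ('\x001\nb\n' above is key ('a',) mapped to 'b').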

    def assertMapLayoutEqual(self, map_one, map_two):
        """Assert that the internal structure is identical between the maps."""
        map_one._ensure_root()
        node_one_stack = [map_one._root_node]
        map_two._ensure_root()
        node_two_stack = [map_two._root_node]
        while node_one_stack:
            node_one = node_one_stack.pop()
            node_two = node_two_stack.pop()
            if node_one.__class__ != node_two.__class__:
                self.assertEqualDiff(map_one._dump_tree(include_keys=True),
                                     map_two._dump_tree(include_keys=True))
            self.assertEqual(node_one._search_prefix,
                             node_two._search_prefix)
            if isinstance(node_one, InternalNode):
                # Internal nodes must have identical references
                self.assertEqual(sorted(node_one._items.keys()),
                                 sorted(node_two._items.keys()))
                node_one_stack.extend([n for n, _ in
                                       node_one._iter_nodes(map_one._store)])
                node_two_stack.extend([n for n, _ in
                                       node_two._iter_nodes(map_two._store)])
            else:
                # Leaf nodes must have identical contents
                self.assertEqual(node_one._items, node_two._items)
        self.assertEquals([], node_two_stack)

    def assertCanonicalForm(self, chkmap):
        """Assert that the chkmap is in 'canonical' form.

        We do this by adding all of the key value pairs from scratch, both in
        forward order and reverse order, and assert that the final tree layout
        is identical.
        """
        items = list(chkmap.iteritems())
        map_forward = chk_map.CHKMap(None, None)
        map_forward._root_node.set_maximum_size(chkmap._root_node.maximum_size)
        for key, value in items:
            map_forward.map(key, value)
        self.assertMapLayoutEqual(map_forward, chkmap)
        map_reverse = chk_map.CHKMap(None, None)
        map_reverse._root_node.set_maximum_size(chkmap._root_node.maximum_size)
        for key, value in reversed(items):
            map_reverse.map(key, value)
        self.assertMapLayoutEqual(map_reverse, chkmap)

    def test_assert_map_layout_equal(self):
        store = self.get_chk_bytes()
        map_one = CHKMap(store, None)
        map_one._root_node.set_maximum_size(20)
        map_two = CHKMap(store, None)
        map_two._root_node.set_maximum_size(20)
        self.assertMapLayoutEqual(map_one, map_two)
        map_one.map(('aaa',), 'value')
        self.assertRaises(AssertionError,
                          self.assertMapLayoutEqual, map_one, map_two)
        map_two.map(('aaa',), 'value')
        self.assertMapLayoutEqual(map_one, map_two)
        # Split the tree, so we ensure that internal nodes and leaf nodes are
        map_one.map(('aab',), 'value')
        self.assertIsInstance(map_one._root_node, InternalNode)
        self.assertRaises(AssertionError,
                          self.assertMapLayoutEqual, map_one, map_two)
        map_two.map(('aab',), 'value')
        self.assertMapLayoutEqual(map_one, map_two)
        map_one.map(('aac',), 'value')
        self.assertRaises(AssertionError,
                          self.assertMapLayoutEqual, map_one, map_two)
        self.assertCanonicalForm(map_one)

    def test_from_dict_empty(self):
        chk_bytes = self.get_chk_bytes()
        root_key = CHKMap.from_dict(chk_bytes, {})
        # Check the data was saved and inserted correctly.
        expected_root_key = self.assertHasEmptyMap(chk_bytes)
        self.assertEqual(expected_root_key, root_key)

    def test_from_dict_ab(self):
        chk_bytes = self.get_chk_bytes()
        root_key = CHKMap.from_dict(chk_bytes, {"a": "b"})
        # Check the data was saved and inserted correctly.
        expected_root_key = self.assertHasABMap(chk_bytes)
        self.assertEqual(expected_root_key, root_key)

    def test_apply_empty_ab(self):
        # applying a delta (None, "a", "b") to an empty chkmap generates the
        # same map as from_dict_ab.
        chk_bytes = self.get_chk_bytes()
        root_key = CHKMap.from_dict(chk_bytes, {})
        chkmap = CHKMap(chk_bytes, root_key)
        new_root = chkmap.apply_delta([(None, "a", "b")])
        # Check the data was saved and inserted correctly.
        expected_root_key = self.assertHasABMap(chk_bytes)
        self.assertEqual(expected_root_key, new_root)
        # The update should have left us with an in memory root node, with an
        self.assertEqual(new_root, chkmap._root_node._key)

    def test_apply_ab_empty(self):
        # applying a delta ("a", None, None) to a map with 'a' in it generates
        chk_bytes = self.get_chk_bytes()
        root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
        chkmap = CHKMap(chk_bytes, root_key)
        new_root = chkmap.apply_delta([(("a",), None, None)])
        # Check the data was saved and inserted correctly.
        expected_root_key = self.assertHasEmptyMap(chk_bytes)
        self.assertEqual(expected_root_key, new_root)
        # The update should have left us with an in memory root node, with an
        self.assertEqual(new_root, chkmap._root_node._key)

    def test_apply_new_keys_must_be_new(self):
        # applying a delta (None, "a", "b") to a map with 'a' in it generates
        chk_bytes = self.get_chk_bytes()
        root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
        chkmap = CHKMap(chk_bytes, root_key)
        self.assertRaises(errors.InconsistentDelta, chkmap.apply_delta,
                          [(None, ("a",), "b")])
        # As an error occurred, the update should have left us without changing
        # anything (the root should be unchanged).
        self.assertEqual(root_key, chkmap._root_node._key)

    def test_apply_delta_is_deterministic(self):
        chk_bytes = self.get_chk_bytes()
        chkmap1 = CHKMap(chk_bytes, None)
        chkmap1._root_node.set_maximum_size(10)
        chkmap1.apply_delta([(None, ('aaa',), 'common'),
                             (None, ('bba',), 'target2'),
                             (None, ('bbb',), 'common')])
        root_key1 = chkmap1._save()
        self.assertCanonicalForm(chkmap1)

        chkmap2 = CHKMap(chk_bytes, None)
        chkmap2._root_node.set_maximum_size(10)
        chkmap2.apply_delta([(None, ('bbb',), 'common'),
                             (None, ('bba',), 'target2'),
                             (None, ('aaa',), 'common')])
        root_key2 = chkmap2._save()
        self.assertEqualDiff(chkmap1._dump_tree(include_keys=True),
                             chkmap2._dump_tree(include_keys=True))
        self.assertEqual(root_key1, root_key2)
        self.assertCanonicalForm(chkmap2)

    def test_stable_splitting(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 2 keys per LeafNode
        chkmap._root_node.set_maximum_size(35)
        chkmap.map(('aaa',), 'v')
        self.assertEqualDiff("'' LeafNode\n"
        chkmap.map(('aab',), 'v')
        self.assertEqualDiff("'' LeafNode\n"
        self.assertCanonicalForm(chkmap)

        # Creates a new internal node, and splits the others into leaves
        chkmap.map(('aac',), 'v')
        self.assertEqualDiff("'' InternalNode\n"
        self.assertCanonicalForm(chkmap)

        # Splits again, because it can't fit in the current structure
        chkmap.map(('bbb',), 'v')
        self.assertEqualDiff("'' InternalNode\n"
            " 'a' InternalNode\n"
        self.assertCanonicalForm(chkmap)

    def test_map_splits_with_longer_key(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 1 key per LeafNode
        chkmap._root_node.set_maximum_size(10)
        chkmap.map(('aaa',), 'v')
        chkmap.map(('aaaa',), 'v')
        self.assertCanonicalForm(chkmap)
        self.assertIsInstance(chkmap._root_node, InternalNode)

    def test_with_linefeed_in_key(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 1 key per LeafNode
        chkmap._root_node.set_maximum_size(10)
        chkmap.map(('a\ra',), 'val1')
        chkmap.map(('a\rb',), 'val2')
        chkmap.map(('ac',), 'val3')
        self.assertCanonicalForm(chkmap)
        self.assertEqualDiff("'' InternalNode\n"
            " 'a\\r' InternalNode\n"
            " 'a\\ra' LeafNode\n"
            " ('a\\ra',) 'val1'\n"
            " 'a\\rb' LeafNode\n"
            " ('a\\rb',) 'val2'\n"
        # We should also successfully serialise and deserialise these items
        root_key = chkmap._save()
        chkmap = CHKMap(store, root_key)
        self.assertEqualDiff("'' InternalNode\n"
            " 'a\\r' InternalNode\n"
            " 'a\\ra' LeafNode\n"
            " ('a\\ra',) 'val1'\n"
            " 'a\\rb' LeafNode\n"
            " ('a\\rb',) 'val2'\n"

    def test_deep_splitting(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 2 keys per LeafNode
        chkmap._root_node.set_maximum_size(40)
        chkmap.map(('aaaaaaaa',), 'v')
        chkmap.map(('aaaaabaa',), 'v')
        self.assertEqualDiff("'' LeafNode\n"
            " ('aaaaaaaa',) 'v'\n"
            " ('aaaaabaa',) 'v'\n",
        chkmap.map(('aaabaaaa',), 'v')
        chkmap.map(('aaababaa',), 'v')
        self.assertEqualDiff("'' InternalNode\n"
            " ('aaaaaaaa',) 'v'\n"
            " ('aaaaabaa',) 'v'\n"
            " ('aaabaaaa',) 'v'\n"
            " ('aaababaa',) 'v'\n",
        chkmap.map(('aaabacaa',), 'v')
        chkmap.map(('aaabadaa',), 'v')
        self.assertEqualDiff("'' InternalNode\n"
            " ('aaaaaaaa',) 'v'\n"
            " ('aaaaabaa',) 'v'\n"
            " 'aaab' InternalNode\n"
            " 'aaabaa' LeafNode\n"
            " ('aaabaaaa',) 'v'\n"
            " 'aaabab' LeafNode\n"
            " ('aaababaa',) 'v'\n"
            " 'aaabac' LeafNode\n"
            " ('aaabacaa',) 'v'\n"
            " 'aaabad' LeafNode\n"
            " ('aaabadaa',) 'v'\n",
        chkmap.map(('aaababba',), 'val')
        chkmap.map(('aaababca',), 'val')
        self.assertEqualDiff("'' InternalNode\n"
            " ('aaaaaaaa',) 'v'\n"
            " ('aaaaabaa',) 'v'\n"
            " 'aaab' InternalNode\n"
            " 'aaabaa' LeafNode\n"
            " ('aaabaaaa',) 'v'\n"
            " 'aaabab' InternalNode\n"
            " 'aaababa' LeafNode\n"
            " ('aaababaa',) 'v'\n"
            " 'aaababb' LeafNode\n"
            " ('aaababba',) 'val'\n"
            " 'aaababc' LeafNode\n"
            " ('aaababca',) 'val'\n"
            " 'aaabac' LeafNode\n"
            " ('aaabacaa',) 'v'\n"
            " 'aaabad' LeafNode\n"
            " ('aaabadaa',) 'v'\n",
        # Now we add a node that should fit around an existing InternalNode,
        # but has a slightly different key prefix, which causes a new
        chkmap.map(('aaabDaaa',), 'v')
        self.assertEqualDiff("'' InternalNode\n"
            " ('aaaaaaaa',) 'v'\n"
            " ('aaaaabaa',) 'v'\n"
            " 'aaab' InternalNode\n"
            " 'aaabD' LeafNode\n"
            " ('aaabDaaa',) 'v'\n"
            " 'aaaba' InternalNode\n"
            " 'aaabaa' LeafNode\n"
            " ('aaabaaaa',) 'v'\n"
            " 'aaabab' InternalNode\n"
            " 'aaababa' LeafNode\n"
            " ('aaababaa',) 'v'\n"
            " 'aaababb' LeafNode\n"
            " ('aaababba',) 'val'\n"
            " 'aaababc' LeafNode\n"
            " ('aaababca',) 'val'\n"
            " 'aaabac' LeafNode\n"
            " ('aaabacaa',) 'v'\n"
            " 'aaabad' LeafNode\n"
            " ('aaabadaa',) 'v'\n",

    def test_map_collapses_if_size_changes(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 2 keys per LeafNode
        chkmap._root_node.set_maximum_size(35)
        chkmap.map(('aaa',), 'v')
        chkmap.map(('aab',), 'very long value that splits')
        self.assertEqualDiff("'' InternalNode\n"
            " ('aab',) 'very long value that splits'\n",
        self.assertCanonicalForm(chkmap)
        # Now changing the value to something small should cause a rebuild
        chkmap.map(('aab',), 'v')
        self.assertEqualDiff("'' LeafNode\n"
        self.assertCanonicalForm(chkmap)

    def test_map_double_deep_collapses(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 3 small keys per LeafNode
        chkmap._root_node.set_maximum_size(40)
        chkmap.map(('aaa',), 'v')
        chkmap.map(('aab',), 'very long value that splits')
        chkmap.map(('abc',), 'v')
        self.assertEqualDiff("'' InternalNode\n"
            " 'aa' InternalNode\n"
            " ('aab',) 'very long value that splits'\n"
        chkmap.map(('aab',), 'v')
        self.assertCanonicalForm(chkmap)
        self.assertEqualDiff("'' LeafNode\n"

    def test_stable_unmap(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 2 keys per LeafNode
        chkmap._root_node.set_maximum_size(35)
        chkmap.map(('aaa',), 'v')
        chkmap.map(('aab',), 'v')
        self.assertEqualDiff("'' LeafNode\n"
        # Creates a new internal node, and splits the others into leaves
        chkmap.map(('aac',), 'v')
        self.assertEqualDiff("'' InternalNode\n"
        self.assertCanonicalForm(chkmap)
        # Now let's unmap one of the keys, and assert that we collapse the
        chkmap.unmap(('aac',))
        self.assertEqualDiff("'' LeafNode\n"
        self.assertCanonicalForm(chkmap)

    def test_unmap_double_deep(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 3 keys per LeafNode
        chkmap._root_node.set_maximum_size(40)
        chkmap.map(('aaa',), 'v')
        chkmap.map(('aaab',), 'v')
        chkmap.map(('aab',), 'very long value')
        chkmap.map(('abc',), 'v')
        self.assertEqualDiff("'' InternalNode\n"
            " 'aa' InternalNode\n"
            " ('aab',) 'very long value'\n"
        # Removing the 'aab' key should cause everything to collapse back to a
        chkmap.unmap(('aab',))
        self.assertEqualDiff("'' LeafNode\n"

    def test_unmap_double_deep_non_empty_leaf(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 3 keys per LeafNode
        chkmap._root_node.set_maximum_size(40)
        chkmap.map(('aaa',), 'v')
        chkmap.map(('aab',), 'long value')
        chkmap.map(('aabb',), 'v')
        chkmap.map(('abc',), 'v')
        self.assertEqualDiff("'' InternalNode\n"
            " 'aa' InternalNode\n"
            " ('aab',) 'long value'\n"
        # Removing the 'aab' key should cause everything to collapse back to a
        chkmap.unmap(('aab',))
        self.assertEqualDiff("'' LeafNode\n"

    def test_unmap_with_known_internal_node_doesnt_page(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 3 keys per LeafNode
        chkmap._root_node.set_maximum_size(30)
        chkmap.map(('aaa',), 'v')
        chkmap.map(('aab',), 'v')
        chkmap.map(('aac',), 'v')
        chkmap.map(('abc',), 'v')
        chkmap.map(('acd',), 'v')
        self.assertEqualDiff("'' InternalNode\n"
            " 'aa' InternalNode\n"
        # Save everything to the map, and start over
        chkmap = CHKMap(store, chkmap._save())
        # Mapping an 'aa' key loads the internal node, but should not map the
        # 'ab' and 'ac' nodes
        chkmap.map(('aad',), 'v')
        self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
        self.assertIsInstance(chkmap._root_node._items['ab'], (tuple, chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['ac'], (tuple, chk_map._key_type))
        # Unmapping 'acd' can notice that 'aa' is an InternalNode and not have
        chkmap.unmap(('acd',))
        self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
        self.assertIsInstance(chkmap._root_node._items['ab'], (tuple, chk_map._key_type))

    def test_unmap_without_fitting_doesnt_page_in(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 2 keys per LeafNode
        chkmap._root_node.set_maximum_size(20)
        chkmap.map(('aaa',), 'v')
        chkmap.map(('aab',), 'v')
        self.assertEqualDiff("'' InternalNode\n"
        # Save everything to the map, and start over
        chkmap = CHKMap(store, chkmap._save())
        chkmap.map(('aac',), 'v')
        chkmap.map(('aad',), 'v')
        chkmap.map(('aae',), 'v')
        chkmap.map(('aaf',), 'v')
        # At this point, the previous nodes should not be paged in, but the
        # newly added nodes would be
        self.assertIsInstance(chkmap._root_node._items['aaa'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aab'], (tuple, chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
        self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
        self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
        self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
        # Now unmapping one of the new nodes will use only the already-paged-in
        # nodes to determine that we don't need to do more.
        chkmap.unmap(('aaf',))
        self.assertIsInstance(chkmap._root_node._items['aaa'], (tuple, chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aab'], (tuple, chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
        self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
        self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)

    def test_unmap_pages_in_if_necessary(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 2 keys per LeafNode
        chkmap._root_node.set_maximum_size(30)
        chkmap.map(('aaa',), 'val')
        chkmap.map(('aab',), 'val')
        chkmap.map(('aac',), 'val')
        self.assertEqualDiff("'' InternalNode\n"
        root_key = chkmap._save()
        # Save everything to the map, and start over
        chkmap = CHKMap(store, root_key)
        chkmap.map(('aad',), 'v')
        # At this point, the previous nodes should not be paged in, but the
        # newly added node would be
        self.assertIsInstance(chkmap._root_node._items['aaa'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aab'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aac'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
        # Unmapping the new node will check the existing nodes to see if they
        # Clear the page cache so we ensure we have to read all the children
        chk_map._page_cache.clear()
        chkmap.unmap(('aad',))
        self.assertIsInstance(chkmap._root_node._items['aaa'], LeafNode)
        self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
        self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)

    def test_unmap_pages_in_from_page_cache(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 2 keys per LeafNode
        chkmap._root_node.set_maximum_size(30)
        chkmap.map(('aaa',), 'val')
        chkmap.map(('aab',), 'val')
        chkmap.map(('aac',), 'val')
        root_key = chkmap._save()
        # Save everything to the map, and start over
        chkmap = CHKMap(store, root_key)
        chkmap.map(('aad',), 'val')
        self.assertEqualDiff("'' InternalNode\n"
        # Save everything to the map, start over after _dump_tree
        chkmap = CHKMap(store, root_key)
        chkmap.map(('aad',), 'v')
        # At this point, the previous nodes should not be paged in, but the
        # newly added node would be
        self.assertIsInstance(chkmap._root_node._items['aaa'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aab'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aac'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
        # Now clear the page cache, and only include 2 of the children in the
        aab_key = chkmap._root_node._items['aab']
        aab_bytes = chk_map._page_cache[aab_key]
        aac_key = chkmap._root_node._items['aac']
        aac_bytes = chk_map._page_cache[aac_key]
        chk_map._page_cache.clear()
        chk_map._page_cache[aab_key] = aab_bytes
        chk_map._page_cache[aac_key] = aac_bytes

        # Unmapping the new node will check the nodes from the page cache
        # first, and not have to read in 'aaa'
        chkmap.unmap(('aad',))
        self.assertIsInstance(chkmap._root_node._items['aaa'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
        self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)

    def test_unmap_uses_existing_items(self):
        store = self.get_chk_bytes()
        chkmap = CHKMap(store, None)
        # Should fit 2 keys per LeafNode
        chkmap._root_node.set_maximum_size(30)
        chkmap.map(('aaa',), 'val')
        chkmap.map(('aab',), 'val')
        chkmap.map(('aac',), 'val')
        root_key = chkmap._save()
        # Save everything to the map, and start over
        chkmap = CHKMap(store, root_key)
        chkmap.map(('aad',), 'val')
        chkmap.map(('aae',), 'val')
        chkmap.map(('aaf',), 'val')
        # At this point, the previous nodes should not be paged in, but the
        # newly added node would be
        self.assertIsInstance(chkmap._root_node._items['aaa'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aab'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aac'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
        self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
        self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
        # Unmapping a new node will see the other nodes that are already in
        # memory, and not need to page in anything else
        chkmap.unmap(('aad',))
        self.assertIsInstance(chkmap._root_node._items['aaa'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aab'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aac'], (tuple,
                              chk_map._key_type))
        self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
        self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)

    def test_iter_changes_empty_ab(self):
        # Asking for changes between an empty dict to a dict with keys returns
        basis = self._get_map({}, maximum_size=10)
        target = self._get_map(
            {('a',): 'content here', ('b',): 'more content'},
            chk_bytes=basis._store, maximum_size=10)
        self.assertEqual([(('a',), None, 'content here'),
                          (('b',), None, 'more content')],
                         sorted(list(target.iter_changes(basis))))

    def test_iter_changes_ab_empty(self):
        # Asking for changes between a dict with keys to an empty dict returns
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
                              maximum_size=10)
        target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
        self.assertEqual([(('a',), 'content here', None),
                          (('b',), 'more content', None)],
                         sorted(list(target.iter_changes(basis))))

    def test_iter_changes_empty_empty_is_empty(self):
        basis = self._get_map({}, maximum_size=10)
        target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
        self.assertEqual([], sorted(list(target.iter_changes(basis))))

    def test_iter_changes_ab_ab_is_empty(self):
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
                              maximum_size=10)
        target = self._get_map(
            {('a',): 'content here', ('b',): 'more content'},
            chk_bytes=basis._store, maximum_size=10)
        self.assertEqual([], sorted(list(target.iter_changes(basis))))

    def test_iter_changes_ab_ab_nodes_not_loaded(self):
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
                              maximum_size=10)
        target = self._get_map(
            {('a',): 'content here', ('b',): 'more content'},
            chk_bytes=basis._store, maximum_size=10)
        list(target.iter_changes(basis))
        self.assertIsInstance(target._root_node, (tuple, chk_map._key_type))
        self.assertIsInstance(basis._root_node, (tuple, chk_map._key_type))

    def test_iter_changes_ab_ab_changed_values_shown(self):
        basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
                              maximum_size=10)
        target = self._get_map(
            {('a',): 'content here', ('b',): 'different content'},
            chk_bytes=basis._store, maximum_size=10)
        result = sorted(list(target.iter_changes(basis)))
        self.assertEqual([(('b',), 'more content', 'different content')],
                         result)

    def test_iter_changes_mixed_node_length(self):
        # When one side has different node lengths than the other, common
        # but different keys still need to be shown, and new-and-old included
        # aaa - common unaltered
        # aab - common altered
        # aaa to be not loaded (later test)
        # aab, b, at to be returned.
        # basis splits at byte 0,1,2, aaa is common, b is basis only
        basis_dict = {('aaa',): 'foo bar',
                      ('aab',): 'common altered a', ('b',): 'foo bar b'}
        # target splits at byte 1,2, at is target only
        target_dict = {('aaa',): 'foo bar',
                       ('aab',): 'common altered b', ('at',): 'foo bar t'}
        changes = [
            (('aab',), 'common altered a', 'common altered b'),
            (('at',), None, 'foo bar t'),
            (('b',), 'foo bar b', None),
            ]
        basis = self._get_map(basis_dict, maximum_size=10)
        target = self._get_map(target_dict, maximum_size=10,
                               chk_bytes=basis._store)
        self.assertEqual(changes, sorted(list(target.iter_changes(basis))))

    def test_iter_changes_common_pages_not_loaded(self):
        # aaa - common unaltered
        # aab - common altered
        # aaa to be not loaded
        # aaa not to be in result.
        basis_dict = {('aaa',): 'foo bar',
                      ('aab',): 'common altered a', ('b',): 'foo bar b'}
        # target splits at byte 1, at is target only
        target_dict = {('aaa',): 'foo bar',
                       ('aab',): 'common altered b', ('at',): 'foo bar t'}
        basis = self._get_map(basis_dict, maximum_size=10)
        target = self._get_map(target_dict, maximum_size=10,
                               chk_bytes=basis._store)
        basis_get = basis._store.get_record_stream
        def get_record_stream(keys, order, fulltext):
            if ('sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys:
                self.fail("'aaa' pointer was followed %r" % keys)
            return basis_get(keys, order, fulltext)
        basis._store.get_record_stream = get_record_stream
        result = sorted(list(target.iter_changes(basis)))
        for change in result:
            if change[0] == ('aaa',):
                self.fail("Found unexpected change: %s" % change)

    def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self):
        # Within a leaf there are no hashes to exclude keys, make sure multi
        # value leaf nodes are handled well.
        basis_dict = {('aaa',): 'foo bar',
                      ('aab',): 'common altered a', ('b',): 'foo bar b'}
        target_dict = {('aaa',): 'foo bar',
                       ('aab',): 'common altered b', ('at',): 'foo bar t'}
        changes = [
            (('aab',), 'common altered a', 'common altered b'),
            (('at',), None, 'foo bar t'),
            (('b',), 'foo bar b', None),
            ]
        basis = self._get_map(basis_dict)
        target = self._get_map(target_dict, chk_bytes=basis._store)
        self.assertEqual(changes, sorted(list(target.iter_changes(basis))))

    def test_iteritems_empty(self):
        chk_bytes = self.get_chk_bytes()
        root_key = CHKMap.from_dict(chk_bytes, {})
        chkmap = CHKMap(chk_bytes, root_key)
        self.assertEqual([], list(chkmap.iteritems()))

    def test_iteritems_two_items(self):
        chk_bytes = self.get_chk_bytes()
        root_key = CHKMap.from_dict(chk_bytes,
            {"a":"content here", "b":"more content"})
        chkmap = CHKMap(chk_bytes, root_key)
        self.assertEqual([(("a",), "content here"), (("b",), "more content")],
                         sorted(list(chkmap.iteritems())))

    def test_iteritems_selected_one_of_two_items(self):
        chkmap = self._get_map({("a",):"content here", ("b",):"more content"})
        self.assertEqual({("a",): "content here"},
                         self.to_dict(chkmap, [("a",)]))

    def test_iteritems_keys_prefixed_by_2_width_nodes(self):
        chkmap = self._get_map(
            {("a","a"):"content here", ("a", "b",):"more content",
             ("b", ""): 'boring content'},
            maximum_size=10, key_width=2)
        self.assertEqual(
            {("a", "a"): "content here", ("a", "b"): 'more content'},
            self.to_dict(chkmap, [("a",)]))

    def test_iteritems_keys_prefixed_by_2_width_nodes_hashed(self):
        search_key_func = chk_map.search_key_registry.get('hash-16-way')
        self.assertEqual('E8B7BE43\x00E8B7BE43', search_key_func(('a', 'a')))
        self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
        self.assertEqual('71BEEFF9\x0000000000', search_key_func(('b', '')))
        chkmap = self._get_map(
            {("a","a"):"content here", ("a", "b",):"more content",
             ("b", ""): 'boring content'},
            maximum_size=10, key_width=2, search_key_func=search_key_func)
        self.assertEqual(
            {("a", "a"): "content here", ("a", "b"): 'more content'},
            self.to_dict(chkmap, [("a",)]))

    def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
        chkmap = self._get_map(
            {("a","a"):"content here", ("a", "b",):"more content",
             ("b", ""): 'boring content'}, key_width=2)
        self.assertEqual(
            {("a", "a"): "content here", ("a", "b"): 'more content'},
            self.to_dict(chkmap, [("a",)]))

    def test___len__empty(self):
        chkmap = self._get_map({})
        self.assertEqual(0, len(chkmap))

    def test___len__2(self):
        chkmap = self._get_map({("foo",):"bar", ("gam",):"quux"})
        self.assertEqual(2, len(chkmap))

    def test_max_size_100_bytes_new(self):
        # When there is a 100 byte upper node limit, a tree is formed.
        chkmap = self._get_map({("k1"*50,):"v1", ("k2"*50,):"v2"}, maximum_size=100)
        # We expect three nodes:
        # A root, with two children, and with two key prefixes - k1 to one, and
        # k2 to the other as our node splitting is only just being developed.
        # The maximum size should be embedded
        chkmap._ensure_root()
        self.assertEqual(100, chkmap._root_node.maximum_size)
        self.assertEqual(1, chkmap._root_node._key_width)
        # There should be two child nodes, and prefix of 2(bytes):
        self.assertEqual(2, len(chkmap._root_node._items))
        self.assertEqual("k", chkmap._root_node._compute_search_prefix())
        # The actual nodes pointed at will change as serialisers change; so
        # here we test that the key prefix is correct; then load the nodes and
        # check they have the right pointed at key; whether they have the
        # pointed at value inline or not is also unrelated to this test so we
        # don't check that in detail - rather we just check the aggregate
        # value.
        nodes = sorted(chkmap._root_node._items.items())
        ptr1 = nodes[0]
        ptr2 = nodes[1]
        self.assertEqual('k1', ptr1[0])
        self.assertEqual('k2', ptr2[0])
        node1 = chk_map._deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1], None)
        self.assertIsInstance(node1, LeafNode)
        self.assertEqual(1, len(node1))
        self.assertEqual({('k1'*50,): 'v1'}, self.to_dict(node1, chkmap._store))
        node2 = chk_map._deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1], None)
        self.assertIsInstance(node2, LeafNode)
        self.assertEqual(1, len(node2))
        self.assertEqual({('k2'*50,): 'v2'}, self.to_dict(node2, chkmap._store))
        # Having checked we have a good structure, check that the content is
        self.assertEqual(2, len(chkmap))
        self.assertEqual({("k1"*50,): "v1", ("k2"*50,): "v2"},
                         self.to_dict(chkmap))

    def test_init_root_is_LeafNode_new(self):
        chk_bytes = self.get_chk_bytes()
        chkmap = CHKMap(chk_bytes, None)
        self.assertIsInstance(chkmap._root_node, LeafNode)
        self.assertEqual({}, self.to_dict(chkmap))
        self.assertEqual(0, len(chkmap))

    def test_init_and_save_new(self):
        chk_bytes = self.get_chk_bytes()
        chkmap = CHKMap(chk_bytes, None)
        key = chkmap._save()
        leaf_node = LeafNode()
        self.assertEqual([key], leaf_node.serialise(chk_bytes))

    def test_map_first_item_new(self):
        chk_bytes = self.get_chk_bytes()
        chkmap = CHKMap(chk_bytes, None)
        chkmap.map(("foo,",), "bar")
        self.assertEqual({('foo,',): 'bar'}, self.to_dict(chkmap))
        self.assertEqual(1, len(chkmap))
        key = chkmap._save()
        leaf_node = LeafNode()
        leaf_node.map(chk_bytes, ("foo,",), "bar")
        self.assertEqual([key], leaf_node.serialise(chk_bytes))

    def test_unmap_last_item_root_is_leaf_new(self):
        chkmap = self._get_map({("k1"*50,): "v1", ("k2"*50,): "v2"})
        chkmap.unmap(("k1"*50,))
        chkmap.unmap(("k2"*50,))
        self.assertEqual(0, len(chkmap))
        self.assertEqual({}, self.to_dict(chkmap))
        key = chkmap._save()
        leaf_node = LeafNode()
        self.assertEqual([key], leaf_node.serialise(chkmap._store))

    def test__dump_tree(self):
        chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2",
                                ("bbb",): "value3",},
        self.assertEqualDiff("'' InternalNode\n"
            " 'a' InternalNode\n"
            " ('aaa',) 'value1'\n"
            " ('aab',) 'value2'\n"
            " ('bbb',) 'value3'\n",
            chkmap._dump_tree())
        self.assertEqualDiff("'' InternalNode\n"
            " 'a' InternalNode\n"
            " ('aaa',) 'value1'\n"
            " ('aab',) 'value2'\n"
            " ('bbb',) 'value3'\n",
            chkmap._dump_tree())
        self.assertEqualDiff(
            "'' InternalNode sha1:0690d471eb0a624f359797d0ee4672bd68f4e236\n"
            " 'a' InternalNode sha1:1514c35503da9418d8fd90c1bed553077cb53673\n"
            " 'aaa' LeafNode sha1:4cc5970454d40b4ce297a7f13ddb76f63b88fefb\n"
            " ('aaa',) 'value1'\n"
            " 'aab' LeafNode sha1:1d68bc90914ef8a3edbcc8bb28b00cb4fea4b5e2\n"
            " ('aab',) 'value2'\n"
            " 'b' LeafNode sha1:3686831435b5596515353364eab0399dc45d49e7\n"
            " ('bbb',) 'value3'\n",
            chkmap._dump_tree(include_keys=True))

    def test__dump_tree_in_progress(self):
        chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2"},
        chkmap.map(('bbb',), 'value3')
        self.assertEqualDiff("'' InternalNode\n"
            " 'a' InternalNode\n"
            " ('aaa',) 'value1'\n"
            " ('aab',) 'value2'\n"
            " ('bbb',) 'value3'\n",
            chkmap._dump_tree())
        # For things that are updated by adding 'bbb', we don't have a sha key
        # for them yet, so they are listed as None
        self.assertEqualDiff(
            "'' InternalNode None\n"
            " 'a' InternalNode sha1:6b0d881dd739a66f733c178b24da64395edfaafd\n"
            " 'aaa' LeafNode sha1:40b39a08d895babce17b20ae5f62d187eaa4f63a\n"
            " ('aaa',) 'value1'\n"
            " 'aab' LeafNode sha1:ad1dc7c4e801302c95bf1ba7b20bc45e548cd51a\n"
            " ('aab',) 'value2'\n"
            " 'b' LeafNode None\n"
            " ('bbb',) 'value3'\n",
            chkmap._dump_tree(include_keys=True))


def _search_key_single(key):
    """A search key function that maps all nodes to the same value"""
    return 'x'


def _test_search_key(key):
    return 'test:' + '\x00'.join(key)
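
# A search_key_func controls how keys are grouped into nodes: CHKMap splits
# and prefixes on the *search key*, so the 'test:' prefix above is what makes
# the 'test:1' / 'test:2' / 'test:3' leaf prefixes visible in the
# _dump_tree() output asserted in TestMapSearchKeys below.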


class TestMapSearchKeys(TestCaseWithStore):

    def test_default_chk_map_uses_flat_search_key(self):
        chkmap = chk_map.CHKMap(self.get_chk_bytes(), None)
        self.assertEqual('1',
                         chkmap._search_key_func(('1',)))
        self.assertEqual('1\x002',
                         chkmap._search_key_func(('1', '2')))
        self.assertEqual('1\x002\x003',
                         chkmap._search_key_func(('1', '2', '3')))

    def test_search_key_is_passed_to_root_node(self):
        chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
                                search_key_func=_test_search_key)
        self.assertIs(_test_search_key, chkmap._search_key_func)
        self.assertEqual('test:1\x002\x003',
                         chkmap._search_key_func(('1', '2', '3')))
        self.assertEqual('test:1\x002\x003',
                         chkmap._root_node._search_key(('1', '2', '3')))

    def test_search_key_passed_via__ensure_root(self):
        chk_bytes = self.get_chk_bytes()
        chkmap = chk_map.CHKMap(chk_bytes, None,
                                search_key_func=_test_search_key)
        root_key = chkmap._save()
        chkmap = chk_map.CHKMap(chk_bytes, root_key,
                                search_key_func=_test_search_key)
        chkmap._ensure_root()
        self.assertEqual('test:1\x002\x003',
                         chkmap._root_node._search_key(('1', '2', '3')))

    def test_search_key_with_internal_node(self):
        chk_bytes = self.get_chk_bytes()
        chkmap = chk_map.CHKMap(chk_bytes, None,
                                search_key_func=_test_search_key)
        chkmap._root_node.set_maximum_size(10)
        chkmap.map(('1',), 'foo')
        chkmap.map(('2',), 'bar')
        chkmap.map(('3',), 'baz')
        self.assertEqualDiff("'' InternalNode\n"
            " 'test:1' LeafNode\n"
            " 'test:2' LeafNode\n"
            " 'test:3' LeafNode\n"
            , chkmap._dump_tree())
        root_key = chkmap._save()
        chkmap = chk_map.CHKMap(chk_bytes, root_key,
                                search_key_func=_test_search_key)
        self.assertEqualDiff("'' InternalNode\n"
            " 'test:1' LeafNode\n"
            " 'test:2' LeafNode\n"
            " 'test:3' LeafNode\n"
            , chkmap._dump_tree())

    def test_search_key_16(self):
        chk_bytes = self.get_chk_bytes()
        chkmap = chk_map.CHKMap(chk_bytes, None,
                                search_key_func=chk_map._search_key_16)
        chkmap._root_node.set_maximum_size(10)
        chkmap.map(('1',), 'foo')
        chkmap.map(('2',), 'bar')
        chkmap.map(('3',), 'baz')
        self.assertEqualDiff("'' InternalNode\n"
            , chkmap._dump_tree())
        root_key = chkmap._save()
        chkmap = chk_map.CHKMap(chk_bytes, root_key,
                                search_key_func=chk_map._search_key_16)
        # We can get the values back correctly
        self.assertEqual([(('1',), 'foo')],
                         list(chkmap.iteritems([('1',)])))
        self.assertEqualDiff("'' InternalNode\n"
            , chkmap._dump_tree())

    def test_search_key_255(self):
        chk_bytes = self.get_chk_bytes()
        chkmap = chk_map.CHKMap(chk_bytes, None,
                                search_key_func=chk_map._search_key_255)
        chkmap._root_node.set_maximum_size(10)
        chkmap.map(('1',), 'foo')
        chkmap.map(('2',), 'bar')
        chkmap.map(('3',), 'baz')
        self.assertEqualDiff("'' InternalNode\n"
            " '\\x1a' LeafNode\n"
            " '\\x83' LeafNode\n"
            , chkmap._dump_tree())
        root_key = chkmap._save()
        chkmap = chk_map.CHKMap(chk_bytes, root_key,
                                search_key_func=chk_map._search_key_255)
        # We can get the values back correctly
        self.assertEqual([(('1',), 'foo')],
                         list(chkmap.iteritems([('1',)])))
        self.assertEqualDiff("'' InternalNode\n"
            " '\\x1a' LeafNode\n"
            " '\\x83' LeafNode\n"
            , chkmap._dump_tree())

    def test_search_key_collisions(self):
        chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
                                search_key_func=_search_key_single)
        # The node will want to expand, but it cannot, because it knows that
        # all the keys must map to this node
        chkmap._root_node.set_maximum_size(20)
        chkmap.map(('1',), 'foo')
        chkmap.map(('2',), 'bar')
        chkmap.map(('3',), 'baz')
        self.assertEqualDiff("'' LeafNode\n"
            , chkmap._dump_tree())


class TestSearchKeyFuncs(tests.TestCase):

    def assertSearchKey16(self, expected, key):
        self.assertEqual(expected, chk_map._search_key_16(key))

    def assertSearchKey255(self, expected, key):
        actual = chk_map._search_key_255(key)
        self.assertEqual(expected, actual, 'actual: %r' % (actual,))

    def test_simple_16(self):
        self.assertSearchKey16('8C736521', ('foo',))
        self.assertSearchKey16('8C736521\x008C736521', ('foo', 'foo'))
        self.assertSearchKey16('8C736521\x0076FF8CAA', ('foo', 'bar'))
        self.assertSearchKey16('ED82CD11', ('abcd',))

    def test_simple_255(self):
        self.assertSearchKey255('\x8cse!', ('foo',))
        self.assertSearchKey255('\x8cse!\x00\x8cse!', ('foo', 'foo'))
        self.assertSearchKey255('\x8cse!\x00v\xff\x8c\xaa', ('foo', 'bar'))
        # The standard mapping for these would include '\n', so it should be
        self.assertSearchKey255('\xfdm\x93_\x00P_\x1bL', ('<', 'V'))

    def test_255_does_not_include_newline(self):
        # When mapping via _search_key_255, we should never have the '\n'
        # character, but all other 255 values should be present
        chars_used = set()
        for char_in in range(256):
            search_key = chk_map._search_key_255((chr(char_in),))
            chars_used.update(search_key)
        all_chars = set([chr(x) for x in range(256)])
        unused_chars = all_chars.symmetric_difference(chars_used)
        self.assertEqual(set('\n'), unused_chars)
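        # (A plausible reason '\n' must stay unused: the serialised node
        # formats exercised in this file, such as 'chkleaf:...', are
        # newline-delimited, so a '\n' inside a search key would break that
        # framing.  This is inferred from the test data, not stated by
        # chk_map itself.)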


class TestLeafNode(TestCaseWithStore):

    def test_current_size_empty(self):
        node = LeafNode()
        self.assertEqual(16, node._current_size())

    def test_current_size_size_changed(self):
        node = LeafNode()
        node.set_maximum_size(10)
        self.assertEqual(17, node._current_size())

    def test_current_size_width_changed(self):
        node = LeafNode()
        node._key_width = 10
        self.assertEqual(17, node._current_size())

    def test_current_size_items(self):
        node = LeafNode()
        base_size = node._current_size()
        node.map(None, ("foo bar",), "baz")
        self.assertEqual(base_size + 14, node._current_size())

    def test_deserialise_empty(self):
        node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
        self.assertEqual(0, len(node))
        self.assertEqual(10, node.maximum_size)
        self.assertEqual(("sha1:1234",), node.key())
        self.assertIs(None, node._search_prefix)
        self.assertIs(None, node._common_serialised_prefix)

    def test_deserialise_items(self):
        node = LeafNode.deserialise(
            "chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
            ("sha1:1234",))
        self.assertEqual(2, len(node))
        self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")],
                         sorted(node.iteritems(None)))

    def test_deserialise_item_with_null_width_1(self):
        node = LeafNode.deserialise(
            "chkleaf:\n0\n1\n2\n\nfoo\x001\nbar\x00baz\nquux\x001\nblarh\n",
            ("sha1:1234",))
        self.assertEqual(2, len(node))
        self.assertEqual([(("foo",), "bar\x00baz"), (("quux",), "blarh")],
                         sorted(node.iteritems(None)))

    def test_deserialise_item_with_null_width_2(self):
        node = LeafNode.deserialise(
            "chkleaf:\n0\n2\n2\n\nfoo\x001\x001\nbar\x00baz\n"
            "quux\x00\x001\nblarh\n",
            ("sha1:1234",))
        self.assertEqual(2, len(node))
        self.assertEqual([(("foo", "1"), "bar\x00baz"), (("quux", ""), "blarh")],
                         sorted(node.iteritems(None)))

    def test_iteritems_selected_one_of_two_items(self):
        node = LeafNode.deserialise(
            "chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
            ("sha1:1234",))
        self.assertEqual(2, len(node))
        self.assertEqual([(("quux",), "blarh")],
                         sorted(node.iteritems(None, [("quux",), ("qaz",)])))

    def test_deserialise_item_with_common_prefix(self):
        node = LeafNode.deserialise(
            "chkleaf:\n0\n2\n2\nfoo\x00\n1\x001\nbar\x00baz\n2\x001\nblarh\n",
            ("sha1:1234",))
        self.assertEqual(2, len(node))
        self.assertEqual([(("foo", "1"), "bar\x00baz"), (("foo", "2"), "blarh")],
                         sorted(node.iteritems(None)))
        self.assertIs(chk_map._unknown, node._search_prefix)
        self.assertEqual('foo\x00', node._common_serialised_prefix)
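        # i.e. the shared leading bytes ('foo\x00') are stored once on the
        # serialised prefix line, and each item line only carries the
        # remaining key bytes ('1', '2'), which presumably keeps leaves with
        # wide, similar keys compact.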

    def test_deserialise_multi_line(self):
        node = LeafNode.deserialise(
            "chkleaf:\n0\n2\n2\nfoo\x00\n1\x002\nbar\nbaz\n2\x002\nblarh\n\n",
            ("sha1:1234",))
        self.assertEqual(2, len(node))
        self.assertEqual([(("foo", "1"), "bar\nbaz"),
                          (("foo", "2"), "blarh\n"),
                         ], sorted(node.iteritems(None)))
        self.assertIs(chk_map._unknown, node._search_prefix)
        self.assertEqual('foo\x00', node._common_serialised_prefix)

    def test_key_new(self):
        node = LeafNode()
        self.assertEqual(None, node.key())

    def test_key_after_map(self):
        node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
        node.map(None, ("foo bar",), "baz quux")
        self.assertEqual(None, node.key())

    def test_key_after_unmap(self):
        node = LeafNode.deserialise(
            "chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
            ("sha1:1234",))
        node.unmap(None, ("foo bar",))
        self.assertEqual(None, node.key())

    def test_map_exceeding_max_size_only_entry_new(self):
        node = LeafNode()
        node.set_maximum_size(10)
        result = node.map(None, ("foo bar",), "baz quux")
        self.assertEqual(("foo bar", [("", node)]), result)
        self.assertTrue(10 < node._current_size())

    def test_map_exceeding_max_size_second_entry_early_difference_new(self):
        node = LeafNode()
        node.set_maximum_size(10)
        node.map(None, ("foo bar",), "baz quux")
        prefix, result = list(node.map(None, ("blue",), "red"))
        self.assertEqual("", prefix)
        self.assertEqual(2, len(result))
        split_chars = set([result[0][0], result[1][0]])
        self.assertEqual(set(["f", "b"]), split_chars)
        nodes = dict(result)
        node = nodes["f"]
        self.assertEqual({("foo bar",): "baz quux"}, self.to_dict(node, None))
        self.assertEqual(10, node.maximum_size)
        self.assertEqual(1, node._key_width)
        node = nodes["b"]
        self.assertEqual({("blue",): "red"}, self.to_dict(node, None))
        self.assertEqual(10, node.maximum_size)
        self.assertEqual(1, node._key_width)

    def test_map_first(self):
        node = LeafNode()
        result = node.map(None, ("foo bar",), "baz quux")
        self.assertEqual(("foo bar", [("", node)]), result)
        self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node, None))
        self.assertEqual(1, len(node))

    def test_map_second(self):
        node = LeafNode()
        node.map(None, ("foo bar",), "baz quux")
        result = node.map(None, ("bingo",), "bango")
        self.assertEqual(("", [("", node)]), result)
        self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"},
                         self.to_dict(node, None))
        self.assertEqual(2, len(node))

    def test_map_replacement(self):
        node = LeafNode()
        node.map(None, ("foo bar",), "baz quux")
        result = node.map(None, ("foo bar",), "bango")
        self.assertEqual(("foo bar", [("", node)]), result)
        self.assertEqual({("foo bar",): "bango"},
                         self.to_dict(node, None))
        self.assertEqual(1, len(node))

    def test_serialise_empty(self):
        store = self.get_chk_bytes()
        node = LeafNode()
        node.set_maximum_size(10)
        expected_key = ("sha1:f34c3f0634ea3f85953dffa887620c0a5b1f4a51",)
        self.assertEqual([expected_key],
                         list(node.serialise(store)))
        self.assertEqual("chkleaf:\n10\n1\n0\n\n", self.read_bytes(store, expected_key))
        self.assertEqual(expected_key, node.key())

    def test_serialise_items(self):
        store = self.get_chk_bytes()
        node = LeafNode()
        node.set_maximum_size(10)
        node.map(None, ("foo bar",), "baz quux")
        expected_key = ("sha1:f89fac7edfc6bdb1b1b54a556012ff0c646ef5e0",)
        self.assertEqual('foo bar', node._common_serialised_prefix)
        self.assertEqual([expected_key],
                         list(node.serialise(store)))
        self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\n\x001\nbaz quux\n",
                         self.read_bytes(store, expected_key))
        self.assertEqual(expected_key, node.key())

    def test_unique_serialised_prefix_empty_new(self):
        node = LeafNode()
        self.assertIs(None, node._compute_search_prefix())

    def test_unique_serialised_prefix_one_item_new(self):
        node = LeafNode()
        node.map(None, ("foo bar", "baz"), "baz quux")
        self.assertEqual("foo bar\x00baz", node._compute_search_prefix())

    def test_unmap_missing(self):
        node = LeafNode()
        self.assertRaises(KeyError, node.unmap, None, ("foo bar",))

    def test_unmap_present(self):
        node = LeafNode()
        node.map(None, ("foo bar",), "baz quux")
        result = node.unmap(None, ("foo bar",))
        self.assertEqual(node, result)
        self.assertEqual({}, self.to_dict(node, None))
        self.assertEqual(0, len(node))
1696
def test_map_maintains_common_prefixes(self):
1699
node.map(None, ("foo bar", "baz"), "baz quux")
1700
self.assertEqual('foo bar\x00baz', node._search_prefix)
1701
self.assertEqual('foo bar\x00baz', node._common_serialised_prefix)
1702
node.map(None, ("foo bar", "bing"), "baz quux")
1703
self.assertEqual('foo bar\x00b', node._search_prefix)
1704
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1705
node.map(None, ("fool", "baby"), "baz quux")
1706
self.assertEqual('foo', node._search_prefix)
1707
self.assertEqual('foo', node._common_serialised_prefix)
1708
node.map(None, ("foo bar", "baz"), "replaced")
1709
self.assertEqual('foo', node._search_prefix)
1710
self.assertEqual('foo', node._common_serialised_prefix)
1711
node.map(None, ("very", "different"), "value")
1712
self.assertEqual('', node._search_prefix)
1713
self.assertEqual('', node._common_serialised_prefix)
1715
    def test_unmap_maintains_common_prefixes(self):
        node = LeafNode()
        node._key_width = 2
        node.map(None, ("foo bar", "baz"), "baz quux")
        node.map(None, ("foo bar", "bing"), "baz quux")
        node.map(None, ("fool", "baby"), "baz quux")
        node.map(None, ("very", "different"), "value")
        self.assertEqual('', node._search_prefix)
        self.assertEqual('', node._common_serialised_prefix)
        node.unmap(None, ("very", "different"))
        self.assertEqual("foo", node._search_prefix)
        self.assertEqual("foo", node._common_serialised_prefix)
        node.unmap(None, ("fool", "baby"))
        self.assertEqual('foo bar\x00b', node._search_prefix)
        self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
        node.unmap(None, ("foo bar", "baz"))
        self.assertEqual('foo bar\x00bing', node._search_prefix)
        self.assertEqual('foo bar\x00bing', node._common_serialised_prefix)
        node.unmap(None, ("foo bar", "bing"))
        self.assertEqual(None, node._search_prefix)
        self.assertEqual(None, node._common_serialised_prefix)

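
# The two tests above exercise how LeafNode keeps _search_prefix and
# _common_serialised_prefix up to date as keys are mapped and unmapped.  As a
# stand-alone illustration of the invariant being checked (plain Python, not
# the bzrlib implementation; the helper name is invented for this sketch),
# the maintained value is just the longest common prefix of the serialised
# keys currently held by the node:

def _illustrate_common_prefix(serialised_keys):
    """Return the longest common prefix of the given strings, or None.

    Illustrative only: the real LeafNode updates its prefix incrementally
    rather than recomputing it from every key.
    """
    if not serialised_keys:
        return None
    common = serialised_keys[0]
    for key in serialised_keys[1:]:
        # Shrink the candidate until it is a prefix of this key too.
        while not key.startswith(common):
            common = common[:-1]
    return common

# For example, _illustrate_common_prefix(['foo bar\x00baz', 'foo bar\x00bing',
# 'fool\x00baby']) == 'foo', matching the assertions above.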


class TestInternalNode(TestCaseWithStore):

    def test_add_node_empty_new(self):
        node = InternalNode('fo')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("foo", child)
        # Note that node isn't strictly valid now as a tree (only one child),
        # but that's ok for this test.
        # The first child defines the node's width:
        self.assertEqual(3, node._node_width)
        # We should be able to iterate over the contents without doing IO.
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, None))
        # The length should be known:
        self.assertEqual(1, len(node))
        # serialising the node should serialise the child and the node.
        chk_bytes = self.get_chk_bytes()
        keys = list(node.serialise(chk_bytes))
        child_key = child.serialise(chk_bytes)[0]
        self.assertEqual(
            [child_key, ('sha1:cf67e9997d8228a907c1f5bfb25a8bd9cd916fac',)],
            keys)
        # We should be able to access deserialised content.
        bytes = self.read_bytes(chk_bytes, keys[1])
        node = chk_map._deserialise(bytes, keys[1], None)
        self.assertEqual(1, len(node))
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, chk_bytes))
        self.assertEqual(3, node._node_width)

    def test_add_node_resets_key_new(self):
        node = InternalNode('fo')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("foo", child)
        chk_bytes = self.get_chk_bytes()
        keys = list(node.serialise(chk_bytes))
        self.assertEqual(keys[1], node._key)
        node.add_node("fos", child)
        self.assertEqual(None, node._key)

    # def test_add_node_empty_oversized_one_ok_new(self):
    # def test_add_node_one_oversized_second_kept_minimum_fan(self):
    # def test_add_node_two_oversized_third_kept_minimum_fan(self):
    # def test_add_node_one_oversized_second_splits_errors(self):

    def test__iter_nodes_no_key_filter(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "baz")
        node.add_node("b", child)

        for child, node_key_filter in node._iter_nodes(None, key_filter=None):
            self.assertEqual(None, node_key_filter)

    def test__iter_nodes_splits_key_filter(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "baz")
        node.add_node("b", child)
        # foo and bar both match exactly one leaf node, but 'cat' should not
        # match any, and should not be placed in one.
        key_filter = (('foo',), ('bar',), ('cat',))
        for child, node_key_filter in node._iter_nodes(None,
                                                       key_filter=key_filter):
            # each child could only match one key filter, so make sure it was
            # filtered.
            self.assertEqual(1, len(node_key_filter))

    def test__iter_nodes_with_multiple_matches(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "val")
        child.map(None, ("fob",), "val")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "val")
        child.map(None, ("baz",), "val")
        node.add_node("b", child)
        # Note that 'ram' doesn't match anything, so it should be freely
        # ignored.
        key_filter = (('foo',), ('fob',), ('bar',), ('baz',), ('ram',))
        for child, node_key_filter in node._iter_nodes(None,
                                                       key_filter=key_filter):
            # each child could match two key filters, so make sure they were
            # both included.
            self.assertEqual(2, len(node_key_filter))

    def make_fo_fa_node(self):
        node = InternalNode('f')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "val")
        child.map(None, ("fob",), "val")
        node.add_node('fo', child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("far",), "val")
        child.map(None, ("faz",), "val")
        node.add_node("fa", child)
        return node

    def test__iter_nodes_single_entry(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        self.assertEqual(key_filter, nodes[0][1])

    def test__iter_nodes_single_entry_misses(self):
        node = self.make_fo_fa_node()
        key_filter = [('bar',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(0, len(nodes))

    def test__iter_nodes_mixed_key_width(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('b',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        matches = key_filter[:]
        matches.remove(('b',))
        self.assertEqual(sorted(matches), sorted(nodes[0][1]))

    def test__iter_nodes_match_all(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('f',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))

    def test__iter_nodes_fixed_widths_and_misses(self):
        node = self.make_fo_fa_node()
        # foo and faa should both match one child, baz should miss
        key_filter = [('foo',), ('faa',), ('baz',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))
        for node, matches in nodes:
            self.assertEqual(1, len(matches))

    def test_iteritems_empty_new(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems(None)))

    def test_iteritems_two_children(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode()
        leaf2.map(None, ('strange',), 'beast')
        node.add_node("f", leaf1)
        node.add_node("s", leaf2)
        self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')],
                         sorted(node.iteritems(None)))

    def test_iteritems_two_children_partial(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode()
        leaf2.map(None, ('strange',), 'beast')
        node.add_node("f", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items['f'] = None
        node.add_node("s", leaf2)
        self.assertEqual([(('strange',), 'beast')],
            sorted(node.iteritems(None, [('strange',), ('weird',)])))

    def test_iteritems_two_children_with_hash(self):
        search_key_func = chk_map.search_key_registry.get('hash-255-way')
        node = InternalNode(search_key_func=search_key_func)
        leaf1 = LeafNode(search_key_func=search_key_func)
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode(search_key_func=search_key_func)
        leaf2.map(None, ('strange',), 'beast')
        self.assertEqual('\xbeF\x014', search_key_func(('foo bar',)))
        self.assertEqual('\x85\xfa\xf7K', search_key_func(('strange',)))
        node.add_node("\xbe", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items['\xbe'] = None
        node.add_node("\x85", leaf2)
        self.assertEqual([(('strange',), 'beast')],
            sorted(node.iteritems(None, [('strange',), ('weird',)])))

    def test_iteritems_partial_empty(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems([('missing',)])))

    def test_map_to_new_child_new(self):
        chkmap = self._get_map({('k1',):'foo', ('k2',):'bar'}, maximum_size=10)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
            len([value for value in node._items.values()
                 if type(value) in (tuple, chk_map._key_type)]))
        # now, mapping to k3 should add a k3 leaf
        prefix, nodes = node.map(None, ('k3',), 'quux')
        self.assertEqual("k", prefix)
        self.assertEqual([("", node)], nodes)
        # check new child details
        child = node._items['k3']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({('k3',): 'quux'}, self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k2',): 'bar', ('k3',): 'quux'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - k3 and the internal
        # node.
        keys = list(node.serialise(chkmap._store))
        child_key = child.serialise(chkmap._store)[0]
        self.assertEqual([child_key, keys[1]], keys)

    def test_map_to_child_child_splits_new(self):
        chkmap = self._get_map({('k1',):'foo', ('k22',):'bar'}, maximum_size=10)
        # Check for the canonical root value for this tree:
        self.assertEqualDiff("'' InternalNode\n"
                             , chkmap._dump_tree())
        # _dump_tree pages everything in, so reload using just the root
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
            len([value for value in node._items.values()
                 if type(value) in (tuple, chk_map._key_type)]))
        # now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
        # k23, which for simplicity in the current implementation generates
        # a new internal node between node, and k22/k23.
        prefix, nodes = node.map(chkmap._store, ('k23',), 'quux')
        self.assertEqual("k", prefix)
        self.assertEqual([("", node)], nodes)
        # check new child details
        child = node._items['k2']
        self.assertIsInstance(child, InternalNode)
        self.assertEqual(2, len(child))
        self.assertEqual({('k22',): 'bar', ('k23',): 'quux'},
                         self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        self.assertEqual(3, child._node_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar', ('k23',): 'quux'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - although k22 hasn't
        # changed because it's a special corner case (splitting with only one
        # key leaves one node unaltered), in general k22 is serialised, so we
        # expect k22, k23, the new internal node, and node, to be serialised.
        keys = list(node.serialise(chkmap._store))
        child_key = child._key
        k22_key = child._items['k22']._key
        k23_key = child._items['k23']._key
        self.assertEqual([k22_key, k23_key, child_key, node.key()], keys)
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())

    def test__search_prefix_filter_with_hash(self):
        search_key_func = chk_map.search_key_registry.get('hash-16-way')
        node = InternalNode(search_key_func=search_key_func)
        node._key_width = 2
        node._node_width = 4
        self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
        self.assertEqual('E8B7', node._search_prefix_filter(('a', 'b')))
        self.assertEqual('E8B7', node._search_prefix_filter(('a',)))

    def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
        chkmap = self._get_map(
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
        # Check we have the expected tree.
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # unmapping k23 should give us a root, with k1 and k22 as direct
        # children.
        result = node.unmap(chkmap._store, ('k23',))
        # check the pointed-at object within node - k2 should now point at the
        # k22 leaf (which has been paged in to see if we can collapse the tree)
        child = node._items['k2']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({('k22',): 'bar'},
                         self.to_dict(child, None))
        # Check overall structure is intact:
        self.assertEqual(2, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar'},
                         self.to_dict(chkmap))
        # serialising should only serialise the new data - the root node.
        keys = list(node.serialise(chkmap._store))
        self.assertEqual([keys[-1]], keys)
        chkmap = CHKMap(chkmap._store, keys[-1])
        self.assertEqualDiff("'' InternalNode\n"
                             , chkmap._dump_tree())

    def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self):
        chkmap = self._get_map(
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
        self.assertEqualDiff("'' InternalNode\n"
                             " 'k2' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())
        orig_root = chkmap._root_node
        chkmap = CHKMap(chkmap._store, orig_root)
        chkmap._ensure_root()
        node = chkmap._root_node
        k2_ptr = node._items['k2']
        # unmapping k1 should give us a root, with k22 and k23 as direct
        # children, and should not have needed to page in the subtree.
        result = node.unmap(chkmap._store, ('k1',))
        self.assertEqual(k2_ptr, result)
        chkmap = CHKMap(chkmap._store, orig_root)
        # Unmapping at the CHKMap level should switch to the new root
        chkmap.unmap(('k1',))
        self.assertEqual(k2_ptr, chkmap._root_node)
        self.assertEqualDiff("'' InternalNode\n"
                             " ('k23',) 'quux'\n"
                             , chkmap._dump_tree())


# map -> fits - done
# map -> doesn't fit - shrink from left till fits
#        key data to return: the common prefix, new nodes.
# unmap -> how to tell if siblings can be combined.
#          combining leaf nodes means expanding the prefix to the left; so
#          gather the size of all the leaf nodes addressed by expanding the
#          prefix by 1; if any adjacent node is an internal node, we know
#          that that is a dense subtree - can't combine.  otherwise as soon
#          as the sum of serialised values exceeds the split threshold we
#          know we can't combine - stop.
# unmap -> key return data - space in node, common prefix length? and key count
# variable length prefixes? -> later start with fixed width to get something going
# map -> fits - update pointer to leaf
#        return [prefix and node] - seems sound.
# map -> doesn't fit - find unique prefix and shift right
#        create internal nodes for all the partitions, return list of unique
#        prefixes and nodes.
# map -> new prefix - create a leaf
# unmap -> if child key count 0, remove
# unmap -> return space in node, common prefix length? (why?), key count
# map, if 1 node returned, use it, otherwise make an internal and populate.
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
#       code)
# map inits as empty leafnode.
# AA, AB, AC, AD, BA
# packed internal node - ideal:
# AA, AB, AC, AD, BA
# single byte fanout - A,B, AA,AB,AC,AD, BA
# AB - split, but we want to end up with AB, BA, in one node, with

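
# The notes above sketch how map() decides to split a node: find the unique
# search-key prefixes that are one step wider than the current common prefix
# and partition the keys under them.  A minimal stand-alone illustration of
# that partitioning step (plain Python; the function name is invented for
# this sketch and is not part of chk_map):

def _illustrate_split_by_prefix(serialised_keys, common_prefix):
    """Group serialised keys by the prefix one character wider than common.

    Illustrative only: the real splitting logic lives in LeafNode.map and
    InternalNode and also tracks node sizes, but the partitioning idea is
    the same.
    """
    partitions = {}
    width = len(common_prefix) + 1
    for key in serialised_keys:
        # The character immediately after the common prefix decides the
        # partition; keys shorter than the widened prefix group on themselves.
        partitions.setdefault(key[:width], []).append(key)
    return partitions

# For example, splitting ['AA', 'AB', 'AC', 'AD', 'BA'] on the empty common
# prefix groups them into {'A': ['AA', 'AB', 'AC', 'AD'], 'B': ['BA']} - the
# single byte fanout described above.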


class TestCHKMapDifference(TestCaseWithExampleMaps):

    def get_difference(self, new_roots, old_roots,
                       search_key_func=None):
        if search_key_func is None:
            search_key_func = chk_map._search_key_plain
        return chk_map.CHKMapDifference(self.get_chk_bytes(),
                                        new_roots, old_roots, search_key_func)

    def test__init__(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map.map(('aaa',), 'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1])
        self.assertEqual(set([key1]), diff._all_old_chks)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([], diff._new_queue)

    def help__read_all_roots(self, search_key_func):
        c_map = self.make_root_only_map(search_key_func=search_key_func)
        key1 = c_map.key()
        c_map.map(('aaa',), 'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1], search_key_func)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # We should have queued up only items that aren't in the old
        # version of the map.
        self.assertEqual([(('aaa',), 'new aaa content')],
                         diff._new_item_queue)
        self.assertEqual([], diff._new_queue)
        # And there are no old references, so that queue should be
        # empty.
        self.assertEqual([], diff._old_queue)

    def test__read_all_roots_plain(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_plain)

    def test__read_all_roots_16(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_16)

    def test__read_all_roots_skips_known_old(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map2 = self.make_root_only_map(chk_map._search_key_plain)
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        # We should have no results. key2 is completely contained within key1,
        # and we should have seen that in the first pass.
        self.assertEqual([], root_results)

    def test__read_all_roots_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        c_map.map(('abb',), 'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # At this point, we should have queued up only the 'a' Leaf on both
        # sides; both 'c' and 'd' are known to not have changed on both sides
        self.assertEqual([key2_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        self.assertEqual([key1_a], diff._old_queue)

    def test__read_all_roots_multi_new_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        key1_c = c_map._root_node._items['c'].key()
        c_map.map(('abb',), 'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        key2_c = c_map._root_node._items['c'].key()
        c_map = chk_map.CHKMap(self.get_chk_bytes(), key1,
                               chk_map._search_key_plain)
        c_map.map(('ccc',), 'new ccc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        key3_c = c_map._root_node._items['c'].key()
        diff = self.get_difference([key2, key3], [key1],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        # We should have queued up key2_a, and key3_c, but not key2_c or key3_a
        self.assertEqual([key2_a, key3_c], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        # And we should have queued up both a and c for the old set
        self.assertEqual([key1_a, key1_c], diff._old_queue)

    def test__read_all_roots_different_depths(self):
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_a = c_map._root_node._items['a'].key()
        key1_c = c_map._root_node._items['c'].key()
        key1_d = c_map._root_node._items['d'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_aa = c_map2._root_node._items['aa'].key()
        key2_ad = c_map2._root_node._items['ad'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the 'a' subset should be queued up, since 'c' and 'd' cannot be
        # present.
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([key2_aa, key2_ad], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual([key2_aa, key2_ad], diff._old_queue)
        self.assertEqual([key1_a, key1_c, key1_d], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_different_depths_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_2 = c_map._root_node._items['2'].key()
        key1_4 = c_map._root_node._items['4'].key()
        key1_C = c_map._root_node._items['C'].key()
        key1_F = c_map._root_node._items['F'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_16)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_F0 = c_map2._root_node._items['F0'].key()
        key2_F3 = c_map2._root_node._items['F3'].key()
        key2_F4 = c_map2._root_node._items['F4'].key()
        key2_FD = c_map2._root_node._items['FD'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the subset of keys that may be present should be queued up.
        self.assertEqual([key1_F], diff._old_queue)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
                         sorted(diff._old_queue))
        self.assertEqual(sorted([key1_2, key1_4, key1_C, key1_F]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_mixed_depth(self):
        c_map = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_aa = c_map._root_node._items['aa'].key()
        key1_ad = c_map._root_node._items['ad'].key()

        c_map2 = self.make_one_deep_one_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_a = c_map2._root_node._items['a'].key()
        key2_b = c_map2._root_node._items['b'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # 'ad' matches exactly 'a' on the other side, so it should be removed,
        # and neither side should have it queued for walking
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key2_b], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        # Note: This is technically not the 'true minimal' set that we could
        #       use. The reason is that 'a' was matched exactly to 'ad' (by sha
        #       sum). However, the code gets complicated in the case of more
        #       than one interesting key, so for now, we live with this.
        #       Consider revising, though benchmarking showing it to be a
        #       real-world issue should be done.
        self.assertEqual([key2_a], diff._old_queue)
        # self.assertEqual([], diff._old_queue)
        self.assertEqual([key1_aa], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_yields_extra_deep_records(self):
        # This is slightly controversial, as we will yield a chk page that we
        # might later on find out could be filtered out. (If a root node is
        # referenced deeper in the old set.)
        # However, even with stacking, we always have all chk pages that we
        # will need. So as long as we filter out the referenced keys, we'll
        # never run into problems.
        # This allows us to yield a root node record immediately, without any
        # buffering.
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree() # load all keys
        key1 = c_map.key()
        key1_a = c_map._root_node._items['a'].key()
        c_map2 = self.get_map({
            ('acc',): 'initial acc content',
            ('ace',): 'initial ace content',
        }, maximum_size=100)
        self.assertEqualDiff(
            " ('acc',) 'initial acc content'\n"
            " ('ace',) 'initial ace content'\n",
            c_map2._dump_tree())
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # However, even though we have yielded the root node to be fetched,
        # we should have enqueued all of the chk pages to be walked, so that
        # we can find the keys if they are present
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([(('acc',), 'initial acc content'),
                          (('ace',), 'initial ace content'),
                         ], diff._new_item_queue)

    def test__read_all_roots_multiple_targets(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map = self.make_one_deep_map()
        key2 = c_map.key()
        c_map._dump_tree() # load everything
        key2_c = c_map._root_node._items['c'].key()
        key2_d = c_map._root_node._items['d'].key()
        c_map.map(('ccc',), 'new ccc value')
        key3 = c_map._save()
        key3_c = c_map._root_node._items['c'].key()
        diff = self.get_difference([key2, key3], [key1],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        self.assertEqual([], diff._old_queue)
        # the key 'd' is interesting from key2 and key3, but should only be
        # entered into the queue once
        self.assertEqual(sorted([key2_c, key3_c, key2_d]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old(self):
        # This is the 'initial branch' case. With nothing in the old
        # set, we can just queue up all root nodes into the interesting queue,
        # and then have them fast-path flushed via _flush_new_queue
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map()
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]), sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map(chk_map._search_key_16)
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [],
                                   chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_multiple_old(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        c_map.map(('ccc',), 'new ccc value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        c_map.map(('add',), 'new add value')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        diff = self.get_difference([key3], [key1, key2],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        # the 'a' keys should not be queued up 2 times, since they are
        # identical
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__process_next_old_batched_no_dupes(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        key1_aa = c_map._root_node._items['a']._items['aa'].key()
        key1_ab = c_map._root_node._items['a']._items['ab'].key()
        key1_ac = c_map._root_node._items['a']._items['ac'].key()
        key1_ad = c_map._root_node._items['a']._items['ad'].key()
        c_map.map(('aaa',), 'new aaa value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        key2_aa = c_map._root_node._items['a']._items['aa'].key()
        c_map.map(('acc',), 'new acc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        key3_ac = c_map._root_node._items['a']._items['ac'].key()
        diff = self.get_difference([key3], [key1, key2],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        self.assertEqual(sorted([key1_a, key2_a]),
                         sorted(diff._old_queue))
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        diff._process_next_old()
        # All of the old records should be brought in and queued up,
        # but we should not have any duplicates
        self.assertEqual(sorted([key1_aa, key1_ab, key1_ac, key1_ad, key2_aa]),
                         sorted(diff._old_queue))

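
# The tests above drive CHKMapDifference by hand; the class below exercises
# the public wrapper chk_map.iter_interesting_nodes.  Conceptually both answer
# "which chk pages and items are reachable from the new roots but not from the
# old roots".  A minimal, self-contained sketch of that set difference over a
# plain dict of page references (everything here is illustrative only and not
# part of the bzrlib API; the real code streams records and interleaves the
# old/new walks instead of computing full reachability up front):

def _illustrate_interesting_pages(child_refs, new_roots, old_roots):
    """Return page keys reachable from new_roots but not from old_roots.

    ``child_refs`` maps a page key to the page keys it references.
    Illustrative only.
    """
    def reachable(roots):
        seen = set()
        pending = list(roots)
        while pending:
            key = pending.pop()
            if key in seen:
                continue
            seen.add(key)
            pending.extend(child_refs.get(key, ()))
        return seen

    return reachable(new_roots) - reachable(old_roots)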


class TestIterInterestingNodes(TestCaseWithExampleMaps):

    def get_map_key(self, a_dict, maximum_size=10):
        c_map = self.get_map(a_dict, maximum_size=maximum_size)
        return c_map.key()

    def assertIterInteresting(self, records, items, interesting_keys,
                              old_keys):
        """Check the result of iter_interesting_nodes.

        Note that we no longer care how many steps are taken, etc, just that
        the right contents are returned.

        :param records: A list of record keys that should be yielded
        :param items: A list of items (key,value) that should be yielded.
        """
        store = self.get_chk_bytes()
        store._search_key_func = chk_map._search_key_plain
        iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys,
                                                    old_keys)
        record_keys = []
        all_items = []
        for record, new_items in iter_nodes:
            if record is not None:
                record_keys.append(record.key)
            if new_items:
                all_items.extend(new_items)
        self.assertEqual(sorted(records), sorted(record_keys))
        self.assertEqual(sorted(items), sorted(all_items))

    def test_empty_to_one_keys(self):
        target = self.get_map_key({('a',): 'content'})
        self.assertIterInteresting([target],
                                   [(('a',), 'content')],
                                   [target], [])

    def test_none_to_one_key(self):
        basis = self.get_map_key({})
        target = self.get_map_key({('a',): 'content'})
        self.assertIterInteresting([target],
                                   [(('a',), 'content')],
                                   [target], [basis])

    def test_one_to_none_key(self):
        basis = self.get_map_key({('a',): 'content'})
        target = self.get_map_key({})
        self.assertIterInteresting([target],
                                   [],
                                   [target], [basis])

    def test_common_pages(self):
        basis = self.get_map_key({('a',): 'content',
                                  ('b',): 'content',
                                  ('c',): 'content',
                                  })
        target = self.get_map_key({('a',): 'content',
                                   ('b',): 'other content',
                                   ('c',): 'content',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " ('a',) 'content'\n"
            " ('b',) 'other content'\n"
            " ('c',) 'content'\n",
            target_map._dump_tree())
        b_key = target_map._root_node._items['b'].key()
        # This should return the root node, and the node for the 'b' key
        self.assertIterInteresting([target, b_key],
                                   [(('b',), 'other content')],
                                   [target], [basis])

    def test_common_sub_page(self):
        basis = self.get_map_key({('aaa',): 'common',
                                  ('c',): 'common',
                                  })
        target = self.get_map_key({('aaa',): 'common',
                                   ('aab',): 'new',
                                   ('c',): 'common',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('c',) 'common'\n",
            target_map._dump_tree())
        # The key for the internal aa node
        a_key = target_map._root_node._items['a'].key()
        # The key for the leaf aab node
        # aaa_key = target_map._root_node._items['a']._items['aaa'].key()
        aab_key = target_map._root_node._items['a']._items['aab'].key()
        self.assertIterInteresting([target, a_key, aab_key],
                                   [(('aab',), 'new')],
                                   [target], [basis])

    def test_common_leaf(self):
        basis = self.get_map_key({})
        target1 = self.get_map_key({('aaa',): 'common'})
        target2 = self.get_map_key({('aaa',): 'common',
                                    ('bbb',): 'new',
                                    })
        target3 = self.get_map_key({('aaa',): 'common',
                                    ('aac',): 'other',
                                    ('bbb',): 'new',
                                    })
        # The LeafNode containing 'aaa': 'common' occurs at 3 different levels.
        # Once as a root node, once as a second layer, and once as a third
        # layer. It should only be returned one time regardless.
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n",
            target1_map._dump_tree())
        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " ('bbb',) 'new'\n",
            target2_map._dump_tree())
        target3_map = CHKMap(self.get_chk_bytes(), target3)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'other'\n"
            " ('bbb',) 'new'\n",
            target3_map._dump_tree())
        aaa_key = target1_map._root_node.key()
        b_key = target2_map._root_node._items['b'].key()
        a_key = target3_map._root_node._items['a'].key()
        aac_key = target3_map._root_node._items['a']._items['aac'].key()
        self.assertIterInteresting(
            [target1, target2, target3, a_key, aac_key, b_key],
            [(('aaa',), 'common'), (('bbb',), 'new'), (('aac',), 'other')],
            [target1, target2, target3], [basis])

        self.assertIterInteresting(
            [target2, target3, a_key, aac_key, b_key],
            [(('bbb',), 'new'), (('aac',), 'other')],
            [target2, target3], [target1])

        # Technically, target1 could be filtered out, but since it is a root
        # node, we yield it immediately, rather than waiting to find out much
        # later on.
        self.assertIterInteresting(
            [target1],
            [],
            [target1], [target3])

    def test_multiple_maps(self):
        basis1 = self.get_map_key({('aaa',): 'common',
                                   })
        basis2 = self.get_map_key({('bbb',): 'common',
                                   })
        target1 = self.get_map_key({('aaa',): 'common',
                                    ('aac',): 'target1',
                                    ('bbb',): 'common',
                                    })
        target2 = self.get_map_key({('aaa',): 'common',
                                    ('bba',): 'target2',
                                    ('bbb',): 'common',
                                    })
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'target1'\n"
            " ('bbb',) 'common'\n",
            target1_map._dump_tree())
        # The key for the target1 internal a node
        a_key = target1_map._root_node._items['a'].key()
        # The key for the leaf aac node
        aac_key = target1_map._root_node._items['a']._items['aac'].key()

        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " 'b' InternalNode\n"
            " ('bba',) 'target2'\n"
            " ('bbb',) 'common'\n",
            target2_map._dump_tree())
        # The key for the target2 internal bb node
        b_key = target2_map._root_node._items['b'].key()
        # The key for the leaf bba node
        bba_key = target2_map._root_node._items['b']._items['bba'].key()
        self.assertIterInteresting(
            [target1, target2, a_key, aac_key, b_key, bba_key],
            [(('aac',), 'target1'), (('bba',), 'target2')],
            [target1, target2], [basis1, basis2])

    def test_multiple_maps_overlapping_common_new(self):
        # Test that when a node is found through the interesting_keys
        # iteration for *some roots* and also via the old keys iteration, it
        # is still scanned for old refs and items, because it is not truly
        # new. This requires 2 levels of InternalNodes to expose, because of
        # the way the bootstrap in _find_children_info works.
        # This suggests that the code is probably amenable to/could benefit
        # from refactoring.
        # How does this test work?
        # 1) We need a second level InternalNode present in a basis tree.
        # 2) We need a left side new tree that uses that InternalNode
        # 3) We need a right side new tree that does not use that InternalNode
        #    at all, but that has an unchanged *value* that was reachable
        #    inside that InternalNode.
        basis = self.get_map_key({
            # InternalNode, unchanged in left:
            ('aaa',): 'left',
            ('abb',): 'right',
            # Forces an InternalNode at 'a'
            ('ccc',): 'common',
            })
        left = self.get_map_key({
            # All of basis unchanged
            ('aaa',): 'left',
            ('abb',): 'right',
            ('ccc',): 'common',
            # And a new top level node so the root key is different
            ('ddd',): 'change',
            })
        right = self.get_map_key({
            # A value that is unchanged from basis and thus should be filtered
            # out.
            ('abb',): 'right',
            })
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n"
            " ('ddd',) 'change'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_d_key = left_map._root_node._items['d'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('abb',) 'right'\n",
            right_map._dump_tree())
        # Keys from the right side target - none, the root is enough.

        self.assertIterInteresting(
            [right, left, l_d_key],
            [(('ddd',), 'change')],
            [left, right], [basis])

    def test_multiple_maps_similar(self):
        # We want to have a depth=2 tree, with multiple entries in each leaf
        # node.
        basis = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'will change left',
            ('caa',): 'unchanged',
            ('cbb',): 'will change right',
            }, maximum_size=60)
        left = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'changed left',
            ('caa',): 'unchanged',
            ('cbb',): 'will change right',
            }, maximum_size=60)
        right = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'will change left',
            ('caa',): 'unchanged',
            ('cbb',): 'changed right',
            }, maximum_size=60)
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'changed left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_a_key = left_map._root_node._items['a'].key()
        l_c_key = left_map._root_node._items['c'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'changed right'\n",
            right_map._dump_tree())
        r_a_key = right_map._root_node._items['a'].key()
        r_c_key = right_map._root_node._items['c'].key()
        self.assertIterInteresting(
            [right, left, l_a_key, r_c_key],
            [(('abb',), 'changed left'), (('cbb',), 'changed right')],
            [left, right], [basis])