# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for maps built on a CHK versionedfiles facility."""

from itertools import izip

from bzrlib import (
    chk_map,
    errors,
    groupcompress,
    osutils,
    tests,
    )
from bzrlib.chk_map import (
    CHKMap,
    InternalNode,
    LeafNode,
    Node,
    )

class TestNode(tests.TestCase):

    def assertCommonPrefix(self, expected_common, prefix, key):
        common = Node.common_prefix(prefix, key)
        self.assertTrue(len(common) <= len(prefix))
        self.assertTrue(len(common) <= len(key))
        self.assertStartsWith(prefix, common)
        self.assertStartsWith(key, common)
        self.assertEquals(expected_common, common)

    def test_common_prefix(self):
        self.assertCommonPrefix('beg', 'beg', 'begin')

    def test_no_common_prefix(self):
        self.assertCommonPrefix('', 'begin', 'end')

    def test_equal(self):
        self.assertCommonPrefix('begin', 'begin', 'begin')

    def test_not_a_prefix(self):
        self.assertCommonPrefix('b', 'begin', 'b')

    def test_empty(self):
        self.assertCommonPrefix('', '', 'end')
        self.assertCommonPrefix('', 'begin', '')
        self.assertCommonPrefix('', '', '')

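# Illustrative sketch only (not the bzrlib implementation): the behaviour
# exercised by TestNode above amounts to taking the longest shared leading
# substring of two serialised keys, e.g.:
#
#   def common_prefix(prefix, key):
#       common_len = 0
#       for a, b in zip(prefix, key):
#           if a != b:
#               break
#           common_len += 1
#       return prefix[:common_len]
#
# so common_prefix('beg', 'begin') == 'beg' and
# common_prefix('begin', 'end') == ''.
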
class TestCaseWithStore(tests.TestCaseWithMemoryTransport):

    def get_chk_bytes(self):
        # This creates a standalone CHK store.
        factory = groupcompress.make_pack_factory(False, False, 1)
        self.chk_bytes = factory(self.get_transport())
        return self.chk_bytes

    def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,
                 search_key_func=None):
        if chk_bytes is None:
            chk_bytes = self.get_chk_bytes()
        root_key = CHKMap.from_dict(chk_bytes, a_dict,
            maximum_size=maximum_size, key_width=key_width,
            search_key_func=search_key_func)
        root_key2 = CHKMap._create_via_map(chk_bytes, a_dict,
            maximum_size=maximum_size, key_width=key_width,
            search_key_func=search_key_func)
        self.assertEqual(root_key, root_key2, "CHKMap.from_dict() did not"
                         " match CHKMap._create_via_map")
        chkmap = CHKMap(chk_bytes, root_key, search_key_func=search_key_func)
        return chkmap

    def read_bytes(self, chk_bytes, key):
        stream = chk_bytes.get_record_stream([key], 'unordered', True)
        record = stream.next()
        if record.storage_kind == 'absent':
            self.fail('Store does not contain the key %s' % (key,))
        return record.get_bytes_as("fulltext")

    def to_dict(self, node, *args):
        return dict(node.iteritems(*args))

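# Usage sketch for the helpers above (hypothetical snippet, not a test in
# this module): _get_map() builds the dict both via CHKMap.from_dict() and
# CHKMap._create_via_map(), checks the resulting root keys match, and hands
# back a CHKMap, so a test can simply do:
#
#   chkmap = self._get_map({('a',): 'content'}, maximum_size=10)
#   self.assertEqual({('a',): 'content'}, self.to_dict(chkmap))
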
class TestCaseWithExampleMaps(TestCaseWithStore):

    def get_chk_bytes(self):
        if getattr(self, '_chk_bytes', None) is None:
            self._chk_bytes = super(TestCaseWithExampleMaps,
                                    self).get_chk_bytes()
        return self._chk_bytes

    def get_map(self, a_dict, maximum_size=100, search_key_func=None):
        c_map = self._get_map(a_dict, maximum_size=maximum_size,
                              chk_bytes=self.get_chk_bytes(),
                              search_key_func=search_key_func)
        return c_map

    def make_root_only_map(self, search_key_func=None):
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('abb',): 'initial abb content',
        }, search_key_func=search_key_func)

    def make_root_only_aaa_ddd_map(self, search_key_func=None):
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('ddd',): 'initial ddd content',
        }, search_key_func=search_key_func)

    def make_one_deep_map(self, search_key_func=None):
        # Same as root_only_map, except it forces an InternalNode at the root
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('abb',): 'initial abb content',
            ('ccc',): 'initial ccc content',
            ('ddd',): 'initial ddd content',
        }, search_key_func=search_key_func)

    def make_two_deep_map(self, search_key_func=None):
        # Carefully chosen so that it creates a 2-deep map for both
        # _search_key_plain and for _search_key_16
        # Also so that things line up with make_one_deep_two_prefix_map
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('abb',): 'initial abb content',
            ('acc',): 'initial acc content',
            ('ace',): 'initial ace content',
            ('add',): 'initial add content',
            ('adh',): 'initial adh content',
            ('adl',): 'initial adl content',
            ('ccc',): 'initial ccc content',
            ('ddd',): 'initial ddd content',
        }, search_key_func=search_key_func)

    def make_one_deep_two_prefix_map(self, search_key_func=None):
        """Create a map with one internal node, but references are extra long.

        Otherwise has similar content to make_two_deep_map.
        """
        return self.get_map({
            ('aaa',): 'initial aaa content',
            ('add',): 'initial add content',
            ('adh',): 'initial adh content',
            ('adl',): 'initial adl content',
        }, search_key_func=search_key_func)

    def make_one_deep_one_prefix_map(self, search_key_func=None):
        """Create a map with one internal node, but references are extra long.

        Similar to make_one_deep_two_prefix_map, except the split is at the
        first char, rather than the second.
        """
        return self.get_map({
            ('add',): 'initial add content',
            ('adh',): 'initial adh content',
            ('adl',): 'initial adl content',
            ('bbb',): 'initial bbb content',
        }, search_key_func=search_key_func)

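# Note: the layout of the example maps above depends on the search key
# function in use.  With the default plain function the search key is the
# serialised key itself, while chk_map._search_key_16 and
# chk_map._search_key_255 hash each key element first, so the same dict can
# split on different prefixes; the expected layouts are asserted by
# TestTestCaseWithExampleMaps below.
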
class TestTestCaseWithExampleMaps(TestCaseWithExampleMaps):
176
"""Actual tests for the provided examples."""
178
def test_root_only_map_plain(self):
179
c_map = self.make_root_only_map()
180
self.assertEqualDiff(
182
" ('aaa',) 'initial aaa content'\n"
183
" ('abb',) 'initial abb content'\n",
186
def test_root_only_map_16(self):
187
c_map = self.make_root_only_map(search_key_func=chk_map._search_key_16)
188
self.assertEqualDiff(
190
" ('aaa',) 'initial aaa content'\n"
191
" ('abb',) 'initial abb content'\n",
194
def test_one_deep_map_plain(self):
195
c_map = self.make_one_deep_map()
196
self.assertEqualDiff(
199
" ('aaa',) 'initial aaa content'\n"
200
" ('abb',) 'initial abb content'\n"
202
" ('ccc',) 'initial ccc content'\n"
204
" ('ddd',) 'initial ddd content'\n",
207
def test_one_deep_map_16(self):
208
c_map = self.make_one_deep_map(search_key_func=chk_map._search_key_16)
209
self.assertEqualDiff(
212
" ('ccc',) 'initial ccc content'\n"
214
" ('abb',) 'initial abb content'\n"
216
" ('aaa',) 'initial aaa content'\n"
217
" ('ddd',) 'initial ddd content'\n",
220
def test_root_only_aaa_ddd_plain(self):
221
c_map = self.make_root_only_aaa_ddd_map()
222
self.assertEqualDiff(
224
" ('aaa',) 'initial aaa content'\n"
225
" ('ddd',) 'initial ddd content'\n",
228
def test_one_deep_map_16(self):
229
c_map = self.make_root_only_aaa_ddd_map(
230
search_key_func=chk_map._search_key_16)
231
# We use 'aaa' and 'ddd' because they happen to map to 'F' when using
233
self.assertEqualDiff(
235
" ('aaa',) 'initial aaa content'\n"
236
" ('ddd',) 'initial ddd content'\n",
239
def test_two_deep_map_plain(self):
240
c_map = self.make_two_deep_map()
241
self.assertEqualDiff(
243
" 'a' InternalNode\n"
245
" ('aaa',) 'initial aaa content'\n"
247
" ('abb',) 'initial abb content'\n"
249
" ('acc',) 'initial acc content'\n"
250
" ('ace',) 'initial ace content'\n"
252
" ('add',) 'initial add content'\n"
253
" ('adh',) 'initial adh content'\n"
254
" ('adl',) 'initial adl content'\n"
256
" ('ccc',) 'initial ccc content'\n"
258
" ('ddd',) 'initial ddd content'\n",
261
def test_two_deep_map_16(self):
262
c_map = self.make_two_deep_map(search_key_func=chk_map._search_key_16)
263
self.assertEqualDiff(
266
" ('acc',) 'initial acc content'\n"
267
" ('ccc',) 'initial ccc content'\n"
269
" ('abb',) 'initial abb content'\n"
271
" ('ace',) 'initial ace content'\n"
272
" 'F' InternalNode\n"
274
" ('aaa',) 'initial aaa content'\n"
276
" ('adl',) 'initial adl content'\n"
278
" ('adh',) 'initial adh content'\n"
280
" ('ddd',) 'initial ddd content'\n"
282
" ('add',) 'initial add content'\n",
285
def test_one_deep_two_prefix_map_plain(self):
286
c_map = self.make_one_deep_two_prefix_map()
287
self.assertEqualDiff(
290
" ('aaa',) 'initial aaa content'\n"
292
" ('add',) 'initial add content'\n"
293
" ('adh',) 'initial adh content'\n"
294
" ('adl',) 'initial adl content'\n",
297
def test_one_deep_two_prefix_map_16(self):
298
c_map = self.make_one_deep_two_prefix_map(
299
search_key_func=chk_map._search_key_16)
300
self.assertEqualDiff(
303
" ('aaa',) 'initial aaa content'\n"
305
" ('adl',) 'initial adl content'\n"
307
" ('adh',) 'initial adh content'\n"
309
" ('add',) 'initial add content'\n",
312
def test_one_deep_one_prefix_map_plain(self):
313
c_map = self.make_one_deep_one_prefix_map()
314
self.assertEqualDiff(
317
" ('add',) 'initial add content'\n"
318
" ('adh',) 'initial adh content'\n"
319
" ('adl',) 'initial adl content'\n"
321
" ('bbb',) 'initial bbb content'\n",
324
def test_one_deep_one_prefix_map_16(self):
325
c_map = self.make_one_deep_one_prefix_map(
326
search_key_func=chk_map._search_key_16)
327
self.assertEqualDiff(
330
" ('bbb',) 'initial bbb content'\n"
332
" ('add',) 'initial add content'\n"
333
" ('adh',) 'initial adh content'\n"
334
" ('adl',) 'initial adl content'\n",
338
class TestMap(TestCaseWithStore):
340
def assertHasABMap(self, chk_bytes):
341
ab_leaf_bytes = 'chkleaf:\n0\n1\n1\na\n\x001\nb\n'
342
ab_sha1 = osutils.sha_string(ab_leaf_bytes)
343
self.assertEqual('90986195696b177c8895d48fdb4b7f2366f798a0', ab_sha1)
344
root_key = ('sha1:' + ab_sha1,)
345
        self.assertEqual(ab_leaf_bytes, self.read_bytes(chk_bytes, root_key))
        return root_key
348
def assertHasEmptyMap(self, chk_bytes):
349
empty_leaf_bytes = 'chkleaf:\n0\n1\n0\n\n'
350
empty_sha1 = osutils.sha_string(empty_leaf_bytes)
351
self.assertEqual('8571e09bf1bcc5b9621ce31b3d4c93d6e9a1ed26', empty_sha1)
352
root_key = ('sha1:' + empty_sha1,)
353
        self.assertEqual(empty_leaf_bytes, self.read_bytes(chk_bytes, root_key))
        return root_key
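    # As the two helpers above read, the serialised leaf format is
    # line-based: "chkleaf:\n<maximum_size>\n<key_width>\n<item_count>\n"
    # "<common prefix>\n" followed by one
    # "<key suffix>\x00<number of value lines>\n<value line(s)>\n" entry per
    # item; the LeafNode.deserialise tests further down exercise the same
    # layout.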
356
def assertMapLayoutEqual(self, map_one, map_two):
357
"""Assert that the internal structure is identical between the maps."""
358
map_one._ensure_root()
359
node_one_stack = [map_one._root_node]
360
map_two._ensure_root()
361
node_two_stack = [map_two._root_node]
362
while node_one_stack:
363
node_one = node_one_stack.pop()
364
node_two = node_two_stack.pop()
365
if node_one.__class__ != node_two.__class__:
366
self.assertEqualDiff(map_one._dump_tree(include_keys=True),
367
map_two._dump_tree(include_keys=True))
368
self.assertEqual(node_one._search_prefix,
369
node_two._search_prefix)
370
if isinstance(node_one, InternalNode):
371
# Internal nodes must have identical references
372
self.assertEqual(sorted(node_one._items.keys()),
373
sorted(node_two._items.keys()))
374
node_one_stack.extend([n for n, _ in
375
node_one._iter_nodes(map_one._store)])
376
node_two_stack.extend([n for n, _ in
377
node_two._iter_nodes(map_two._store)])
379
# Leaf nodes must have identical contents
380
self.assertEqual(node_one._items, node_two._items)
381
self.assertEquals([], node_two_stack)
383
def assertCanonicalForm(self, chkmap):
384
"""Assert that the chkmap is in 'canonical' form.
386
We do this by adding all of the key value pairs from scratch, both in
387
forward order and reverse order, and assert that the final tree layout
390
items = list(chkmap.iteritems())
391
map_forward = chk_map.CHKMap(None, None)
392
map_forward._root_node.set_maximum_size(chkmap._root_node.maximum_size)
393
for key, value in items:
394
map_forward.map(key, value)
395
self.assertMapLayoutEqual(map_forward, chkmap)
396
map_reverse = chk_map.CHKMap(None, None)
397
map_reverse._root_node.set_maximum_size(chkmap._root_node.maximum_size)
398
for key, value in reversed(items):
399
map_reverse.map(key, value)
400
self.assertMapLayoutEqual(map_reverse, chkmap)
402
def test_assert_map_layout_equal(self):
403
store = self.get_chk_bytes()
404
map_one = CHKMap(store, None)
405
map_one._root_node.set_maximum_size(20)
406
map_two = CHKMap(store, None)
407
map_two._root_node.set_maximum_size(20)
408
self.assertMapLayoutEqual(map_one, map_two)
409
map_one.map('aaa', 'value')
410
self.assertRaises(AssertionError,
411
self.assertMapLayoutEqual, map_one, map_two)
412
map_two.map('aaa', 'value')
413
self.assertMapLayoutEqual(map_one, map_two)
414
# Split the tree, so we ensure that internal nodes and leaf nodes are
416
map_one.map('aab', 'value')
417
self.assertIsInstance(map_one._root_node, InternalNode)
418
self.assertRaises(AssertionError,
419
self.assertMapLayoutEqual, map_one, map_two)
420
map_two.map('aab', 'value')
421
self.assertMapLayoutEqual(map_one, map_two)
422
map_one.map('aac', 'value')
423
self.assertRaises(AssertionError,
424
self.assertMapLayoutEqual, map_one, map_two)
425
self.assertCanonicalForm(map_one)
427
def test_from_dict_empty(self):
428
chk_bytes = self.get_chk_bytes()
429
root_key = CHKMap.from_dict(chk_bytes, {})
430
# Check the data was saved and inserted correctly.
431
expected_root_key = self.assertHasEmptyMap(chk_bytes)
432
self.assertEqual(expected_root_key, root_key)
434
def test_from_dict_ab(self):
435
chk_bytes = self.get_chk_bytes()
436
root_key = CHKMap.from_dict(chk_bytes, {"a": "b"})
437
# Check the data was saved and inserted correctly.
438
expected_root_key = self.assertHasABMap(chk_bytes)
439
self.assertEqual(expected_root_key, root_key)
441
def test_apply_empty_ab(self):
442
# applying a delta (None, "a", "b") to an empty chkmap generates the
443
# same map as from_dict_ab.
444
chk_bytes = self.get_chk_bytes()
445
root_key = CHKMap.from_dict(chk_bytes, {})
446
chkmap = CHKMap(chk_bytes, root_key)
447
new_root = chkmap.apply_delta([(None, "a", "b")])
448
# Check the data was saved and inserted correctly.
449
expected_root_key = self.assertHasABMap(chk_bytes)
450
self.assertEqual(expected_root_key, new_root)
451
# The update should have left us with an in memory root node, with an
453
self.assertEqual(new_root, chkmap._root_node._key)
455
def test_apply_ab_empty(self):
456
# applying a delta ("a", None, None) to a map with 'a' in it generates
458
chk_bytes = self.get_chk_bytes()
459
root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
460
chkmap = CHKMap(chk_bytes, root_key)
461
new_root = chkmap.apply_delta([(("a",), None, None)])
462
# Check the data was saved and inserted correctly.
463
expected_root_key = self.assertHasEmptyMap(chk_bytes)
464
self.assertEqual(expected_root_key, new_root)
465
# The update should have left us with an in memory root node, with an
467
self.assertEqual(new_root, chkmap._root_node._key)
469
def test_apply_new_keys_must_be_new(self):
470
# applying a delta (None, "a", "b") to a map with 'a' in it generates
472
chk_bytes = self.get_chk_bytes()
473
root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
474
chkmap = CHKMap(chk_bytes, root_key)
475
self.assertRaises(errors.InconsistentDelta, chkmap.apply_delta,
476
[(None, ("a",), "b")])
477
        # As an error occurred, the update should have left us without changing
478
# anything (the root should be unchanged).
479
self.assertEqual(root_key, chkmap._root_node._key)
481
def test_apply_delta_is_deterministic(self):
482
chk_bytes = self.get_chk_bytes()
483
chkmap1 = CHKMap(chk_bytes, None)
484
chkmap1._root_node.set_maximum_size(10)
485
chkmap1.apply_delta([(None, ('aaa',), 'common'),
486
(None, ('bba',), 'target2'),
487
(None, ('bbb',), 'common')])
488
root_key1 = chkmap1._save()
489
self.assertCanonicalForm(chkmap1)
491
chkmap2 = CHKMap(chk_bytes, None)
492
chkmap2._root_node.set_maximum_size(10)
493
chkmap2.apply_delta([(None, ('bbb',), 'common'),
494
(None, ('bba',), 'target2'),
495
(None, ('aaa',), 'common')])
496
root_key2 = chkmap2._save()
497
self.assertEqualDiff(chkmap1._dump_tree(include_keys=True),
498
chkmap2._dump_tree(include_keys=True))
499
self.assertEqual(root_key1, root_key2)
500
self.assertCanonicalForm(chkmap2)
502
def test_stable_splitting(self):
503
store = self.get_chk_bytes()
504
chkmap = CHKMap(store, None)
505
# Should fit 2 keys per LeafNode
506
chkmap._root_node.set_maximum_size(35)
507
chkmap.map(('aaa',), 'v')
508
self.assertEqualDiff("'' LeafNode\n"
511
chkmap.map(('aab',), 'v')
512
self.assertEqualDiff("'' LeafNode\n"
516
self.assertCanonicalForm(chkmap)
518
# Creates a new internal node, and splits the others into leaves
519
chkmap.map(('aac',), 'v')
520
self.assertEqualDiff("'' InternalNode\n"
528
self.assertCanonicalForm(chkmap)
530
# Splits again, because it can't fit in the current structure
531
chkmap.map(('bbb',), 'v')
532
self.assertEqualDiff("'' InternalNode\n"
533
" 'a' InternalNode\n"
543
self.assertCanonicalForm(chkmap)
545
def test_map_splits_with_longer_key(self):
546
store = self.get_chk_bytes()
547
chkmap = CHKMap(store, None)
548
# Should fit 1 key per LeafNode
549
chkmap._root_node.set_maximum_size(10)
550
chkmap.map(('aaa',), 'v')
551
chkmap.map(('aaaa',), 'v')
552
self.assertCanonicalForm(chkmap)
553
self.assertIsInstance(chkmap._root_node, InternalNode)
555
def test_with_linefeed_in_key(self):
556
store = self.get_chk_bytes()
557
chkmap = CHKMap(store, None)
558
# Should fit 1 key per LeafNode
559
chkmap._root_node.set_maximum_size(10)
560
chkmap.map(('a\ra',), 'val1')
561
chkmap.map(('a\rb',), 'val2')
562
chkmap.map(('ac',), 'val3')
563
self.assertCanonicalForm(chkmap)
564
self.assertEqualDiff("'' InternalNode\n"
565
" 'a\\r' InternalNode\n"
566
" 'a\\ra' LeafNode\n"
567
" ('a\\ra',) 'val1'\n"
568
" 'a\\rb' LeafNode\n"
569
" ('a\\rb',) 'val2'\n"
573
# We should also successfully serialise and deserialise these items
574
root_key = chkmap._save()
575
chkmap = CHKMap(store, root_key)
576
self.assertEqualDiff("'' InternalNode\n"
577
" 'a\\r' InternalNode\n"
578
" 'a\\ra' LeafNode\n"
579
" ('a\\ra',) 'val1'\n"
580
" 'a\\rb' LeafNode\n"
581
" ('a\\rb',) 'val2'\n"
586
def test_deep_splitting(self):
587
store = self.get_chk_bytes()
588
chkmap = CHKMap(store, None)
589
# Should fit 2 keys per LeafNode
590
chkmap._root_node.set_maximum_size(40)
591
chkmap.map(('aaaaaaaa',), 'v')
592
chkmap.map(('aaaaabaa',), 'v')
593
self.assertEqualDiff("'' LeafNode\n"
594
" ('aaaaaaaa',) 'v'\n"
595
" ('aaaaabaa',) 'v'\n",
597
chkmap.map(('aaabaaaa',), 'v')
598
chkmap.map(('aaababaa',), 'v')
599
self.assertEqualDiff("'' InternalNode\n"
601
" ('aaaaaaaa',) 'v'\n"
602
" ('aaaaabaa',) 'v'\n"
604
" ('aaabaaaa',) 'v'\n"
605
" ('aaababaa',) 'v'\n",
607
chkmap.map(('aaabacaa',), 'v')
608
chkmap.map(('aaabadaa',), 'v')
609
self.assertEqualDiff("'' InternalNode\n"
611
" ('aaaaaaaa',) 'v'\n"
612
" ('aaaaabaa',) 'v'\n"
613
" 'aaab' InternalNode\n"
614
" 'aaabaa' LeafNode\n"
615
" ('aaabaaaa',) 'v'\n"
616
" 'aaabab' LeafNode\n"
617
" ('aaababaa',) 'v'\n"
618
" 'aaabac' LeafNode\n"
619
" ('aaabacaa',) 'v'\n"
620
" 'aaabad' LeafNode\n"
621
" ('aaabadaa',) 'v'\n",
623
chkmap.map(('aaababba',), 'val')
624
chkmap.map(('aaababca',), 'val')
625
self.assertEqualDiff("'' InternalNode\n"
627
" ('aaaaaaaa',) 'v'\n"
628
" ('aaaaabaa',) 'v'\n"
629
" 'aaab' InternalNode\n"
630
" 'aaabaa' LeafNode\n"
631
" ('aaabaaaa',) 'v'\n"
632
" 'aaabab' InternalNode\n"
633
" 'aaababa' LeafNode\n"
634
" ('aaababaa',) 'v'\n"
635
" 'aaababb' LeafNode\n"
636
" ('aaababba',) 'val'\n"
637
" 'aaababc' LeafNode\n"
638
" ('aaababca',) 'val'\n"
639
" 'aaabac' LeafNode\n"
640
" ('aaabacaa',) 'v'\n"
641
" 'aaabad' LeafNode\n"
642
" ('aaabadaa',) 'v'\n",
644
# Now we add a node that should fit around an existing InternalNode,
645
        # but has a slightly different key prefix, which causes a new
        # internal node.
647
chkmap.map(('aaabDaaa',), 'v')
648
self.assertEqualDiff("'' InternalNode\n"
650
" ('aaaaaaaa',) 'v'\n"
651
" ('aaaaabaa',) 'v'\n"
652
" 'aaab' InternalNode\n"
653
" 'aaabD' LeafNode\n"
654
" ('aaabDaaa',) 'v'\n"
655
" 'aaaba' InternalNode\n"
656
" 'aaabaa' LeafNode\n"
657
" ('aaabaaaa',) 'v'\n"
658
" 'aaabab' InternalNode\n"
659
" 'aaababa' LeafNode\n"
660
" ('aaababaa',) 'v'\n"
661
" 'aaababb' LeafNode\n"
662
" ('aaababba',) 'val'\n"
663
" 'aaababc' LeafNode\n"
664
" ('aaababca',) 'val'\n"
665
" 'aaabac' LeafNode\n"
666
" ('aaabacaa',) 'v'\n"
667
" 'aaabad' LeafNode\n"
668
" ('aaabadaa',) 'v'\n",
671
def test_map_collapses_if_size_changes(self):
672
store = self.get_chk_bytes()
673
chkmap = CHKMap(store, None)
674
# Should fit 2 keys per LeafNode
675
chkmap._root_node.set_maximum_size(35)
676
chkmap.map(('aaa',), 'v')
677
chkmap.map(('aab',), 'very long value that splits')
678
self.assertEqualDiff("'' InternalNode\n"
682
" ('aab',) 'very long value that splits'\n",
684
self.assertCanonicalForm(chkmap)
685
# Now changing the value to something small should cause a rebuild
686
chkmap.map(('aab',), 'v')
687
self.assertEqualDiff("'' LeafNode\n"
691
self.assertCanonicalForm(chkmap)
693
def test_map_double_deep_collapses(self):
694
store = self.get_chk_bytes()
695
chkmap = CHKMap(store, None)
696
# Should fit 3 small keys per LeafNode
697
chkmap._root_node.set_maximum_size(40)
698
chkmap.map(('aaa',), 'v')
699
chkmap.map(('aab',), 'very long value that splits')
700
chkmap.map(('abc',), 'v')
701
self.assertEqualDiff("'' InternalNode\n"
702
" 'aa' InternalNode\n"
706
" ('aab',) 'very long value that splits'\n"
710
chkmap.map(('aab',), 'v')
711
self.assertCanonicalForm(chkmap)
712
self.assertEqualDiff("'' LeafNode\n"
718
def test_stable_unmap(self):
719
store = self.get_chk_bytes()
720
chkmap = CHKMap(store, None)
721
# Should fit 2 keys per LeafNode
722
chkmap._root_node.set_maximum_size(35)
723
chkmap.map(('aaa',), 'v')
724
chkmap.map(('aab',), 'v')
725
self.assertEqualDiff("'' LeafNode\n"
729
# Creates a new internal node, and splits the others into leaves
730
chkmap.map(('aac',), 'v')
731
self.assertEqualDiff("'' InternalNode\n"
739
self.assertCanonicalForm(chkmap)
740
        # Now let's unmap one of the keys, and assert that we collapse the
        # tree back into a single LeafNode.
742
chkmap.unmap(('aac',))
743
self.assertEqualDiff("'' LeafNode\n"
747
self.assertCanonicalForm(chkmap)
749
def test_unmap_double_deep(self):
750
store = self.get_chk_bytes()
751
chkmap = CHKMap(store, None)
752
# Should fit 3 keys per LeafNode
753
chkmap._root_node.set_maximum_size(40)
754
chkmap.map(('aaa',), 'v')
755
chkmap.map(('aaab',), 'v')
756
chkmap.map(('aab',), 'very long value')
757
chkmap.map(('abc',), 'v')
758
self.assertEqualDiff("'' InternalNode\n"
759
" 'aa' InternalNode\n"
764
" ('aab',) 'very long value'\n"
768
        # Removing the 'aab' key should cause everything to collapse back to a
        # single LeafNode.
770
chkmap.unmap(('aab',))
771
self.assertEqualDiff("'' LeafNode\n"
777
def test_unmap_double_deep_non_empty_leaf(self):
778
store = self.get_chk_bytes()
779
chkmap = CHKMap(store, None)
780
# Should fit 3 keys per LeafNode
781
chkmap._root_node.set_maximum_size(40)
782
chkmap.map(('aaa',), 'v')
783
chkmap.map(('aab',), 'long value')
784
chkmap.map(('aabb',), 'v')
785
chkmap.map(('abc',), 'v')
786
self.assertEqualDiff("'' InternalNode\n"
787
" 'aa' InternalNode\n"
791
" ('aab',) 'long value'\n"
796
        # Removing the 'aab' key should cause everything to collapse back to a
        # single LeafNode.
798
chkmap.unmap(('aab',))
799
self.assertEqualDiff("'' LeafNode\n"
805
def test_unmap_with_known_internal_node_doesnt_page(self):
806
store = self.get_chk_bytes()
807
chkmap = CHKMap(store, None)
808
# Should fit 3 keys per LeafNode
809
chkmap._root_node.set_maximum_size(30)
810
chkmap.map(('aaa',), 'v')
811
chkmap.map(('aab',), 'v')
812
chkmap.map(('aac',), 'v')
813
chkmap.map(('abc',), 'v')
814
chkmap.map(('acd',), 'v')
815
self.assertEqualDiff("'' InternalNode\n"
816
" 'aa' InternalNode\n"
828
# Save everything to the map, and start over
829
chkmap = CHKMap(store, chkmap._save())
830
# Mapping an 'aa' key loads the internal node, but should not map the
831
# 'ab' and 'ac' nodes
832
chkmap.map(('aad',), 'v')
833
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
834
self.assertIsInstance(chkmap._root_node._items['ab'], tuple)
835
self.assertIsInstance(chkmap._root_node._items['ac'], tuple)
836
# Unmapping 'acd' can notice that 'aa' is an InternalNode and not have
838
chkmap.unmap(('acd',))
839
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
840
self.assertIsInstance(chkmap._root_node._items['ab'], tuple)
842
def test_unmap_without_fitting_doesnt_page_in(self):
843
store = self.get_chk_bytes()
844
chkmap = CHKMap(store, None)
845
# Should fit 2 keys per LeafNode
846
chkmap._root_node.set_maximum_size(20)
847
chkmap.map(('aaa',), 'v')
848
chkmap.map(('aab',), 'v')
849
self.assertEqualDiff("'' InternalNode\n"
855
# Save everything to the map, and start over
856
chkmap = CHKMap(store, chkmap._save())
857
chkmap.map(('aac',), 'v')
858
chkmap.map(('aad',), 'v')
859
chkmap.map(('aae',), 'v')
860
chkmap.map(('aaf',), 'v')
861
# At this point, the previous nodes should not be paged in, but the
862
# newly added nodes would be
863
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
864
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
865
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
866
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
867
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
868
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
869
# Now unmapping one of the new nodes will use only the already-paged-in
870
# nodes to determine that we don't need to do more.
871
chkmap.unmap(('aaf',))
872
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
873
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
874
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
875
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
876
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
878
def test_unmap_pages_in_if_necessary(self):
879
store = self.get_chk_bytes()
880
chkmap = CHKMap(store, None)
881
# Should fit 2 keys per LeafNode
882
chkmap._root_node.set_maximum_size(30)
883
chkmap.map(('aaa',), 'val')
884
chkmap.map(('aab',), 'val')
885
chkmap.map(('aac',), 'val')
886
self.assertEqualDiff("'' InternalNode\n"
894
root_key = chkmap._save()
895
# Save everything to the map, and start over
896
chkmap = CHKMap(store, root_key)
897
chkmap.map(('aad',), 'v')
898
# At this point, the previous nodes should not be paged in, but the
899
# newly added node would be
900
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
901
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
902
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
903
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
904
# Unmapping the new node will check the existing nodes to see if they
906
# Clear the page cache so we ensure we have to read all the children
907
chk_map._page_cache.clear()
908
chkmap.unmap(('aad',))
909
self.assertIsInstance(chkmap._root_node._items['aaa'], LeafNode)
910
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
911
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
913
def test_unmap_pages_in_from_page_cache(self):
914
store = self.get_chk_bytes()
915
chkmap = CHKMap(store, None)
916
# Should fit 2 keys per LeafNode
917
chkmap._root_node.set_maximum_size(30)
918
chkmap.map(('aaa',), 'val')
919
chkmap.map(('aab',), 'val')
920
chkmap.map(('aac',), 'val')
921
root_key = chkmap._save()
922
# Save everything to the map, and start over
923
chkmap = CHKMap(store, root_key)
924
chkmap.map(('aad',), 'val')
925
self.assertEqualDiff("'' InternalNode\n"
935
# Save everything to the map, start over after _dump_tree
936
chkmap = CHKMap(store, root_key)
937
chkmap.map(('aad',), 'v')
938
# At this point, the previous nodes should not be paged in, but the
939
# newly added node would be
940
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
941
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
942
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
943
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
944
# Now clear the page cache, and only include 2 of the children in the
946
aab_key = chkmap._root_node._items['aab']
947
aab_bytes = chk_map._page_cache[aab_key]
948
aac_key = chkmap._root_node._items['aac']
949
aac_bytes = chk_map._page_cache[aac_key]
950
chk_map._page_cache.clear()
951
chk_map._page_cache[aab_key] = aab_bytes
952
chk_map._page_cache[aac_key] = aac_bytes
954
# Unmapping the new node will check the nodes from the page cache
955
# first, and not have to read in 'aaa'
956
chkmap.unmap(('aad',))
957
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
958
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
959
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
961
def test_unmap_uses_existing_items(self):
962
store = self.get_chk_bytes()
963
chkmap = CHKMap(store, None)
964
# Should fit 2 keys per LeafNode
965
chkmap._root_node.set_maximum_size(30)
966
chkmap.map(('aaa',), 'val')
967
chkmap.map(('aab',), 'val')
968
chkmap.map(('aac',), 'val')
969
root_key = chkmap._save()
970
# Save everything to the map, and start over
971
chkmap = CHKMap(store, root_key)
972
chkmap.map(('aad',), 'val')
973
chkmap.map(('aae',), 'val')
974
chkmap.map(('aaf',), 'val')
975
# At this point, the previous nodes should not be paged in, but the
976
# newly added node would be
977
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
978
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
979
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
980
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
981
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
982
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
984
# Unmapping a new node will see the other nodes that are already in
985
# memory, and not need to page in anything else
986
chkmap.unmap(('aad',))
987
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
988
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
989
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
990
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
991
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
993
def test_iter_changes_empty_ab(self):
994
# Asking for changes between an empty dict to a dict with keys returns
996
basis = self._get_map({}, maximum_size=10)
997
target = self._get_map(
998
{('a',): 'content here', ('b',): 'more content'},
999
chk_bytes=basis._store, maximum_size=10)
1000
self.assertEqual([(('a',), None, 'content here'),
1001
(('b',), None, 'more content')],
1002
sorted(list(target.iter_changes(basis))))
1004
def test_iter_changes_ab_empty(self):
1005
# Asking for changes between a dict with keys to an empty dict returns
1007
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
1009
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
1010
self.assertEqual([(('a',), 'content here', None),
1011
(('b',), 'more content', None)],
1012
sorted(list(target.iter_changes(basis))))
1014
def test_iter_changes_empty_empty_is_empty(self):
1015
basis = self._get_map({}, maximum_size=10)
1016
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
1017
self.assertEqual([], sorted(list(target.iter_changes(basis))))
1019
def test_iter_changes_ab_ab_is_empty(self):
1020
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
1022
target = self._get_map(
1023
{('a',): 'content here', ('b',): 'more content'},
1024
chk_bytes=basis._store, maximum_size=10)
1025
self.assertEqual([], sorted(list(target.iter_changes(basis))))
1027
def test_iter_changes_ab_ab_nodes_not_loaded(self):
1028
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
1030
target = self._get_map(
1031
{('a',): 'content here', ('b',): 'more content'},
1032
chk_bytes=basis._store, maximum_size=10)
1033
list(target.iter_changes(basis))
1034
self.assertIsInstance(target._root_node, tuple)
1035
self.assertIsInstance(basis._root_node, tuple)
1037
def test_iter_changes_ab_ab_changed_values_shown(self):
1038
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
1040
target = self._get_map(
1041
{('a',): 'content here', ('b',): 'different content'},
1042
chk_bytes=basis._store, maximum_size=10)
1043
result = sorted(list(target.iter_changes(basis)))
1044
self.assertEqual([(('b',), 'more content', 'different content')],
1047
def test_iter_changes_mixed_node_length(self):
1048
        # When one side has different node lengths than the other, common
        # but different keys still need to be shown, and new-and-old included
1051
# aaa - common unaltered
1052
# aab - common altered
1056
# aaa to be not loaded (later test)
1057
# aab, b, at to be returned.
1058
        # basis splits at byte 0,1,2, aaa is common, b is basis only
1059
basis_dict = {('aaa',): 'foo bar',
1060
('aab',): 'common altered a', ('b',): 'foo bar b'}
1061
# target splits at byte 1,2, at is target only
1062
target_dict = {('aaa',): 'foo bar',
1063
('aab',): 'common altered b', ('at',): 'foo bar t'}
1065
(('aab',), 'common altered a', 'common altered b'),
1066
(('at',), None, 'foo bar t'),
1067
(('b',), 'foo bar b', None),
1069
basis = self._get_map(basis_dict, maximum_size=10)
1070
target = self._get_map(target_dict, maximum_size=10,
1071
chk_bytes=basis._store)
1072
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
1074
def test_iter_changes_common_pages_not_loaded(self):
1075
# aaa - common unaltered
1076
# aab - common altered
1080
# aaa to be not loaded
1081
# aaa not to be in result.
1082
basis_dict = {('aaa',): 'foo bar',
1083
('aab',): 'common altered a', ('b',): 'foo bar b'}
1084
# target splits at byte 1, at is target only
1085
target_dict = {('aaa',): 'foo bar',
1086
('aab',): 'common altered b', ('at',): 'foo bar t'}
1087
basis = self._get_map(basis_dict, maximum_size=10)
1088
target = self._get_map(target_dict, maximum_size=10,
1089
chk_bytes=basis._store)
1090
basis_get = basis._store.get_record_stream
1091
def get_record_stream(keys, order, fulltext):
1092
if ('sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys:
1093
self.fail("'aaa' pointer was followed %r" % keys)
1094
return basis_get(keys, order, fulltext)
1095
basis._store.get_record_stream = get_record_stream
1096
result = sorted(list(target.iter_changes(basis)))
1097
for change in result:
1098
if change[0] == ('aaa',):
1099
self.fail("Found unexpected change: %s" % change)
1101
def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self):
1102
        # Within a leaf there are no hashes to exclude keys, make sure multi
        # value leaf nodes are handled well.
1104
basis_dict = {('aaa',): 'foo bar',
1105
('aab',): 'common altered a', ('b',): 'foo bar b'}
1106
target_dict = {('aaa',): 'foo bar',
1107
('aab',): 'common altered b', ('at',): 'foo bar t'}
1109
(('aab',), 'common altered a', 'common altered b'),
1110
(('at',), None, 'foo bar t'),
1111
(('b',), 'foo bar b', None),
1113
basis = self._get_map(basis_dict)
1114
target = self._get_map(target_dict, chk_bytes=basis._store)
1115
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
1117
def test_iteritems_empty(self):
1118
chk_bytes = self.get_chk_bytes()
1119
root_key = CHKMap.from_dict(chk_bytes, {})
1120
chkmap = CHKMap(chk_bytes, root_key)
1121
self.assertEqual([], list(chkmap.iteritems()))
1123
def test_iteritems_two_items(self):
1124
chk_bytes = self.get_chk_bytes()
1125
root_key = CHKMap.from_dict(chk_bytes,
1126
{"a":"content here", "b":"more content"})
1127
chkmap = CHKMap(chk_bytes, root_key)
1128
self.assertEqual([(("a",), "content here"), (("b",), "more content")],
1129
sorted(list(chkmap.iteritems())))
1131
def test_iteritems_selected_one_of_two_items(self):
1132
chkmap = self._get_map( {("a",):"content here", ("b",):"more content"})
1133
self.assertEqual({("a",): "content here"},
1134
self.to_dict(chkmap, [("a",)]))
1136
def test_iteritems_keys_prefixed_by_2_width_nodes(self):
1137
chkmap = self._get_map(
1138
{("a","a"):"content here", ("a", "b",):"more content",
1139
("b", ""): 'boring content'},
1140
maximum_size=10, key_width=2)
1142
{("a", "a"): "content here", ("a", "b"): 'more content'},
1143
self.to_dict(chkmap, [("a",)]))
1145
def test_iteritems_keys_prefixed_by_2_width_nodes_hashed(self):
1146
search_key_func = chk_map.search_key_registry.get('hash-16-way')
1147
self.assertEqual('E8B7BE43\x00E8B7BE43', search_key_func(('a', 'a')))
1148
self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
1149
self.assertEqual('71BEEFF9\x0000000000', search_key_func(('b', '')))
1150
chkmap = self._get_map(
1151
{("a","a"):"content here", ("a", "b",):"more content",
1152
("b", ""): 'boring content'},
1153
maximum_size=10, key_width=2, search_key_func=search_key_func)
1155
{("a", "a"): "content here", ("a", "b"): 'more content'},
1156
self.to_dict(chkmap, [("a",)]))
1158
def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
1159
chkmap = self._get_map(
1160
{("a","a"):"content here", ("a", "b",):"more content",
1161
("b", ""): 'boring content'}, key_width=2)
1163
{("a", "a"): "content here", ("a", "b"): 'more content'},
1164
self.to_dict(chkmap, [("a",)]))
1166
def test___len__empty(self):
1167
chkmap = self._get_map({})
1168
self.assertEqual(0, len(chkmap))
1170
def test___len__2(self):
1171
chkmap = self._get_map({("foo",):"bar", ("gam",):"quux"})
1172
self.assertEqual(2, len(chkmap))
1174
def test_max_size_100_bytes_new(self):
1175
# When there is a 100 byte upper node limit, a tree is formed.
1176
chkmap = self._get_map({("k1"*50,):"v1", ("k2"*50,):"v2"}, maximum_size=100)
1177
# We expect three nodes:
1178
# A root, with two children, and with two key prefixes - k1 to one, and
1179
# k2 to the other as our node splitting is only just being developed.
1180
# The maximum size should be embedded
1181
chkmap._ensure_root()
1182
self.assertEqual(100, chkmap._root_node.maximum_size)
1183
self.assertEqual(1, chkmap._root_node._key_width)
1184
# There should be two child nodes, and prefix of 2(bytes):
1185
self.assertEqual(2, len(chkmap._root_node._items))
1186
self.assertEqual("k", chkmap._root_node._compute_search_prefix())
1187
# The actual nodes pointed at will change as serialisers change; so
1188
# here we test that the key prefix is correct; then load the nodes and
1189
# check they have the right pointed at key; whether they have the
1190
# pointed at value inline or not is also unrelated to this test so we
1191
# don't check that in detail - rather we just check the aggregate
1193
nodes = sorted(chkmap._root_node._items.items())
1196
self.assertEqual('k1', ptr1[0])
1197
self.assertEqual('k2', ptr2[0])
1198
node1 = chk_map._deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1], None)
1199
self.assertIsInstance(node1, LeafNode)
1200
self.assertEqual(1, len(node1))
1201
self.assertEqual({('k1'*50,): 'v1'}, self.to_dict(node1, chkmap._store))
1202
node2 = chk_map._deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1], None)
1203
self.assertIsInstance(node2, LeafNode)
1204
self.assertEqual(1, len(node2))
1205
self.assertEqual({('k2'*50,): 'v2'}, self.to_dict(node2, chkmap._store))
1206
# Having checked we have a good structure, check that the content is
1208
self.assertEqual(2, len(chkmap))
1209
self.assertEqual({("k1"*50,): "v1", ("k2"*50,): "v2"},
1210
self.to_dict(chkmap))
1212
def test_init_root_is_LeafNode_new(self):
1213
chk_bytes = self.get_chk_bytes()
1214
chkmap = CHKMap(chk_bytes, None)
1215
self.assertIsInstance(chkmap._root_node, LeafNode)
1216
self.assertEqual({}, self.to_dict(chkmap))
1217
self.assertEqual(0, len(chkmap))
1219
def test_init_and_save_new(self):
1220
chk_bytes = self.get_chk_bytes()
1221
chkmap = CHKMap(chk_bytes, None)
1222
key = chkmap._save()
1223
leaf_node = LeafNode()
1224
self.assertEqual([key], leaf_node.serialise(chk_bytes))
1226
def test_map_first_item_new(self):
1227
chk_bytes = self.get_chk_bytes()
1228
chkmap = CHKMap(chk_bytes, None)
1229
chkmap.map(("foo,",), "bar")
1230
self.assertEqual({('foo,',): 'bar'}, self.to_dict(chkmap))
1231
self.assertEqual(1, len(chkmap))
1232
key = chkmap._save()
1233
leaf_node = LeafNode()
1234
leaf_node.map(chk_bytes, ("foo,",), "bar")
1235
self.assertEqual([key], leaf_node.serialise(chk_bytes))
1237
def test_unmap_last_item_root_is_leaf_new(self):
1238
chkmap = self._get_map({("k1"*50,): "v1", ("k2"*50,): "v2"})
1239
chkmap.unmap(("k1"*50,))
1240
chkmap.unmap(("k2"*50,))
1241
self.assertEqual(0, len(chkmap))
1242
self.assertEqual({}, self.to_dict(chkmap))
1243
key = chkmap._save()
1244
leaf_node = LeafNode()
1245
self.assertEqual([key], leaf_node.serialise(chkmap._store))
1247
def test__dump_tree(self):
1248
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2",
1249
("bbb",): "value3",},
1251
self.assertEqualDiff("'' InternalNode\n"
1252
" 'a' InternalNode\n"
1254
" ('aaa',) 'value1'\n"
1256
" ('aab',) 'value2'\n"
1258
" ('bbb',) 'value3'\n",
1259
chkmap._dump_tree())
1260
self.assertEqualDiff("'' InternalNode\n"
1261
" 'a' InternalNode\n"
1263
" ('aaa',) 'value1'\n"
1265
" ('aab',) 'value2'\n"
1267
" ('bbb',) 'value3'\n",
1268
chkmap._dump_tree())
1269
self.assertEqualDiff(
1270
"'' InternalNode sha1:0690d471eb0a624f359797d0ee4672bd68f4e236\n"
1271
" 'a' InternalNode sha1:1514c35503da9418d8fd90c1bed553077cb53673\n"
1272
" 'aaa' LeafNode sha1:4cc5970454d40b4ce297a7f13ddb76f63b88fefb\n"
1273
" ('aaa',) 'value1'\n"
1274
" 'aab' LeafNode sha1:1d68bc90914ef8a3edbcc8bb28b00cb4fea4b5e2\n"
1275
" ('aab',) 'value2'\n"
1276
" 'b' LeafNode sha1:3686831435b5596515353364eab0399dc45d49e7\n"
1277
" ('bbb',) 'value3'\n",
1278
chkmap._dump_tree(include_keys=True))
1280
def test__dump_tree_in_progress(self):
1281
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2"},
1283
chkmap.map(('bbb',), 'value3')
1284
self.assertEqualDiff("'' InternalNode\n"
1285
" 'a' InternalNode\n"
1287
" ('aaa',) 'value1'\n"
1289
" ('aab',) 'value2'\n"
1291
" ('bbb',) 'value3'\n",
1292
chkmap._dump_tree())
1293
# For things that are updated by adding 'bbb', we don't have a sha key
1294
# for them yet, so they are listed as None
1295
self.assertEqualDiff(
1296
"'' InternalNode None\n"
1297
" 'a' InternalNode sha1:6b0d881dd739a66f733c178b24da64395edfaafd\n"
1298
" 'aaa' LeafNode sha1:40b39a08d895babce17b20ae5f62d187eaa4f63a\n"
1299
" ('aaa',) 'value1'\n"
1300
" 'aab' LeafNode sha1:ad1dc7c4e801302c95bf1ba7b20bc45e548cd51a\n"
1301
" ('aab',) 'value2'\n"
1302
" 'b' LeafNode None\n"
1303
" ('bbb',) 'value3'\n",
1304
chkmap._dump_tree(include_keys=True))


def _search_key_single(key):
    """A search key function that maps all nodes to the same value"""
    return 'x'


def _test_search_key(key):
    return 'test:' + '\x00'.join(key)
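# For example, _test_search_key(('1', '2', '3')) == 'test:1\x002\x003'; the
# TestMapSearchKeys tests below assert exactly this.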
1315
class TestMapSearchKeys(TestCaseWithStore):
1317
def test_default_chk_map_uses_flat_search_key(self):
1318
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None)
1319
self.assertEqual('1',
1320
chkmap._search_key_func(('1',)))
1321
self.assertEqual('1\x002',
1322
chkmap._search_key_func(('1', '2')))
1323
self.assertEqual('1\x002\x003',
1324
chkmap._search_key_func(('1', '2', '3')))
1326
def test_search_key_is_passed_to_root_node(self):
1327
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1328
search_key_func=_test_search_key)
1329
self.assertIs(_test_search_key, chkmap._search_key_func)
1330
self.assertEqual('test:1\x002\x003',
1331
chkmap._search_key_func(('1', '2', '3')))
1332
self.assertEqual('test:1\x002\x003',
1333
chkmap._root_node._search_key(('1', '2', '3')))
1335
def test_search_key_passed_via__ensure_root(self):
1336
chk_bytes = self.get_chk_bytes()
1337
chkmap = chk_map.CHKMap(chk_bytes, None,
1338
search_key_func=_test_search_key)
1339
root_key = chkmap._save()
1340
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1341
search_key_func=_test_search_key)
1342
chkmap._ensure_root()
1343
self.assertEqual('test:1\x002\x003',
1344
chkmap._root_node._search_key(('1', '2', '3')))
1346
def test_search_key_with_internal_node(self):
1347
chk_bytes = self.get_chk_bytes()
1348
chkmap = chk_map.CHKMap(chk_bytes, None,
1349
search_key_func=_test_search_key)
1350
chkmap._root_node.set_maximum_size(10)
1351
chkmap.map(('1',), 'foo')
1352
chkmap.map(('2',), 'bar')
1353
chkmap.map(('3',), 'baz')
1354
self.assertEqualDiff("'' InternalNode\n"
1355
" 'test:1' LeafNode\n"
1357
" 'test:2' LeafNode\n"
1359
" 'test:3' LeafNode\n"
1361
, chkmap._dump_tree())
1362
root_key = chkmap._save()
1363
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1364
search_key_func=_test_search_key)
1365
self.assertEqualDiff("'' InternalNode\n"
1366
" 'test:1' LeafNode\n"
1368
" 'test:2' LeafNode\n"
1370
" 'test:3' LeafNode\n"
1372
, chkmap._dump_tree())
1374
def test_search_key_16(self):
1375
chk_bytes = self.get_chk_bytes()
1376
chkmap = chk_map.CHKMap(chk_bytes, None,
1377
search_key_func=chk_map._search_key_16)
1378
chkmap._root_node.set_maximum_size(10)
1379
chkmap.map(('1',), 'foo')
1380
chkmap.map(('2',), 'bar')
1381
chkmap.map(('3',), 'baz')
1382
self.assertEqualDiff("'' InternalNode\n"
1389
, chkmap._dump_tree())
1390
root_key = chkmap._save()
1391
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1392
search_key_func=chk_map._search_key_16)
1393
# We can get the values back correctly
1394
self.assertEqual([(('1',), 'foo')],
1395
list(chkmap.iteritems([('1',)])))
1396
self.assertEqualDiff("'' InternalNode\n"
1403
, chkmap._dump_tree())
1405
def test_search_key_255(self):
1406
chk_bytes = self.get_chk_bytes()
1407
chkmap = chk_map.CHKMap(chk_bytes, None,
1408
search_key_func=chk_map._search_key_255)
1409
chkmap._root_node.set_maximum_size(10)
1410
chkmap.map(('1',), 'foo')
1411
chkmap.map(('2',), 'bar')
1412
chkmap.map(('3',), 'baz')
1413
self.assertEqualDiff("'' InternalNode\n"
1414
" '\\x1a' LeafNode\n"
1418
" '\\x83' LeafNode\n"
1420
, chkmap._dump_tree())
1421
root_key = chkmap._save()
1422
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1423
search_key_func=chk_map._search_key_255)
1424
# We can get the values back correctly
1425
self.assertEqual([(('1',), 'foo')],
1426
list(chkmap.iteritems([('1',)])))
1427
self.assertEqualDiff("'' InternalNode\n"
1428
" '\\x1a' LeafNode\n"
1432
" '\\x83' LeafNode\n"
1434
, chkmap._dump_tree())
1436
def test_search_key_collisions(self):
1437
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1438
search_key_func=_search_key_single)
1439
# The node will want to expand, but it cannot, because it knows that
1440
# all the keys must map to this node
1441
chkmap._root_node.set_maximum_size(20)
1442
chkmap.map(('1',), 'foo')
1443
chkmap.map(('2',), 'bar')
1444
chkmap.map(('3',), 'baz')
1445
self.assertEqualDiff("'' LeafNode\n"
1449
, chkmap._dump_tree())
1452
class TestSearchKeyFuncs(tests.TestCase):
1454
def assertSearchKey16(self, expected, key):
1455
self.assertEqual(expected, chk_map._search_key_16(key))
1457
def assertSearchKey255(self, expected, key):
1458
actual = chk_map._search_key_255(key)
1459
self.assertEqual(expected, actual, 'actual: %r' % (actual,))
1461
def test_simple_16(self):
1462
self.assertSearchKey16('8C736521', ('foo',))
1463
self.assertSearchKey16('8C736521\x008C736521', ('foo', 'foo'))
1464
self.assertSearchKey16('8C736521\x0076FF8CAA', ('foo', 'bar'))
1465
self.assertSearchKey16('ED82CD11', ('abcd',))
1467
def test_simple_255(self):
1468
self.assertSearchKey255('\x8cse!', ('foo',))
1469
self.assertSearchKey255('\x8cse!\x00\x8cse!', ('foo', 'foo'))
1470
self.assertSearchKey255('\x8cse!\x00v\xff\x8c\xaa', ('foo', 'bar'))
1471
# The standard mapping for these would include '\n', so it should be
1473
self.assertSearchKey255('\xfdm\x93_\x00P_\x1bL', ('<', 'V'))
1475
def test_255_does_not_include_newline(self):
1476
# When mapping via _search_key_255, we should never have the '\n'
1477
# character, but all other 255 values should be present
1479
        chars_used = set()
        for char_in in range(256):
1480
search_key = chk_map._search_key_255((chr(char_in),))
1481
chars_used.update(search_key)
1482
all_chars = set([chr(x) for x in range(256)])
1483
unused_chars = all_chars.symmetric_difference(chars_used)
1484
self.assertEqual(set('\n'), unused_chars)
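        # (Keeping '\n' out of search keys matters because the serialised
        # node format used elsewhere in this file, e.g. "chkleaf:\n...",
        # is newline-delimited.)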
1487
class TestLeafNode(TestCaseWithStore):
1489
def test_current_size_empty(self):
1491
self.assertEqual(16, node._current_size())
1493
def test_current_size_size_changed(self):
1495
node.set_maximum_size(10)
1496
self.assertEqual(17, node._current_size())
1498
def test_current_size_width_changed(self):
1500
node._key_width = 10
1501
self.assertEqual(17, node._current_size())
1503
def test_current_size_items(self):
1505
base_size = node._current_size()
1506
node.map(None, ("foo bar",), "baz")
1507
self.assertEqual(base_size + 14, node._current_size())
1509
def test_deserialise_empty(self):
1510
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
1511
self.assertEqual(0, len(node))
1512
self.assertEqual(10, node.maximum_size)
1513
self.assertEqual(("sha1:1234",), node.key())
1514
self.assertIs(None, node._search_prefix)
1515
self.assertIs(None, node._common_serialised_prefix)
1517
def test_deserialise_items(self):
1518
node = LeafNode.deserialise(
1519
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1521
self.assertEqual(2, len(node))
1522
self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")],
1523
sorted(node.iteritems(None)))
1525
def test_deserialise_item_with_null_width_1(self):
1526
node = LeafNode.deserialise(
1527
"chkleaf:\n0\n1\n2\n\nfoo\x001\nbar\x00baz\nquux\x001\nblarh\n",
1529
self.assertEqual(2, len(node))
1530
self.assertEqual([(("foo",), "bar\x00baz"), (("quux",), "blarh")],
1531
sorted(node.iteritems(None)))
1533
def test_deserialise_item_with_null_width_2(self):
1534
node = LeafNode.deserialise(
1535
"chkleaf:\n0\n2\n2\n\nfoo\x001\x001\nbar\x00baz\n"
1536
"quux\x00\x001\nblarh\n",
1538
self.assertEqual(2, len(node))
1539
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("quux", ""), "blarh")],
1540
sorted(node.iteritems(None)))
1542
def test_iteritems_selected_one_of_two_items(self):
1543
node = LeafNode.deserialise(
1544
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1546
self.assertEqual(2, len(node))
1547
self.assertEqual([(("quux",), "blarh")],
1548
sorted(node.iteritems(None, [("quux",), ("qaz",)])))
1550
def test_deserialise_item_with_common_prefix(self):
1551
node = LeafNode.deserialise(
1552
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x001\nbar\x00baz\n2\x001\nblarh\n",
1554
self.assertEqual(2, len(node))
1555
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("foo", "2"), "blarh")],
1556
sorted(node.iteritems(None)))
1557
self.assertIs(chk_map._unknown, node._search_prefix)
1558
self.assertEqual('foo\x00', node._common_serialised_prefix)
1560
def test_deserialise_multi_line(self):
1561
node = LeafNode.deserialise(
1562
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x002\nbar\nbaz\n2\x002\nblarh\n\n",
1564
self.assertEqual(2, len(node))
1565
self.assertEqual([(("foo", "1"), "bar\nbaz"),
1566
(("foo", "2"), "blarh\n"),
1567
], sorted(node.iteritems(None)))
1568
self.assertIs(chk_map._unknown, node._search_prefix)
1569
self.assertEqual('foo\x00', node._common_serialised_prefix)
1571
def test_key_new(self):
1573
self.assertEqual(None, node.key())
1575
def test_key_after_map(self):
1576
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
1577
node.map(None, ("foo bar",), "baz quux")
1578
self.assertEqual(None, node.key())
1580
def test_key_after_unmap(self):
1581
node = LeafNode.deserialise(
1582
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1584
node.unmap(None, ("foo bar",))
1585
self.assertEqual(None, node.key())
1587
def test_map_exceeding_max_size_only_entry_new(self):
1589
node.set_maximum_size(10)
1590
result = node.map(None, ("foo bar",), "baz quux")
1591
self.assertEqual(("foo bar", [("", node)]), result)
1592
self.assertTrue(10 < node._current_size())
1594
def test_map_exceeding_max_size_second_entry_early_difference_new(self):
1596
node.set_maximum_size(10)
1597
node.map(None, ("foo bar",), "baz quux")
1598
prefix, result = list(node.map(None, ("blue",), "red"))
1599
self.assertEqual("", prefix)
1600
self.assertEqual(2, len(result))
1601
split_chars = set([result[0][0], result[1][0]])
1602
self.assertEqual(set(["f", "b"]), split_chars)
1603
nodes = dict(result)
1605
self.assertEqual({("foo bar",): "baz quux"}, self.to_dict(node, None))
1606
self.assertEqual(10, node.maximum_size)
1607
self.assertEqual(1, node._key_width)
1609
self.assertEqual({("blue",): "red"}, self.to_dict(node, None))
1610
self.assertEqual(10, node.maximum_size)
1611
self.assertEqual(1, node._key_width)
1613
def test_map_first(self):
1615
result = node.map(None, ("foo bar",), "baz quux")
1616
self.assertEqual(("foo bar", [("", node)]), result)
1617
self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node, None))
1618
self.assertEqual(1, len(node))
1620
def test_map_second(self):
1622
node.map(None, ("foo bar",), "baz quux")
1623
result = node.map(None, ("bingo",), "bango")
1624
self.assertEqual(("", [("", node)]), result)
1625
self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"},
1626
self.to_dict(node, None))
1627
self.assertEqual(2, len(node))
1629
def test_map_replacement(self):
1631
node.map(None, ("foo bar",), "baz quux")
1632
result = node.map(None, ("foo bar",), "bango")
1633
self.assertEqual(("foo bar", [("", node)]), result)
1634
self.assertEqual({("foo bar",): "bango"},
1635
self.to_dict(node, None))
1636
self.assertEqual(1, len(node))
1638
def test_serialise_empty(self):
1639
store = self.get_chk_bytes()
1641
node.set_maximum_size(10)
1642
expected_key = ("sha1:f34c3f0634ea3f85953dffa887620c0a5b1f4a51",)
1643
self.assertEqual([expected_key],
1644
list(node.serialise(store)))
1645
self.assertEqual("chkleaf:\n10\n1\n0\n\n", self.read_bytes(store, expected_key))
1646
self.assertEqual(expected_key, node.key())
1648
def test_serialise_items(self):
1649
store = self.get_chk_bytes()
1651
node.set_maximum_size(10)
1652
node.map(None, ("foo bar",), "baz quux")
1653
expected_key = ("sha1:f89fac7edfc6bdb1b1b54a556012ff0c646ef5e0",)
1654
self.assertEqual('foo bar', node._common_serialised_prefix)
1655
self.assertEqual([expected_key],
1656
list(node.serialise(store)))
1657
self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\n\x001\nbaz quux\n",
1658
self.read_bytes(store, expected_key))
1659
self.assertEqual(expected_key, node.key())
1661
def test_unique_serialised_prefix_empty_new(self):
1663
self.assertIs(None, node._compute_search_prefix())
1665
def test_unique_serialised_prefix_one_item_new(self):
1667
node.map(None, ("foo bar", "baz"), "baz quux")
1668
self.assertEqual("foo bar\x00baz", node._compute_search_prefix())
1670
def test_unmap_missing(self):
1672
self.assertRaises(KeyError, node.unmap, None, ("foo bar",))
1674
def test_unmap_present(self):
1676
node.map(None, ("foo bar",), "baz quux")
1677
result = node.unmap(None, ("foo bar",))
1678
self.assertEqual(node, result)
1679
self.assertEqual({}, self.to_dict(node, None))
1680
self.assertEqual(0, len(node))
1682
def test_map_maintains_common_prefixes(self):
1685
node.map(None, ("foo bar", "baz"), "baz quux")
1686
self.assertEqual('foo bar\x00baz', node._search_prefix)
1687
self.assertEqual('foo bar\x00baz', node._common_serialised_prefix)
1688
node.map(None, ("foo bar", "bing"), "baz quux")
1689
self.assertEqual('foo bar\x00b', node._search_prefix)
1690
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1691
node.map(None, ("fool", "baby"), "baz quux")
1692
self.assertEqual('foo', node._search_prefix)
1693
self.assertEqual('foo', node._common_serialised_prefix)
1694
node.map(None, ("foo bar", "baz"), "replaced")
1695
self.assertEqual('foo', node._search_prefix)
1696
self.assertEqual('foo', node._common_serialised_prefix)
1697
node.map(None, ("very", "different"), "value")
1698
self.assertEqual('', node._search_prefix)
1699
self.assertEqual('', node._common_serialised_prefix)
1701
def test_unmap_maintains_common_prefixes(self):
1704
node.map(None, ("foo bar", "baz"), "baz quux")
1705
node.map(None, ("foo bar", "bing"), "baz quux")
1706
node.map(None, ("fool", "baby"), "baz quux")
1707
node.map(None, ("very", "different"), "value")
1708
self.assertEqual('', node._search_prefix)
1709
self.assertEqual('', node._common_serialised_prefix)
1710
node.unmap(None, ("very", "different"))
1711
self.assertEqual("foo", node._search_prefix)
1712
self.assertEqual("foo", node._common_serialised_prefix)
1713
node.unmap(None, ("fool", "baby"))
1714
self.assertEqual('foo bar\x00b', node._search_prefix)
1715
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1716
node.unmap(None, ("foo bar", "baz"))
1717
self.assertEqual('foo bar\x00bing', node._search_prefix)
1718
self.assertEqual('foo bar\x00bing', node._common_serialised_prefix)
1719
node.unmap(None, ("foo bar", "bing"))
1720
self.assertEqual(None, node._search_prefix)
1721
self.assertEqual(None, node._common_serialised_prefix)
1724
class TestInternalNode(TestCaseWithStore):

    def test_add_node_empty_new(self):
        node = InternalNode('fo')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("foo", child)
        # Note that node isn't strictly valid now as a tree (only one child),
        # but that's ok for this test.
        # The first child defines the node's width:
        self.assertEqual(3, node._node_width)
        # We should be able to iterate over the contents without doing IO.
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, None))
        # The length should be known:
        self.assertEqual(1, len(node))
        # serialising the node should serialise the child and the node.
        chk_bytes = self.get_chk_bytes()
        keys = list(node.serialise(chk_bytes))
        child_key = child.serialise(chk_bytes)[0]
        self.assertEqual(
            [child_key, ('sha1:cf67e9997d8228a907c1f5bfb25a8bd9cd916fac',)],
            keys)
        # We should be able to access deserialised content.
        bytes = self.read_bytes(chk_bytes, keys[1])
        node = chk_map._deserialise(bytes, keys[1], None)
        self.assertEqual(1, len(node))
        self.assertEqual({('foo',): 'bar'}, self.to_dict(node, chk_bytes))
        self.assertEqual(3, node._node_width)

    def test_add_node_resets_key_new(self):
        node = InternalNode('fo')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("foo", child)
        chk_bytes = self.get_chk_bytes()
        keys = list(node.serialise(chk_bytes))
        self.assertEqual(keys[1], node._key)
        node.add_node("fos", child)
        self.assertEqual(None, node._key)

    # def test_add_node_empty_oversized_one_ok_new(self):
    # def test_add_node_one_oversized_second_kept_minimum_fan(self):
    # def test_add_node_two_oversized_third_kept_minimum_fan(self):
    # def test_add_node_one_oversized_second_splits_errors(self):

    def test__iter_nodes_no_key_filter(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "baz")
        node.add_node("b", child)

        for child, node_key_filter in node._iter_nodes(None, key_filter=None):
            self.assertEqual(None, node_key_filter)

    def test__iter_nodes_splits_key_filter(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "bar")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "baz")
        node.add_node("b", child)

        # foo and bar both match exactly one leaf node, but 'cat' should not
        # match any, and should not be placed in one.
        key_filter = (('foo',), ('bar',), ('cat',))
        for child, node_key_filter in node._iter_nodes(None,
                                                       key_filter=key_filter):
            # each child could only match one key filter, so make sure it was
            # filtered
            self.assertEqual(1, len(node_key_filter))

    def test__iter_nodes_with_multiple_matches(self):
        node = InternalNode('')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "val")
        child.map(None, ("fob",), "val")
        node.add_node("f", child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("bar",), "val")
        child.map(None, ("baz",), "val")
        node.add_node("b", child)

        # Note that 'ram' doesn't match anything, so it should be freely
        # ignored
        key_filter = (('foo',), ('fob',), ('bar',), ('baz',), ('ram',))
        for child, node_key_filter in node._iter_nodes(None,
                                                       key_filter=key_filter):
            # each child could match two key filters, so make sure they were
            # preserved
            self.assertEqual(2, len(node_key_filter))

    def make_fo_fa_node(self):
        node = InternalNode('f')
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("foo",), "val")
        child.map(None, ("fob",), "val")
        node.add_node('fo', child)
        child = LeafNode()
        child.set_maximum_size(100)
        child.map(None, ("far",), "val")
        child.map(None, ("faz",), "val")
        node.add_node("fa", child)
        return node

    def test__iter_nodes_single_entry(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        self.assertEqual(key_filter, nodes[0][1])

    def test__iter_nodes_single_entry_misses(self):
        node = self.make_fo_fa_node()
        key_filter = [('bar',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(0, len(nodes))

    def test__iter_nodes_mixed_key_width(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('b',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(1, len(nodes))
        matches = key_filter[:]
        matches.remove(('b',))
        self.assertEqual(sorted(matches), sorted(nodes[0][1]))

    def test__iter_nodes_match_all(self):
        node = self.make_fo_fa_node()
        key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('f',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))

    def test__iter_nodes_fixed_widths_and_misses(self):
        node = self.make_fo_fa_node()
        # foo and faa should both match one child, baz should miss
        key_filter = [('foo',), ('faa',), ('baz',)]
        nodes = list(node._iter_nodes(None, key_filter=key_filter))
        self.assertEqual(2, len(nodes))
        for node, matches in nodes:
            self.assertEqual(1, len(matches))

    def test_iteritems_empty_new(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems(None)))

    def test_iteritems_two_children(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode()
        leaf2.map(None, ('strange',), 'beast')
        node.add_node("f", leaf1)
        node.add_node("s", leaf2)
        self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')],
            sorted(node.iteritems(None)))

    def test_iteritems_two_children_partial(self):
        node = InternalNode()
        leaf1 = LeafNode()
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode()
        leaf2.map(None, ('strange',), 'beast')
        node.add_node("f", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items['f'] = None
        node.add_node("s", leaf2)
        self.assertEqual([(('strange',), 'beast')],
            sorted(node.iteritems(None, [('strange',), ('weird',)])))

    def test_iteritems_two_children_with_hash(self):
        search_key_func = chk_map.search_key_registry.get('hash-255-way')
        node = InternalNode(search_key_func=search_key_func)
        leaf1 = LeafNode(search_key_func=search_key_func)
        leaf1.map(None, ('foo bar',), 'quux')
        leaf2 = LeafNode(search_key_func=search_key_func)
        leaf2.map(None, ('strange',), 'beast')
        self.assertEqual('\xbeF\x014', search_key_func(('foo bar',)))
        self.assertEqual('\x85\xfa\xf7K', search_key_func(('strange',)))
        node.add_node("\xbe", leaf1)
        # This sets up a path that should not be followed - it will error if
        # the code tries to.
        node._items['\xbe'] = None
        node.add_node("\x85", leaf2)
        self.assertEqual([(('strange',), 'beast')],
            sorted(node.iteritems(None, [('strange',), ('weird',)])))

    def test_iteritems_partial_empty(self):
        node = InternalNode()
        self.assertEqual([], sorted(node.iteritems([('missing',)])))

    def test_map_to_new_child_new(self):
        chkmap = self._get_map({('k1',):'foo', ('k2',):'bar'}, maximum_size=10)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
            len([value for value in node._items.values()
                if type(value) == tuple]))
        # now, mapping to k3 should add a k3 leaf
        prefix, nodes = node.map(None, ('k3',), 'quux')
        self.assertEqual("k", prefix)
        self.assertEqual([("", node)], nodes)
        # check new child details
        child = node._items['k3']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({('k3',): 'quux'}, self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k2',): 'bar', ('k3',): 'quux'},
            self.to_dict(chkmap))
        # serialising should only serialise the new data - k3 and the internal
        # node.
        keys = list(node.serialise(chkmap._store))
        child_key = child.serialise(chkmap._store)[0]
        self.assertEqual([child_key, keys[1]], keys)

    def test_map_to_child_child_splits_new(self):
        chkmap = self._get_map({('k1',):'foo', ('k22',):'bar'}, maximum_size=10)
        # Check for the canonical root value for this tree:
        self.assertEqualDiff("'' InternalNode\n"
            , chkmap._dump_tree())
        # _dump_tree pages everything in, so reload using just the root
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # Ensure test validity: nothing paged in below the root.
        self.assertEqual(2,
            len([value for value in node._items.values()
                if type(value) == tuple]))
        # now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
        # k23, which for simplicity in the current implementation generates
        # a new internal node between node, and k22/k23.
        prefix, nodes = node.map(chkmap._store, ('k23',), 'quux')
        self.assertEqual("k", prefix)
        self.assertEqual([("", node)], nodes)
        # check new child details
        child = node._items['k2']
        self.assertIsInstance(child, InternalNode)
        self.assertEqual(2, len(child))
        self.assertEqual({('k22',): 'bar', ('k23',): 'quux'},
            self.to_dict(child, None))
        self.assertEqual(None, child._key)
        self.assertEqual(10, child.maximum_size)
        self.assertEqual(1, child._key_width)
        self.assertEqual(3, child._node_width)
        # Check overall structure:
        self.assertEqual(3, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar', ('k23',): 'quux'},
            self.to_dict(chkmap))
        # serialising should only serialise the new data - although k22 hasn't
        # changed because it's a special corner case (splitting with only one
        # key leaves one node unaltered), in general k22 is serialised, so we
        # expect k22, k23, the new internal node, and node, to be serialised.
        keys = list(node.serialise(chkmap._store))
        child_key = child._key
        k22_key = child._items['k22']._key
        k23_key = child._items['k23']._key
        self.assertEqual([k22_key, k23_key, child_key, node.key()], keys)
        self.assertEqualDiff("'' InternalNode\n"
            " 'k2' InternalNode\n"
            " ('k23',) 'quux'\n"
            , chkmap._dump_tree())

    def test__search_prefix_filter_with_hash(self):
        search_key_func = chk_map.search_key_registry.get('hash-16-way')
        node = InternalNode(search_key_func=search_key_func)
        node._key_width = 2
        node._node_width = 4
        self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
        self.assertEqual('E8B7', node._search_prefix_filter(('a', 'b')))
        self.assertEqual('E8B7', node._search_prefix_filter(('a',)))

    def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
        chkmap = self._get_map(
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
        # Check we have the expected tree.
        self.assertEqualDiff("'' InternalNode\n"
            " 'k2' InternalNode\n"
            " ('k23',) 'quux'\n"
            , chkmap._dump_tree())
        chkmap = CHKMap(chkmap._store, chkmap._root_node)
        chkmap._ensure_root()
        node = chkmap._root_node
        # unmapping k23 should give us a root, with k1 and k22 as direct
        # children.
        result = node.unmap(chkmap._store, ('k23',))
        # check the pointed-at object within node - k2 should now point at the
        # k22 leaf (which has been paged in to see if we can collapse the tree)
        child = node._items['k2']
        self.assertIsInstance(child, LeafNode)
        self.assertEqual(1, len(child))
        self.assertEqual({('k22',): 'bar'},
            self.to_dict(child, None))
        # Check overall structure is intact:
        self.assertEqual(2, len(chkmap))
        self.assertEqual({('k1',): 'foo', ('k22',): 'bar'},
            self.to_dict(chkmap))
        # serialising should only serialise the new data - the root node.
        keys = list(node.serialise(chkmap._store))
        self.assertEqual([keys[-1]], keys)
        chkmap = CHKMap(chkmap._store, keys[-1])
        self.assertEqualDiff("'' InternalNode\n"
            , chkmap._dump_tree())

    def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self):
        chkmap = self._get_map(
            {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
        self.assertEqualDiff("'' InternalNode\n"
            " 'k2' InternalNode\n"
            " ('k23',) 'quux'\n"
            , chkmap._dump_tree())
        orig_root = chkmap._root_node
        chkmap = CHKMap(chkmap._store, orig_root)
        chkmap._ensure_root()
        node = chkmap._root_node
        k2_ptr = node._items['k2']
        # unmapping k1 should give us a root, with k22 and k23 as direct
        # children, and should not have needed to page in the subtree.
        result = node.unmap(chkmap._store, ('k1',))
        self.assertEqual(k2_ptr, result)
        chkmap = CHKMap(chkmap._store, orig_root)
        # Unmapping at the CHKMap level should switch to the new root
        chkmap.unmap(('k1',))
        self.assertEqual(k2_ptr, chkmap._root_node)
        self.assertEqualDiff("'' InternalNode\n"
            " ('k23',) 'quux'\n"
            , chkmap._dump_tree())


# map -> fits - done
# map -> doesn't fit - shrink from left till fits
# key data to return: the common prefix, new nodes.

# unmap -> how to tell if siblings can be combined.
# combining leaf nodes means expanding the prefix to the left; so gather the size of
# all the leaf nodes addressed by expanding the prefix by 1; if any adjacent node
# is an internal node, we know that that is a dense subtree - can't combine.
# otherwise as soon as the sum of serialised values exceeds the split threshold
# we know we can't combine - stop.
# unmap -> key return data - space in node, common prefix length? and key count

# variable length prefixes? -> later start with fixed width to get something going
# map -> fits - update pointer to leaf
# return [prefix and node] - seems sound.
# map -> doesn't fit - find unique prefix and shift right
# create internal nodes for all the partitions, return list of unique
# prefixes and nodes.
# map -> new prefix - create a leaf
# unmap -> if child key count 0, remove
# unmap -> return space in node, common prefix length? (why?), key count

# map, if 1 node returned, use it, otherwise make an internal and populate.
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
# code).
# map inits as empty leafnode.

# AA, AB, AC, AD, BA
# packed internal node - ideal:
# AA, AB, AC, AD, BA
# single byte fanout - A,B, AA,AB,AC,AD, BA

# AB - split, but we want to end up with AB, BA, in one node, with

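# A minimal illustrative sketch of the prefix behaviour described in the
# notes above.  It is not run by the test loader and assumes nothing beyond
# what test_map_maintains_common_prefixes and
# test_unmap_maintains_common_prefixes assert earlier in this module: the
# common serialised prefix narrows as diverging keys are mapped in, and
# widens again as keys are unmapped.
def _leaf_prefix_sketch():
    node = LeafNode()
    node._key_width = 2
    node.map(None, ("foo bar", "baz"), "value one")
    node.map(None, ("foo bar", "bing"), "value two")
    # Both keys share 'foo bar\x00b', so that is the common prefix now.
    narrowed = node._common_serialised_prefix
    node.unmap(None, ("foo bar", "baz"))
    # With one key left, the prefix widens back to the whole remaining key.
    widened = node._common_serialised_prefix
    return narrowed, widened
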
class TestCHKMapDifference(TestCaseWithExampleMaps):

    def get_difference(self, new_roots, old_roots,
                       search_key_func=None):
        if search_key_func is None:
            search_key_func = chk_map._search_key_plain
        return chk_map.CHKMapDifference(self.get_chk_bytes(),
            new_roots, old_roots, search_key_func)

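    # A note on what the assertions below rely on: _read_all_roots() yields
    # records for the new root pages, while _old_queue, _new_queue and
    # _new_item_queue hold the pages and already-known items that still have
    # to be walked.
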
    def test__init__(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map.map(('aaa',), 'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1])
        self.assertEqual(set([key1]), diff._all_old_chks)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([], diff._new_queue)

    def help__read_all_roots(self, search_key_func):
        c_map = self.make_root_only_map(search_key_func=search_key_func)
        key1 = c_map.key()
        c_map.map(('aaa',), 'new aaa content')
        key2 = c_map._save()
        diff = self.get_difference([key2], [key1], search_key_func)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # We should have queued up only items that aren't in the old
        # set
        self.assertEqual([(('aaa',), 'new aaa content')],
            diff._new_item_queue)
        self.assertEqual([], diff._new_queue)
        # And there are no old references, so that queue should be
        # empty
        self.assertEqual([], diff._old_queue)

    def test__read_all_roots_plain(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_plain)

    def test__read_all_roots_16(self):
        self.help__read_all_roots(search_key_func=chk_map._search_key_16)

    def test__read_all_roots_skips_known_old(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map2 = self.make_root_only_map(chk_map._search_key_plain)
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        # We should have no results. key2 is completely contained within key1,
        # and we should have seen that in the first pass
        self.assertEqual([], root_results)

    def test__read_all_roots_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        c_map.map(('abb',), 'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # At this point, we should have queued up only the 'a' Leaf on both
        # sides, both 'c' and 'd' are known to not have changed on both sides
        self.assertEqual([key2_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        self.assertEqual([key1_a], diff._old_queue)

    def test__read_all_roots_multi_new_prepares_queues(self):
        c_map = self.make_one_deep_map(chk_map._search_key_plain)
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        key1_c = c_map._root_node._items['c'].key()
        c_map.map(('abb',), 'new abb content')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        key2_c = c_map._root_node._items['c'].key()
        c_map = chk_map.CHKMap(self.get_chk_bytes(), key1,
                               chk_map._search_key_plain)
        c_map.map(('ccc',), 'new ccc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        key3_c = c_map._root_node._items['c'].key()
        diff = self.get_difference([key2, key3], [key1],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        # We should have queued up key2_a and key3_c, but not key2_c or key3_a
        self.assertEqual([key2_a, key3_c], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        # And we should have queued up both a and c for the old set
        self.assertEqual([key1_a, key1_c], diff._old_queue)

    def test__read_all_roots_different_depths(self):
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_a = c_map._root_node._items['a'].key()
        key1_c = c_map._root_node._items['c'].key()
        key1_d = c_map._root_node._items['d'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_aa = c_map2._root_node._items['aa'].key()
        key2_ad = c_map2._root_node._items['ad'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the 'a' subset should be queued up, since 'c' and 'd' cannot be
        # present
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([key2_aa, key2_ad], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual([key2_aa, key2_ad], diff._old_queue)
        self.assertEqual([key1_a, key1_c, key1_d], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_different_depths_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_2 = c_map._root_node._items['2'].key()
        key1_4 = c_map._root_node._items['4'].key()
        key1_C = c_map._root_node._items['C'].key()
        key1_F = c_map._root_node._items['F'].key()

        c_map2 = self.make_one_deep_two_prefix_map(chk_map._search_key_16)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_F0 = c_map2._root_node._items['F0'].key()
        key2_F3 = c_map2._root_node._items['F3'].key()
        key2_F4 = c_map2._root_node._items['F4'].key()
        key2_FD = c_map2._root_node._items['FD'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # Only the subset of keys that may be present should be queued up.
        self.assertEqual([key1_F], diff._old_queue)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
                         sorted(diff._old_queue))
        self.assertEqual(sorted([key1_2, key1_4, key1_C, key1_F]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_mixed_depth(self):
        c_map = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
        c_map._dump_tree() # load everything
        key1 = c_map.key()
        key1_aa = c_map._root_node._items['aa'].key()
        key1_ad = c_map._root_node._items['ad'].key()

        c_map2 = self.make_one_deep_one_prefix_map(chk_map._search_key_plain)
        c_map2._dump_tree() # load everything
        key2 = c_map2.key()
        key2_a = c_map2._root_node._items['a'].key()
        key2_b = c_map2._root_node._items['b'].key()

        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # 'ad' matches exactly 'a' on the other side, so it should be removed,
        # and neither side should have it queued for walking
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key2_b], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        diff = self.get_difference([key1], [key2], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key1], root_results)
        # Note: This is technically not the 'true minimal' set that we could
        # use. The reason is that 'a' was matched exactly to 'ad' (by sha
        # sum). However, the code gets complicated in the case of more
        # than one interesting key, so for now we live with this.
        # Consider revising, though benchmarking showing it to be a
        # real-world issue should be done.
        self.assertEqual([key2_a], diff._old_queue)
        # self.assertEqual([], diff._old_queue)
        self.assertEqual([key1_aa], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_yields_extra_deep_records(self):
        # This is slightly controversial, as we will yield a chk page that we
        # might later on find out could be filtered out. (If a root node is
        # referenced deeper in the old set.)
        # However, even with stacking, we always have all chk pages that we
        # will need. So as long as we filter out the referenced keys, we'll
        # never run into problems.
        # This allows us to yield a root node record immediately, without any
        # buffering.
        c_map = self.make_two_deep_map(chk_map._search_key_plain)
        c_map._dump_tree() # load all keys
        key1 = c_map.key()
        key1_a = c_map._root_node._items['a'].key()
        c_map2 = self.get_map({
            ('acc',): 'initial acc content',
            ('ace',): 'initial ace content',
        }, maximum_size=100)
        self.assertEqualDiff(
            " ('acc',) 'initial acc content'\n"
            " ('ace',) 'initial ace content'\n",
            c_map2._dump_tree())
        key2 = c_map2.key()
        diff = self.get_difference([key2], [key1], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key2], root_results)
        # However, even though we have yielded the root node to be fetched,
        # we should have enqueued all of the chk pages to be walked, so that
        # we can find the keys if they are present
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([(('acc',), 'initial acc content'),
                          (('ace',), 'initial ace content'),
                         ], diff._new_item_queue)

    def test__read_all_roots_multiple_targets(self):
        c_map = self.make_root_only_map()
        key1 = c_map.key()
        c_map = self.make_one_deep_map()
        key2 = c_map.key()
        c_map._dump_tree() # load everything
        key2_c = c_map._root_node._items['c'].key()
        key2_d = c_map._root_node._items['d'].key()
        c_map.map(('ccc',), 'new ccc value')
        key3 = c_map._save()
        key3_c = c_map._root_node._items['c'].key()
        diff = self.get_difference([key2, key3], [key1],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual(sorted([key2, key3]), sorted(root_results))
        self.assertEqual([], diff._old_queue)
        # the key 'd' is interesting from key2 and key3, but should only be
        # entered into the queue 1 time
        self.assertEqual(sorted([key2_c, key3_c, key2_d]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old(self):
        # This is the 'initial branch' case. With nothing in the old
        # set, we can just queue up all root nodes into interesting queue, and
        # then have them fast-path flushed via _flush_new_queue
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map()
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [], chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]), sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_no_old_16(self):
        c_map = self.make_two_deep_map(chk_map._search_key_16)
        key1 = c_map.key()
        diff = self.get_difference([key1], [], chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual([key1], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

        c_map2 = self.make_one_deep_map(chk_map._search_key_16)
        key2 = c_map2.key()
        diff = self.get_difference([key1, key2], [],
                                   chk_map._search_key_16)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([], root_results)
        self.assertEqual([], diff._old_queue)
        self.assertEqual(sorted([key1, key2]),
                         sorted(diff._new_queue))
        self.assertEqual([], diff._new_item_queue)

    def test__read_all_roots_multiple_old(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        c_map.map(('ccc',), 'new ccc value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        c_map.map(('add',), 'new add value')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        diff = self.get_difference([key3], [key1, key2],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        # the 'a' keys should not be queued up 2 times, since they are
        # identical
        self.assertEqual([key1_a], diff._old_queue)
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)

    def test__process_next_old_batched_no_dupes(self):
        c_map = self.make_two_deep_map()
        key1 = c_map.key()
        c_map._dump_tree() # load everything
        key1_a = c_map._root_node._items['a'].key()
        key1_aa = c_map._root_node._items['a']._items['aa'].key()
        key1_ab = c_map._root_node._items['a']._items['ab'].key()
        key1_ac = c_map._root_node._items['a']._items['ac'].key()
        key1_ad = c_map._root_node._items['a']._items['ad'].key()
        c_map.map(('aaa',), 'new aaa value')
        key2 = c_map._save()
        key2_a = c_map._root_node._items['a'].key()
        key2_aa = c_map._root_node._items['a']._items['aa'].key()
        c_map.map(('acc',), 'new acc content')
        key3 = c_map._save()
        key3_a = c_map._root_node._items['a'].key()
        key3_ac = c_map._root_node._items['a']._items['ac'].key()
        diff = self.get_difference([key3], [key1, key2],
                                   chk_map._search_key_plain)
        root_results = [record.key for record in diff._read_all_roots()]
        self.assertEqual([key3], root_results)
        self.assertEqual(sorted([key1_a, key2_a]),
                         sorted(diff._old_queue))
        self.assertEqual([key3_a], diff._new_queue)
        self.assertEqual([], diff._new_item_queue)
        diff._process_next_old()
        # All of the old records should be brought in and queued up,
        # but we should not have any duplicates
        self.assertEqual(sorted([key1_aa, key1_ab, key1_ac, key1_ad, key2_aa]),
                         sorted(diff._old_queue))


class TestIterInterestingNodes(TestCaseWithExampleMaps):

    def get_map_key(self, a_dict, maximum_size=10):
        c_map = self.get_map(a_dict, maximum_size=maximum_size)
        return c_map.key()

    def assertIterInteresting(self, records, items, interesting_keys,
                              old_keys):
        """Check the result of iter_interesting_nodes.

        Note that we no longer care how many steps are taken, etc, just that
        the right contents are returned.

        :param records: A list of record keys that should be yielded
        :param items: A list of items (key,value) that should be yielded.
        """
        store = self.get_chk_bytes()
        store._search_key_func = chk_map._search_key_plain
        iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys,
                                                    old_keys)
        record_keys = []
        all_items = []
        for record, new_items in iter_nodes:
            if record is not None:
                record_keys.append(record.key)
            if new_items:
                all_items.extend(new_items)
        self.assertEqual(sorted(records), sorted(record_keys))
        self.assertEqual(sorted(items), sorted(all_items))

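    # Each test below builds maps with get_map_key() and then asserts, via
    # assertIterInteresting(), exactly which chk pages and which (key, value)
    # items walking the target roots against the basis roots should yield.
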
    def test_empty_to_one_keys(self):
        target = self.get_map_key({('a',): 'content'})
        self.assertIterInteresting([target],
                                   [(('a',), 'content')],
                                   [target], [])

    def test_none_to_one_key(self):
        basis = self.get_map_key({})
        target = self.get_map_key({('a',): 'content'})
        self.assertIterInteresting([target],
                                   [(('a',), 'content')],
                                   [target], [basis])

    def test_one_to_none_key(self):
        basis = self.get_map_key({('a',): 'content'})
        target = self.get_map_key({})
        self.assertIterInteresting([target],
                                   [],
                                   [target], [basis])

    def test_common_pages(self):
        basis = self.get_map_key({('a',): 'content',
                                  ('b',): 'content',
                                  ('c',): 'content',
                                  })
        target = self.get_map_key({('a',): 'content',
                                   ('b',): 'other content',
                                   ('c',): 'content',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " ('a',) 'content'\n"
            " ('b',) 'other content'\n"
            " ('c',) 'content'\n",
            target_map._dump_tree())
        b_key = target_map._root_node._items['b'].key()
        # This should return the root node, and the node for the 'b' key
        self.assertIterInteresting([target, b_key],
                                   [(('b',), 'other content')],
                                   [target], [basis])

    def test_common_sub_page(self):
        basis = self.get_map_key({('aaa',): 'common',
                                  ('c',): 'common',
                                  })
        target = self.get_map_key({('aaa',): 'common',
                                   ('aab',): 'new',
                                   ('c',): 'common',
                                   })
        target_map = CHKMap(self.get_chk_bytes(), target)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('c',) 'common'\n",
            target_map._dump_tree())
        # The key for the internal aa node
        a_key = target_map._root_node._items['a'].key()
        # The key for the leaf aab node
        # aaa_key = target_map._root_node._items['a']._items['aaa'].key()
        aab_key = target_map._root_node._items['a']._items['aab'].key()
        self.assertIterInteresting([target, a_key, aab_key],
                                   [(('aab',), 'new')],
                                   [target], [basis])

    def test_common_leaf(self):
        basis = self.get_map_key({})
        target1 = self.get_map_key({('aaa',): 'common'})
        target2 = self.get_map_key({('aaa',): 'common',
                                    ('bbb',): 'new',
                                    })
        target3 = self.get_map_key({('aaa',): 'common',
                                    ('aac',): 'other',
                                    ('bbb',): 'new',
                                    })
        # The LeafNode containing 'aaa': 'common' occurs at 3 different levels.
        # Once as a root node, once as a second layer, and once as a third
        # layer. It should only be returned one time regardless
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n",
            target1_map._dump_tree())
        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " ('bbb',) 'new'\n",
            target2_map._dump_tree())
        target3_map = CHKMap(self.get_chk_bytes(), target3)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'other'\n"
            " ('bbb',) 'new'\n",
            target3_map._dump_tree())
        aaa_key = target1_map._root_node.key()
        b_key = target2_map._root_node._items['b'].key()
        a_key = target3_map._root_node._items['a'].key()
        aac_key = target3_map._root_node._items['a']._items['aac'].key()
        self.assertIterInteresting(
            [target1, target2, target3, a_key, aac_key, b_key],
            [(('aaa',), 'common'), (('bbb',), 'new'), (('aac',), 'other')],
            [target1, target2, target3], [basis])

        self.assertIterInteresting(
            [target2, target3, a_key, aac_key, b_key],
            [(('bbb',), 'new'), (('aac',), 'other')],
            [target2, target3], [target1])

        # Technically, target1 could be filtered out, but since it is a root
        # node, we yield it immediately, rather than waiting to find out much
        # later that it is not interesting
        self.assertIterInteresting(
            [target1],
            [],
            [target1], [target3])

    def test_multiple_maps(self):
        basis1 = self.get_map_key({('aaa',): 'common',
        basis2 = self.get_map_key({('bbb',): 'common',
        target1 = self.get_map_key({('aaa',): 'common',
                                    ('aac',): 'target1',
                                    ('bbb',): 'common',
                                    })
        target2 = self.get_map_key({('aaa',): 'common',
                                    ('bba',): 'target2',
                                    ('bbb',): 'common',
                                    })
        target1_map = CHKMap(self.get_chk_bytes(), target1)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'common'\n"
            " ('aac',) 'target1'\n"
            " ('bbb',) 'common'\n",
            target1_map._dump_tree())
        # The key for the target1 internal a node
        a_key = target1_map._root_node._items['a'].key()
        # The key for the leaf aac node
        aac_key = target1_map._root_node._items['a']._items['aac'].key()

        target2_map = CHKMap(self.get_chk_bytes(), target2)
        self.assertEqualDiff(
            " ('aaa',) 'common'\n"
            " 'b' InternalNode\n"
            " ('bba',) 'target2'\n"
            " ('bbb',) 'common'\n",
            target2_map._dump_tree())
        # The key for the target2 internal bb node
        b_key = target2_map._root_node._items['b'].key()
        # The key for the leaf bba node
        bba_key = target2_map._root_node._items['b']._items['bba'].key()
        self.assertIterInteresting(
            [target1, target2, a_key, aac_key, b_key, bba_key],
            [(('aac',), 'target1'), (('bba',), 'target2')],
            [target1, target2], [basis1, basis2])

    def test_multiple_maps_overlapping_common_new(self):
        # Test that when a node is found through the interesting_keys
        # iteration for *some roots* and also via the old keys iteration, it
        # is still scanned for old refs and items, because it is not truly
        # new. This requires 2 levels of InternalNodes to expose, because of
        # the way the bootstrap in _find_children_info works.
        # This suggests that the code is probably amenable to/benefit from
        # How does this test work?
        # 1) We need a second level InternalNode present in a basis tree.
        # 2) We need a left side new tree that uses that InternalNode
        # 3) We need a right side new tree that does not use that InternalNode
        #    at all but that has an unchanged *value* that was reachable
        #    inside that InternalNode
        basis = self.get_map_key({
            # InternalNode, unchanged in left:
            # Forces an internalNode at 'a'
        left = self.get_map_key({
            # All of basis unchanged
            # And a new top level node so the root key is different
        right = self.get_map_key({
            # A value that is unchanged from basis and thus should be filtered
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " 'a' InternalNode\n"
            " ('aaa',) 'left'\n"
            " ('abb',) 'right'\n"
            " ('ccc',) 'common'\n"
            " ('ddd',) 'change'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_d_key = left_map._root_node._items['d'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('abb',) 'right'\n",
            right_map._dump_tree())
        # Keys from the right side target - none, the root is enough.
        self.assertIterInteresting(
            [right, left, l_d_key],
            [(('ddd',), 'change')],
            [left, right], [basis])

    def test_multiple_maps_similar(self):
        # We want to have a depth=2 tree, with multiple entries in each leaf
        # node
        basis = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'will change left',
            ('caa',): 'unchanged',
            ('cbb',): 'will change right',
        left = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'changed left',
            ('caa',): 'unchanged',
            ('cbb',): 'will change right',
        right = self.get_map_key({
            ('aaa',): 'unchanged',
            ('abb',): 'will change left',
            ('caa',): 'unchanged',
            ('cbb',): 'changed right',
        basis_map = CHKMap(self.get_chk_bytes(), basis)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            basis_map._dump_tree())
        # Get left expected data
        left_map = CHKMap(self.get_chk_bytes(), left)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'changed left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'will change right'\n",
            left_map._dump_tree())
        # Keys from left side target
        l_a_key = left_map._root_node._items['a'].key()
        l_c_key = left_map._root_node._items['c'].key()
        # Get right expected data
        right_map = CHKMap(self.get_chk_bytes(), right)
        self.assertEqualDiff(
            " ('aaa',) 'unchanged'\n"
            " ('abb',) 'will change left'\n"
            " ('caa',) 'unchanged'\n"
            " ('cbb',) 'changed right'\n",
            right_map._dump_tree())
        r_a_key = right_map._root_node._items['a'].key()
        r_c_key = right_map._root_node._items['c'].key()
        self.assertIterInteresting(
            [right, left, l_a_key, r_c_key],
            [(('abb',), 'changed left'), (('cbb',), 'changed right')],
            [left, right], [basis])