1
# Copyright (C) 2008, 2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Tests for maps built on a CHK versionedfiles facility."""
19
from itertools import izip
26
from bzrlib.chk_map import (
34
class TestNode(tests.TestCase):
36
def assertCommonPrefix(self, expected_common, prefix, key):
37
common = Node.common_prefix(prefix, key)
38
self.assertTrue(len(common) <= len(prefix))
39
self.assertTrue(len(common) <= len(key))
40
self.assertStartsWith(prefix, common)
41
self.assertStartsWith(key, common)
42
self.assertEquals(expected_common, common)
44
def test_common_prefix(self):
45
self.assertCommonPrefix('beg', 'beg', 'begin')
47
def test_no_common_prefix(self):
48
self.assertCommonPrefix('', 'begin', 'end')
51
self.assertCommonPrefix('begin', 'begin', 'begin')
53
def test_not_a_prefix(self):
54
self.assertCommonPrefix('b', 'begin', 'b')
57
self.assertCommonPrefix('', '', 'end')
58
self.assertCommonPrefix('', 'begin', '')
59
self.assertCommonPrefix('', '', '')
62
class TestCaseWithStore(tests.TestCaseWithTransport):
64
def get_chk_bytes(self):
65
# The easiest way to get a CHK store is a development6 repository and
66
# then work with the chk_bytes attribute directly.
67
repo = self.make_repository(".", format="development6-rich-root")
69
self.addCleanup(repo.unlock)
70
repo.start_write_group()
71
self.addCleanup(repo.abort_write_group)
74
def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,
75
search_key_func=None):
77
chk_bytes = self.get_chk_bytes()
78
root_key = CHKMap.from_dict(chk_bytes, a_dict,
79
maximum_size=maximum_size, key_width=key_width,
80
search_key_func=search_key_func)
81
root_key2 = CHKMap._create_via_map(chk_bytes, a_dict,
82
maximum_size=maximum_size, key_width=key_width,
83
search_key_func=search_key_func)
84
self.assertEqual(root_key, root_key2, "CHKMap.from_dict() did not"
85
" match CHKMap._create_via_map")
86
chkmap = CHKMap(chk_bytes, root_key, search_key_func=search_key_func)
89
def read_bytes(self, chk_bytes, key):
90
stream = chk_bytes.get_record_stream([key], 'unordered', True)
91
record = stream.next()
92
if record.storage_kind == 'absent':
93
self.fail('Store does not contain the key %s' % (key,))
94
return record.get_bytes_as("fulltext")
96
def to_dict(self, node, *args):
97
return dict(node.iteritems(*args))
100
class TestMap(TestCaseWithStore):
102
def assertHasABMap(self, chk_bytes):
103
ab_leaf_bytes = 'chkleaf:\n0\n1\n1\na\n\x001\nb\n'
104
ab_sha1 = osutils.sha_string(ab_leaf_bytes)
105
self.assertEqual('90986195696b177c8895d48fdb4b7f2366f798a0', ab_sha1)
106
root_key = ('sha1:' + ab_sha1,)
107
self.assertEqual(ab_leaf_bytes, self.read_bytes(chk_bytes, root_key))
110
def assertHasEmptyMap(self, chk_bytes):
111
empty_leaf_bytes = 'chkleaf:\n0\n1\n0\n\n'
112
empty_sha1 = osutils.sha_string(empty_leaf_bytes)
113
self.assertEqual('8571e09bf1bcc5b9621ce31b3d4c93d6e9a1ed26', empty_sha1)
114
root_key = ('sha1:' + empty_sha1,)
115
self.assertEqual(empty_leaf_bytes, self.read_bytes(chk_bytes, root_key))
118
def assertMapLayoutEqual(self, map_one, map_two):
119
"""Assert that the internal structure is identical between the maps."""
120
map_one._ensure_root()
121
node_one_stack = [map_one._root_node]
122
map_two._ensure_root()
123
node_two_stack = [map_two._root_node]
124
while node_one_stack:
125
node_one = node_one_stack.pop()
126
node_two = node_two_stack.pop()
127
if node_one.__class__ != node_two.__class__:
128
self.assertEqualDiff(map_one._dump_tree(include_keys=True),
129
map_two._dump_tree(include_keys=True))
130
self.assertEqual(node_one._search_prefix,
131
node_two._search_prefix)
132
if isinstance(node_one, InternalNode):
133
# Internal nodes must have identical references
134
self.assertEqual(sorted(node_one._items.keys()),
135
sorted(node_two._items.keys()))
136
node_one_stack.extend([n for n, _ in
137
node_one._iter_nodes(map_one._store)])
138
node_two_stack.extend([n for n, _ in
139
node_two._iter_nodes(map_two._store)])
141
# Leaf nodes must have identical contents
142
self.assertEqual(node_one._items, node_two._items)
143
self.assertEquals([], node_two_stack)
145
def assertCanonicalForm(self, chkmap):
146
"""Assert that the chkmap is in 'canonical' form.
148
We do this by adding all of the key value pairs from scratch, both in
149
forward order and reverse order, and assert that the final tree layout
152
items = list(chkmap.iteritems())
153
map_forward = chk_map.CHKMap(None, None)
154
map_forward._root_node.set_maximum_size(chkmap._root_node.maximum_size)
155
for key, value in items:
156
map_forward.map(key, value)
157
self.assertMapLayoutEqual(map_forward, chkmap)
158
map_reverse = chk_map.CHKMap(None, None)
159
map_reverse._root_node.set_maximum_size(chkmap._root_node.maximum_size)
160
for key, value in reversed(items):
161
map_reverse.map(key, value)
162
self.assertMapLayoutEqual(map_reverse, chkmap)
164
def test_assert_map_layout_equal(self):
165
store = self.get_chk_bytes()
166
map_one = CHKMap(store, None)
167
map_one._root_node.set_maximum_size(20)
168
map_two = CHKMap(store, None)
169
map_two._root_node.set_maximum_size(20)
170
self.assertMapLayoutEqual(map_one, map_two)
171
map_one.map('aaa', 'value')
172
self.assertRaises(AssertionError,
173
self.assertMapLayoutEqual, map_one, map_two)
174
map_two.map('aaa', 'value')
175
self.assertMapLayoutEqual(map_one, map_two)
176
# Split the tree, so we ensure that internal nodes and leaf nodes are
178
map_one.map('aab', 'value')
179
self.assertIsInstance(map_one._root_node, InternalNode)
180
self.assertRaises(AssertionError,
181
self.assertMapLayoutEqual, map_one, map_two)
182
map_two.map('aab', 'value')
183
self.assertMapLayoutEqual(map_one, map_two)
184
map_one.map('aac', 'value')
185
self.assertRaises(AssertionError,
186
self.assertMapLayoutEqual, map_one, map_two)
187
self.assertCanonicalForm(map_one)
189
def test_from_dict_empty(self):
190
chk_bytes = self.get_chk_bytes()
191
root_key = CHKMap.from_dict(chk_bytes, {})
192
# Check the data was saved and inserted correctly.
193
expected_root_key = self.assertHasEmptyMap(chk_bytes)
194
self.assertEqual(expected_root_key, root_key)
196
def test_from_dict_ab(self):
197
chk_bytes = self.get_chk_bytes()
198
root_key = CHKMap.from_dict(chk_bytes, {"a": "b"})
199
# Check the data was saved and inserted correctly.
200
expected_root_key = self.assertHasABMap(chk_bytes)
201
self.assertEqual(expected_root_key, root_key)
203
def test_apply_empty_ab(self):
204
# applying a delta (None, "a", "b") to an empty chkmap generates the
205
# same map as from_dict_ab.
206
chk_bytes = self.get_chk_bytes()
207
root_key = CHKMap.from_dict(chk_bytes, {})
208
chkmap = CHKMap(chk_bytes, root_key)
209
new_root = chkmap.apply_delta([(None, "a", "b")])
210
# Check the data was saved and inserted correctly.
211
expected_root_key = self.assertHasABMap(chk_bytes)
212
self.assertEqual(expected_root_key, new_root)
213
# The update should have left us with an in memory root node, with an
215
self.assertEqual(new_root, chkmap._root_node._key)
217
def test_apply_ab_empty(self):
218
# applying a delta ("a", None, None) to a map with 'a' in it generates
220
chk_bytes = self.get_chk_bytes()
221
root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"})
222
chkmap = CHKMap(chk_bytes, root_key)
223
new_root = chkmap.apply_delta([(("a",), None, None)])
224
# Check the data was saved and inserted correctly.
225
expected_root_key = self.assertHasEmptyMap(chk_bytes)
226
self.assertEqual(expected_root_key, new_root)
227
# The update should have left us with an in memory root node, with an
229
self.assertEqual(new_root, chkmap._root_node._key)
231
def test_apply_delta_is_deterministic(self):
232
chk_bytes = self.get_chk_bytes()
233
chkmap1 = CHKMap(chk_bytes, None)
234
chkmap1._root_node.set_maximum_size(10)
235
chkmap1.apply_delta([(None, ('aaa',), 'common'),
236
(None, ('bba',), 'target2'),
237
(None, ('bbb',), 'common')])
238
root_key1 = chkmap1._save()
239
self.assertCanonicalForm(chkmap1)
241
chkmap2 = CHKMap(chk_bytes, None)
242
chkmap2._root_node.set_maximum_size(10)
243
chkmap2.apply_delta([(None, ('bbb',), 'common'),
244
(None, ('bba',), 'target2'),
245
(None, ('aaa',), 'common')])
246
root_key2 = chkmap2._save()
247
self.assertEqualDiff(chkmap1._dump_tree(include_keys=True),
248
chkmap2._dump_tree(include_keys=True))
249
self.assertEqual(root_key1, root_key2)
250
self.assertCanonicalForm(chkmap2)
252
def test_stable_splitting(self):
253
store = self.get_chk_bytes()
254
chkmap = CHKMap(store, None)
255
# Should fit 2 keys per LeafNode
256
chkmap._root_node.set_maximum_size(35)
257
chkmap.map(('aaa',), 'v')
258
self.assertEqualDiff("'' LeafNode\n"
261
chkmap.map(('aab',), 'v')
262
self.assertEqualDiff("'' LeafNode\n"
266
self.assertCanonicalForm(chkmap)
268
# Creates a new internal node, and splits the others into leaves
269
chkmap.map(('aac',), 'v')
270
self.assertEqualDiff("'' InternalNode\n"
278
self.assertCanonicalForm(chkmap)
280
# Splits again, because it can't fit in the current structure
281
chkmap.map(('bbb',), 'v')
282
self.assertEqualDiff("'' InternalNode\n"
283
" 'a' InternalNode\n"
293
self.assertCanonicalForm(chkmap)
295
def test_map_splits_with_longer_key(self):
296
store = self.get_chk_bytes()
297
chkmap = CHKMap(store, None)
298
# Should fit 1 key per LeafNode
299
chkmap._root_node.set_maximum_size(10)
300
chkmap.map(('aaa',), 'v')
301
chkmap.map(('aaaa',), 'v')
302
self.assertCanonicalForm(chkmap)
303
self.assertIsInstance(chkmap._root_node, InternalNode)
305
def test_with_linefeed_in_key(self):
306
store = self.get_chk_bytes()
307
chkmap = CHKMap(store, None)
308
# Should fit 1 key per LeafNode
309
chkmap._root_node.set_maximum_size(10)
310
chkmap.map(('a\ra',), 'val1')
311
chkmap.map(('a\rb',), 'val2')
312
chkmap.map(('ac',), 'val3')
313
self.assertCanonicalForm(chkmap)
314
self.assertEqualDiff("'' InternalNode\n"
315
" 'a\\r' InternalNode\n"
316
" 'a\\ra' LeafNode\n"
317
" ('a\\ra',) 'val1'\n"
318
" 'a\\rb' LeafNode\n"
319
" ('a\\rb',) 'val2'\n"
323
# We should also successfully serialise and deserialise these items
324
root_key = chkmap._save()
325
chkmap = CHKMap(store, root_key)
326
self.assertEqualDiff("'' InternalNode\n"
327
" 'a\\r' InternalNode\n"
328
" 'a\\ra' LeafNode\n"
329
" ('a\\ra',) 'val1'\n"
330
" 'a\\rb' LeafNode\n"
331
" ('a\\rb',) 'val2'\n"
336
def test_deep_splitting(self):
337
store = self.get_chk_bytes()
338
chkmap = CHKMap(store, None)
339
# Should fit 2 keys per LeafNode
340
chkmap._root_node.set_maximum_size(40)
341
chkmap.map(('aaaaaaaa',), 'v')
342
chkmap.map(('aaaaabaa',), 'v')
343
self.assertEqualDiff("'' LeafNode\n"
344
" ('aaaaaaaa',) 'v'\n"
345
" ('aaaaabaa',) 'v'\n",
347
chkmap.map(('aaabaaaa',), 'v')
348
chkmap.map(('aaababaa',), 'v')
349
self.assertEqualDiff("'' InternalNode\n"
351
" ('aaaaaaaa',) 'v'\n"
352
" ('aaaaabaa',) 'v'\n"
354
" ('aaabaaaa',) 'v'\n"
355
" ('aaababaa',) 'v'\n",
357
chkmap.map(('aaabacaa',), 'v')
358
chkmap.map(('aaabadaa',), 'v')
359
self.assertEqualDiff("'' InternalNode\n"
361
" ('aaaaaaaa',) 'v'\n"
362
" ('aaaaabaa',) 'v'\n"
363
" 'aaab' InternalNode\n"
364
" 'aaabaa' LeafNode\n"
365
" ('aaabaaaa',) 'v'\n"
366
" 'aaabab' LeafNode\n"
367
" ('aaababaa',) 'v'\n"
368
" 'aaabac' LeafNode\n"
369
" ('aaabacaa',) 'v'\n"
370
" 'aaabad' LeafNode\n"
371
" ('aaabadaa',) 'v'\n",
373
chkmap.map(('aaababba',), 'val')
374
chkmap.map(('aaababca',), 'val')
375
self.assertEqualDiff("'' InternalNode\n"
377
" ('aaaaaaaa',) 'v'\n"
378
" ('aaaaabaa',) 'v'\n"
379
" 'aaab' InternalNode\n"
380
" 'aaabaa' LeafNode\n"
381
" ('aaabaaaa',) 'v'\n"
382
" 'aaabab' InternalNode\n"
383
" 'aaababa' LeafNode\n"
384
" ('aaababaa',) 'v'\n"
385
" 'aaababb' LeafNode\n"
386
" ('aaababba',) 'val'\n"
387
" 'aaababc' LeafNode\n"
388
" ('aaababca',) 'val'\n"
389
" 'aaabac' LeafNode\n"
390
" ('aaabacaa',) 'v'\n"
391
" 'aaabad' LeafNode\n"
392
" ('aaabadaa',) 'v'\n",
394
# Now we add a node that should fit around an existing InternalNode,
395
# but has a slightly different key prefix, which causes a new
397
chkmap.map(('aaabDaaa',), 'v')
398
self.assertEqualDiff("'' InternalNode\n"
400
" ('aaaaaaaa',) 'v'\n"
401
" ('aaaaabaa',) 'v'\n"
402
" 'aaab' InternalNode\n"
403
" 'aaabD' LeafNode\n"
404
" ('aaabDaaa',) 'v'\n"
405
" 'aaaba' InternalNode\n"
406
" 'aaabaa' LeafNode\n"
407
" ('aaabaaaa',) 'v'\n"
408
" 'aaabab' InternalNode\n"
409
" 'aaababa' LeafNode\n"
410
" ('aaababaa',) 'v'\n"
411
" 'aaababb' LeafNode\n"
412
" ('aaababba',) 'val'\n"
413
" 'aaababc' LeafNode\n"
414
" ('aaababca',) 'val'\n"
415
" 'aaabac' LeafNode\n"
416
" ('aaabacaa',) 'v'\n"
417
" 'aaabad' LeafNode\n"
418
" ('aaabadaa',) 'v'\n",
421
def test_map_collapses_if_size_changes(self):
422
store = self.get_chk_bytes()
423
chkmap = CHKMap(store, None)
424
# Should fit 2 keys per LeafNode
425
chkmap._root_node.set_maximum_size(35)
426
chkmap.map(('aaa',), 'v')
427
chkmap.map(('aab',), 'very long value that splits')
428
self.assertEqualDiff("'' InternalNode\n"
432
" ('aab',) 'very long value that splits'\n",
434
self.assertCanonicalForm(chkmap)
435
# Now changing the value to something small should cause a rebuild
436
chkmap.map(('aab',), 'v')
437
self.assertEqualDiff("'' LeafNode\n"
441
self.assertCanonicalForm(chkmap)
443
def test_map_double_deep_collapses(self):
444
store = self.get_chk_bytes()
445
chkmap = CHKMap(store, None)
446
# Should fit 3 small keys per LeafNode
447
chkmap._root_node.set_maximum_size(40)
448
chkmap.map(('aaa',), 'v')
449
chkmap.map(('aab',), 'very long value that splits')
450
chkmap.map(('abc',), 'v')
451
self.assertEqualDiff("'' InternalNode\n"
452
" 'aa' InternalNode\n"
456
" ('aab',) 'very long value that splits'\n"
460
chkmap.map(('aab',), 'v')
461
self.assertCanonicalForm(chkmap)
462
self.assertEqualDiff("'' LeafNode\n"
468
def test_stable_unmap(self):
469
store = self.get_chk_bytes()
470
chkmap = CHKMap(store, None)
471
# Should fit 2 keys per LeafNode
472
chkmap._root_node.set_maximum_size(35)
473
chkmap.map(('aaa',), 'v')
474
chkmap.map(('aab',), 'v')
475
self.assertEqualDiff("'' LeafNode\n"
479
# Creates a new internal node, and splits the others into leaves
480
chkmap.map(('aac',), 'v')
481
self.assertEqualDiff("'' InternalNode\n"
489
self.assertCanonicalForm(chkmap)
490
# Now lets unmap one of the keys, and assert that we collapse the
492
chkmap.unmap(('aac',))
493
self.assertEqualDiff("'' LeafNode\n"
497
self.assertCanonicalForm(chkmap)
499
def test_unmap_double_deep(self):
500
store = self.get_chk_bytes()
501
chkmap = CHKMap(store, None)
502
# Should fit 3 keys per LeafNode
503
chkmap._root_node.set_maximum_size(40)
504
chkmap.map(('aaa',), 'v')
505
chkmap.map(('aaab',), 'v')
506
chkmap.map(('aab',), 'very long value')
507
chkmap.map(('abc',), 'v')
508
self.assertEqualDiff("'' InternalNode\n"
509
" 'aa' InternalNode\n"
514
" ('aab',) 'very long value'\n"
518
# Removing the 'aab' key should cause everything to collapse back to a
520
chkmap.unmap(('aab',))
521
self.assertEqualDiff("'' LeafNode\n"
527
def test_unmap_double_deep_non_empty_leaf(self):
528
store = self.get_chk_bytes()
529
chkmap = CHKMap(store, None)
530
# Should fit 3 keys per LeafNode
531
chkmap._root_node.set_maximum_size(40)
532
chkmap.map(('aaa',), 'v')
533
chkmap.map(('aab',), 'long value')
534
chkmap.map(('aabb',), 'v')
535
chkmap.map(('abc',), 'v')
536
self.assertEqualDiff("'' InternalNode\n"
537
" 'aa' InternalNode\n"
541
" ('aab',) 'long value'\n"
546
# Removing the 'aab' key should cause everything to collapse back to a
548
chkmap.unmap(('aab',))
549
self.assertEqualDiff("'' LeafNode\n"
555
def test_unmap_with_known_internal_node_doesnt_page(self):
556
store = self.get_chk_bytes()
557
chkmap = CHKMap(store, None)
558
# Should fit 3 keys per LeafNode
559
chkmap._root_node.set_maximum_size(30)
560
chkmap.map(('aaa',), 'v')
561
chkmap.map(('aab',), 'v')
562
chkmap.map(('aac',), 'v')
563
chkmap.map(('abc',), 'v')
564
chkmap.map(('acd',), 'v')
565
self.assertEqualDiff("'' InternalNode\n"
566
" 'aa' InternalNode\n"
578
# Save everything to the map, and start over
579
chkmap = CHKMap(store, chkmap._save())
580
# Mapping an 'aa' key loads the internal node, but should not map the
581
# 'ab' and 'ac' nodes
582
chkmap.map(('aad',), 'v')
583
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
584
self.assertIsInstance(chkmap._root_node._items['ab'], tuple)
585
self.assertIsInstance(chkmap._root_node._items['ac'], tuple)
586
# Unmapping 'acd' can notice that 'aa' is an InternalNode and not have
588
chkmap.unmap(('acd',))
589
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode)
590
self.assertIsInstance(chkmap._root_node._items['ab'], tuple)
592
def test_unmap_without_fitting_doesnt_page_in(self):
593
store = self.get_chk_bytes()
594
chkmap = CHKMap(store, None)
595
# Should fit 2 keys per LeafNode
596
chkmap._root_node.set_maximum_size(20)
597
chkmap.map(('aaa',), 'v')
598
chkmap.map(('aab',), 'v')
599
self.assertEqualDiff("'' InternalNode\n"
605
# Save everything to the map, and start over
606
chkmap = CHKMap(store, chkmap._save())
607
chkmap.map(('aac',), 'v')
608
chkmap.map(('aad',), 'v')
609
chkmap.map(('aae',), 'v')
610
chkmap.map(('aaf',), 'v')
611
# At this point, the previous nodes should not be paged in, but the
612
# newly added nodes would be
613
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
614
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
615
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
616
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
617
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
618
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
619
# Now unmapping one of the new nodes will use only the already-paged-in
620
# nodes to determine that we don't need to do more.
621
chkmap.unmap(('aaf',))
622
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
623
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
624
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
625
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
626
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
628
def test_unmap_pages_in_if_necessary(self):
629
store = self.get_chk_bytes()
630
chkmap = CHKMap(store, None)
631
# Should fit 2 keys per LeafNode
632
chkmap._root_node.set_maximum_size(30)
633
chkmap.map(('aaa',), 'val')
634
chkmap.map(('aab',), 'val')
635
chkmap.map(('aac',), 'val')
636
self.assertEqualDiff("'' InternalNode\n"
644
root_key = chkmap._save()
645
# Save everything to the map, and start over
646
chkmap = CHKMap(store, root_key)
647
chkmap.map(('aad',), 'v')
648
# At this point, the previous nodes should not be paged in, but the
649
# newly added node would be
650
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
651
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
652
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
653
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
654
# Unmapping the new node will check the existing nodes to see if they
656
# Clear the page cache so we ensure we have to read all the children
657
chk_map._page_cache.clear()
658
chkmap.unmap(('aad',))
659
self.assertIsInstance(chkmap._root_node._items['aaa'], LeafNode)
660
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
661
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
663
def test_unmap_pages_in_from_page_cache(self):
664
store = self.get_chk_bytes()
665
chkmap = CHKMap(store, None)
666
# Should fit 2 keys per LeafNode
667
chkmap._root_node.set_maximum_size(30)
668
chkmap.map(('aaa',), 'val')
669
chkmap.map(('aab',), 'val')
670
chkmap.map(('aac',), 'val')
671
root_key = chkmap._save()
672
# Save everything to the map, and start over
673
chkmap = CHKMap(store, root_key)
674
chkmap.map(('aad',), 'val')
675
self.assertEqualDiff("'' InternalNode\n"
685
# Save everything to the map, start over after _dump_tree
686
chkmap = CHKMap(store, root_key)
687
chkmap.map(('aad',), 'v')
688
# At this point, the previous nodes should not be paged in, but the
689
# newly added node would be
690
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
691
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
692
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
693
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
694
# Now clear the page cache, and only include 2 of the children in the
696
aab_key = chkmap._root_node._items['aab']
697
aab_bytes = chk_map._page_cache[aab_key]
698
aac_key = chkmap._root_node._items['aac']
699
aac_bytes = chk_map._page_cache[aac_key]
700
chk_map._page_cache.clear()
701
chk_map._page_cache[aab_key] = aab_bytes
702
chk_map._page_cache[aac_key] = aac_bytes
704
# Unmapping the new node will check the nodes from the page cache
705
# first, and not have to read in 'aaa'
706
chkmap.unmap(('aad',))
707
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
708
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode)
709
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode)
711
def test_unmap_uses_existing_items(self):
712
store = self.get_chk_bytes()
713
chkmap = CHKMap(store, None)
714
# Should fit 2 keys per LeafNode
715
chkmap._root_node.set_maximum_size(30)
716
chkmap.map(('aaa',), 'val')
717
chkmap.map(('aab',), 'val')
718
chkmap.map(('aac',), 'val')
719
root_key = chkmap._save()
720
# Save everything to the map, and start over
721
chkmap = CHKMap(store, root_key)
722
chkmap.map(('aad',), 'val')
723
chkmap.map(('aae',), 'val')
724
chkmap.map(('aaf',), 'val')
725
# At this point, the previous nodes should not be paged in, but the
726
# newly added node would be
727
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
728
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
729
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
730
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode)
731
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
732
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
734
# Unmapping a new node will see the other nodes that are already in
735
# memory, and not need to page in anything else
736
chkmap.unmap(('aad',))
737
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple)
738
self.assertIsInstance(chkmap._root_node._items['aab'], tuple)
739
self.assertIsInstance(chkmap._root_node._items['aac'], tuple)
740
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode)
741
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode)
743
def test_iter_changes_empty_ab(self):
744
# Asking for changes between an empty dict to a dict with keys returns
746
basis = self._get_map({}, maximum_size=10)
747
target = self._get_map(
748
{('a',): 'content here', ('b',): 'more content'},
749
chk_bytes=basis._store, maximum_size=10)
750
self.assertEqual([(('a',), None, 'content here'),
751
(('b',), None, 'more content')],
752
sorted(list(target.iter_changes(basis))))
754
def test_iter_changes_ab_empty(self):
755
# Asking for changes between a dict with keys to an empty dict returns
757
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
759
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
760
self.assertEqual([(('a',), 'content here', None),
761
(('b',), 'more content', None)],
762
sorted(list(target.iter_changes(basis))))
764
def test_iter_changes_empty_empty_is_empty(self):
765
basis = self._get_map({}, maximum_size=10)
766
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10)
767
self.assertEqual([], sorted(list(target.iter_changes(basis))))
769
def test_iter_changes_ab_ab_is_empty(self):
770
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
772
target = self._get_map(
773
{('a',): 'content here', ('b',): 'more content'},
774
chk_bytes=basis._store, maximum_size=10)
775
self.assertEqual([], sorted(list(target.iter_changes(basis))))
777
def test_iter_changes_ab_ab_nodes_not_loaded(self):
778
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
780
target = self._get_map(
781
{('a',): 'content here', ('b',): 'more content'},
782
chk_bytes=basis._store, maximum_size=10)
783
list(target.iter_changes(basis))
784
self.assertIsInstance(target._root_node, tuple)
785
self.assertIsInstance(basis._root_node, tuple)
787
def test_iter_changes_ab_ab_changed_values_shown(self):
788
basis = self._get_map({('a',): 'content here', ('b',): 'more content'},
790
target = self._get_map(
791
{('a',): 'content here', ('b',): 'different content'},
792
chk_bytes=basis._store, maximum_size=10)
793
result = sorted(list(target.iter_changes(basis)))
794
self.assertEqual([(('b',), 'more content', 'different content')],
797
def test_iter_changes_mixed_node_length(self):
798
# When one side has different node lengths than the other, common
799
# but different keys still need to be show, and new-and-old included
801
# aaa - common unaltered
802
# aab - common altered
806
# aaa to be not loaded (later test)
807
# aab, b, at to be returned.
808
# basis splits at byte 0,1,2, aaa is commonb is basis only
809
basis_dict = {('aaa',): 'foo bar',
810
('aab',): 'common altered a', ('b',): 'foo bar b'}
811
# target splits at byte 1,2, at is target only
812
target_dict = {('aaa',): 'foo bar',
813
('aab',): 'common altered b', ('at',): 'foo bar t'}
815
(('aab',), 'common altered a', 'common altered b'),
816
(('at',), None, 'foo bar t'),
817
(('b',), 'foo bar b', None),
819
basis = self._get_map(basis_dict, maximum_size=10)
820
target = self._get_map(target_dict, maximum_size=10,
821
chk_bytes=basis._store)
822
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
824
def test_iter_changes_common_pages_not_loaded(self):
825
# aaa - common unaltered
826
# aab - common altered
830
# aaa to be not loaded
831
# aaa not to be in result.
832
basis_dict = {('aaa',): 'foo bar',
833
('aab',): 'common altered a', ('b',): 'foo bar b'}
834
# target splits at byte 1, at is target only
835
target_dict = {('aaa',): 'foo bar',
836
('aab',): 'common altered b', ('at',): 'foo bar t'}
837
basis = self._get_map(basis_dict, maximum_size=10)
838
target = self._get_map(target_dict, maximum_size=10,
839
chk_bytes=basis._store)
840
basis_get = basis._store.get_record_stream
841
def get_record_stream(keys, order, fulltext):
842
if ('sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys:
843
self.fail("'aaa' pointer was followed %r" % keys)
844
return basis_get(keys, order, fulltext)
845
basis._store.get_record_stream = get_record_stream
846
result = sorted(list(target.iter_changes(basis)))
847
for change in result:
848
if change[0] == ('aaa',):
849
self.fail("Found unexpected change: %s" % change)
851
def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self):
852
# Within a leaf there are no hash's to exclude keys, make sure multi
853
# value leaf nodes are handled well.
854
basis_dict = {('aaa',): 'foo bar',
855
('aab',): 'common altered a', ('b',): 'foo bar b'}
856
target_dict = {('aaa',): 'foo bar',
857
('aab',): 'common altered b', ('at',): 'foo bar t'}
859
(('aab',), 'common altered a', 'common altered b'),
860
(('at',), None, 'foo bar t'),
861
(('b',), 'foo bar b', None),
863
basis = self._get_map(basis_dict)
864
target = self._get_map(target_dict, chk_bytes=basis._store)
865
self.assertEqual(changes, sorted(list(target.iter_changes(basis))))
867
def test_iteritems_empty(self):
868
chk_bytes = self.get_chk_bytes()
869
root_key = CHKMap.from_dict(chk_bytes, {})
870
chkmap = CHKMap(chk_bytes, root_key)
871
self.assertEqual([], list(chkmap.iteritems()))
873
def test_iteritems_two_items(self):
874
chk_bytes = self.get_chk_bytes()
875
root_key = CHKMap.from_dict(chk_bytes,
876
{"a":"content here", "b":"more content"})
877
chkmap = CHKMap(chk_bytes, root_key)
878
self.assertEqual([(("a",), "content here"), (("b",), "more content")],
879
sorted(list(chkmap.iteritems())))
881
def test_iteritems_selected_one_of_two_items(self):
882
chkmap = self._get_map( {("a",):"content here", ("b",):"more content"})
883
self.assertEqual({("a",): "content here"},
884
self.to_dict(chkmap, [("a",)]))
886
def test_iteritems_keys_prefixed_by_2_width_nodes(self):
887
chkmap = self._get_map(
888
{("a","a"):"content here", ("a", "b",):"more content",
889
("b", ""): 'boring content'},
890
maximum_size=10, key_width=2)
892
{("a", "a"): "content here", ("a", "b"): 'more content'},
893
self.to_dict(chkmap, [("a",)]))
895
def test_iteritems_keys_prefixed_by_2_width_nodes_hashed(self):
896
search_key_func = chk_map.search_key_registry.get('hash-16-way')
897
self.assertEqual('E8B7BE43\x00E8B7BE43', search_key_func(('a', 'a')))
898
self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
899
self.assertEqual('71BEEFF9\x0000000000', search_key_func(('b', '')))
900
chkmap = self._get_map(
901
{("a","a"):"content here", ("a", "b",):"more content",
902
("b", ""): 'boring content'},
903
maximum_size=10, key_width=2, search_key_func=search_key_func)
905
{("a", "a"): "content here", ("a", "b"): 'more content'},
906
self.to_dict(chkmap, [("a",)]))
908
def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
909
chkmap = self._get_map(
910
{("a","a"):"content here", ("a", "b",):"more content",
911
("b", ""): 'boring content'}, key_width=2)
913
{("a", "a"): "content here", ("a", "b"): 'more content'},
914
self.to_dict(chkmap, [("a",)]))
916
def test___len__empty(self):
917
chkmap = self._get_map({})
918
self.assertEqual(0, len(chkmap))
920
def test___len__2(self):
921
chkmap = self._get_map({("foo",):"bar", ("gam",):"quux"})
922
self.assertEqual(2, len(chkmap))
924
def test_max_size_100_bytes_new(self):
925
# When there is a 100 byte upper node limit, a tree is formed.
926
chkmap = self._get_map({("k1"*50,):"v1", ("k2"*50,):"v2"}, maximum_size=100)
927
# We expect three nodes:
928
# A root, with two children, and with two key prefixes - k1 to one, and
929
# k2 to the other as our node splitting is only just being developed.
930
# The maximum size should be embedded
931
chkmap._ensure_root()
932
self.assertEqual(100, chkmap._root_node.maximum_size)
933
self.assertEqual(1, chkmap._root_node._key_width)
934
# There should be two child nodes, and prefix of 2(bytes):
935
self.assertEqual(2, len(chkmap._root_node._items))
936
self.assertEqual("k", chkmap._root_node._compute_search_prefix())
937
# The actual nodes pointed at will change as serialisers change; so
938
# here we test that the key prefix is correct; then load the nodes and
939
# check they have the right pointed at key; whether they have the
940
# pointed at value inline or not is also unrelated to this test so we
941
# don't check that in detail - rather we just check the aggregate
943
nodes = sorted(chkmap._root_node._items.items())
946
self.assertEqual('k1', ptr1[0])
947
self.assertEqual('k2', ptr2[0])
948
node1 = chk_map._deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1], None)
949
self.assertIsInstance(node1, LeafNode)
950
self.assertEqual(1, len(node1))
951
self.assertEqual({('k1'*50,): 'v1'}, self.to_dict(node1, chkmap._store))
952
node2 = chk_map._deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1], None)
953
self.assertIsInstance(node2, LeafNode)
954
self.assertEqual(1, len(node2))
955
self.assertEqual({('k2'*50,): 'v2'}, self.to_dict(node2, chkmap._store))
956
# Having checked we have a good structure, check that the content is
958
self.assertEqual(2, len(chkmap))
959
self.assertEqual({("k1"*50,): "v1", ("k2"*50,): "v2"},
960
self.to_dict(chkmap))
962
def test_init_root_is_LeafNode_new(self):
963
chk_bytes = self.get_chk_bytes()
964
chkmap = CHKMap(chk_bytes, None)
965
self.assertIsInstance(chkmap._root_node, LeafNode)
966
self.assertEqual({}, self.to_dict(chkmap))
967
self.assertEqual(0, len(chkmap))
969
def test_init_and_save_new(self):
970
chk_bytes = self.get_chk_bytes()
971
chkmap = CHKMap(chk_bytes, None)
973
leaf_node = LeafNode()
974
self.assertEqual([key], leaf_node.serialise(chk_bytes))
976
def test_map_first_item_new(self):
977
chk_bytes = self.get_chk_bytes()
978
chkmap = CHKMap(chk_bytes, None)
979
chkmap.map(("foo,",), "bar")
980
self.assertEqual({('foo,',): 'bar'}, self.to_dict(chkmap))
981
self.assertEqual(1, len(chkmap))
983
leaf_node = LeafNode()
984
leaf_node.map(chk_bytes, ("foo,",), "bar")
985
self.assertEqual([key], leaf_node.serialise(chk_bytes))
987
def test_unmap_last_item_root_is_leaf_new(self):
988
chkmap = self._get_map({("k1"*50,): "v1", ("k2"*50,): "v2"})
989
chkmap.unmap(("k1"*50,))
990
chkmap.unmap(("k2"*50,))
991
self.assertEqual(0, len(chkmap))
992
self.assertEqual({}, self.to_dict(chkmap))
994
leaf_node = LeafNode()
995
self.assertEqual([key], leaf_node.serialise(chkmap._store))
997
def test__dump_tree(self):
998
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2",
999
("bbb",): "value3",},
1001
self.assertEqualDiff("'' InternalNode\n"
1002
" 'a' InternalNode\n"
1004
" ('aaa',) 'value1'\n"
1006
" ('aab',) 'value2'\n"
1008
" ('bbb',) 'value3'\n",
1009
chkmap._dump_tree())
1010
self.assertEqualDiff("'' InternalNode\n"
1011
" 'a' InternalNode\n"
1013
" ('aaa',) 'value1'\n"
1015
" ('aab',) 'value2'\n"
1017
" ('bbb',) 'value3'\n",
1018
chkmap._dump_tree())
1019
self.assertEqualDiff(
1020
"'' InternalNode sha1:0690d471eb0a624f359797d0ee4672bd68f4e236\n"
1021
" 'a' InternalNode sha1:1514c35503da9418d8fd90c1bed553077cb53673\n"
1022
" 'aaa' LeafNode sha1:4cc5970454d40b4ce297a7f13ddb76f63b88fefb\n"
1023
" ('aaa',) 'value1'\n"
1024
" 'aab' LeafNode sha1:1d68bc90914ef8a3edbcc8bb28b00cb4fea4b5e2\n"
1025
" ('aab',) 'value2'\n"
1026
" 'b' LeafNode sha1:3686831435b5596515353364eab0399dc45d49e7\n"
1027
" ('bbb',) 'value3'\n",
1028
chkmap._dump_tree(include_keys=True))
1030
def test__dump_tree_in_progress(self):
1031
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2"},
1033
chkmap.map(('bbb',), 'value3')
1034
self.assertEqualDiff("'' InternalNode\n"
1035
" 'a' InternalNode\n"
1037
" ('aaa',) 'value1'\n"
1039
" ('aab',) 'value2'\n"
1041
" ('bbb',) 'value3'\n",
1042
chkmap._dump_tree())
1043
# For things that are updated by adding 'bbb', we don't have a sha key
1044
# for them yet, so they are listed as None
1045
self.assertEqualDiff(
1046
"'' InternalNode None\n"
1047
" 'a' InternalNode sha1:6b0d881dd739a66f733c178b24da64395edfaafd\n"
1048
" 'aaa' LeafNode sha1:40b39a08d895babce17b20ae5f62d187eaa4f63a\n"
1049
" ('aaa',) 'value1'\n"
1050
" 'aab' LeafNode sha1:ad1dc7c4e801302c95bf1ba7b20bc45e548cd51a\n"
1051
" ('aab',) 'value2'\n"
1052
" 'b' LeafNode None\n"
1053
" ('bbb',) 'value3'\n",
1054
chkmap._dump_tree(include_keys=True))
1057
def _search_key_single(key):
1058
"""A search key function that maps all nodes to the same value"""
1061
def _test_search_key(key):
1062
return 'test:' + '\x00'.join(key)
1065
class TestMapSearchKeys(TestCaseWithStore):
1067
def test_default_chk_map_uses_flat_search_key(self):
1068
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None)
1069
self.assertEqual('1',
1070
chkmap._search_key_func(('1',)))
1071
self.assertEqual('1\x002',
1072
chkmap._search_key_func(('1', '2')))
1073
self.assertEqual('1\x002\x003',
1074
chkmap._search_key_func(('1', '2', '3')))
1076
def test_search_key_is_passed_to_root_node(self):
1077
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1078
search_key_func=_test_search_key)
1079
self.assertIs(_test_search_key, chkmap._search_key_func)
1080
self.assertEqual('test:1\x002\x003',
1081
chkmap._search_key_func(('1', '2', '3')))
1082
self.assertEqual('test:1\x002\x003',
1083
chkmap._root_node._search_key(('1', '2', '3')))
1085
def test_search_key_passed_via__ensure_root(self):
1086
chk_bytes = self.get_chk_bytes()
1087
chkmap = chk_map.CHKMap(chk_bytes, None,
1088
search_key_func=_test_search_key)
1089
root_key = chkmap._save()
1090
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1091
search_key_func=_test_search_key)
1092
chkmap._ensure_root()
1093
self.assertEqual('test:1\x002\x003',
1094
chkmap._root_node._search_key(('1', '2', '3')))
1096
def test_search_key_with_internal_node(self):
1097
chk_bytes = self.get_chk_bytes()
1098
chkmap = chk_map.CHKMap(chk_bytes, None,
1099
search_key_func=_test_search_key)
1100
chkmap._root_node.set_maximum_size(10)
1101
chkmap.map(('1',), 'foo')
1102
chkmap.map(('2',), 'bar')
1103
chkmap.map(('3',), 'baz')
1104
self.assertEqualDiff("'' InternalNode\n"
1105
" 'test:1' LeafNode\n"
1107
" 'test:2' LeafNode\n"
1109
" 'test:3' LeafNode\n"
1111
, chkmap._dump_tree())
1112
root_key = chkmap._save()
1113
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1114
search_key_func=_test_search_key)
1115
self.assertEqualDiff("'' InternalNode\n"
1116
" 'test:1' LeafNode\n"
1118
" 'test:2' LeafNode\n"
1120
" 'test:3' LeafNode\n"
1122
, chkmap._dump_tree())
1124
def test_search_key_16(self):
1125
chk_bytes = self.get_chk_bytes()
1126
chkmap = chk_map.CHKMap(chk_bytes, None,
1127
search_key_func=chk_map._search_key_16)
1128
chkmap._root_node.set_maximum_size(10)
1129
chkmap.map(('1',), 'foo')
1130
chkmap.map(('2',), 'bar')
1131
chkmap.map(('3',), 'baz')
1132
self.assertEqualDiff("'' InternalNode\n"
1139
, chkmap._dump_tree())
1140
root_key = chkmap._save()
1141
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1142
search_key_func=chk_map._search_key_16)
1143
# We can get the values back correctly
1144
self.assertEqual([(('1',), 'foo')],
1145
list(chkmap.iteritems([('1',)])))
1146
self.assertEqualDiff("'' InternalNode\n"
1153
, chkmap._dump_tree())
1155
def test_search_key_255(self):
1156
chk_bytes = self.get_chk_bytes()
1157
chkmap = chk_map.CHKMap(chk_bytes, None,
1158
search_key_func=chk_map._search_key_255)
1159
chkmap._root_node.set_maximum_size(10)
1160
chkmap.map(('1',), 'foo')
1161
chkmap.map(('2',), 'bar')
1162
chkmap.map(('3',), 'baz')
1163
self.assertEqualDiff("'' InternalNode\n"
1164
" '\\x1a' LeafNode\n"
1168
" '\\x83' LeafNode\n"
1170
, chkmap._dump_tree())
1171
root_key = chkmap._save()
1172
chkmap = chk_map.CHKMap(chk_bytes, root_key,
1173
search_key_func=chk_map._search_key_255)
1174
# We can get the values back correctly
1175
self.assertEqual([(('1',), 'foo')],
1176
list(chkmap.iteritems([('1',)])))
1177
self.assertEqualDiff("'' InternalNode\n"
1178
" '\\x1a' LeafNode\n"
1182
" '\\x83' LeafNode\n"
1184
, chkmap._dump_tree())
1186
def test_search_key_collisions(self):
1187
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
1188
search_key_func=_search_key_single)
1189
# The node will want to expand, but it cannot, because it knows that
1190
# all the keys must map to this node
1191
chkmap._root_node.set_maximum_size(20)
1192
chkmap.map(('1',), 'foo')
1193
chkmap.map(('2',), 'bar')
1194
chkmap.map(('3',), 'baz')
1195
self.assertEqualDiff("'' LeafNode\n"
1199
, chkmap._dump_tree())
1202
class TestSearchKeyFuncs(tests.TestCase):
1204
def assertSearchKey16(self, expected, key):
1205
self.assertEqual(expected, chk_map._search_key_16(key))
1207
def assertSearchKey255(self, expected, key):
1208
actual = chk_map._search_key_255(key)
1209
self.assertEqual(expected, actual, 'actual: %r' % (actual,))
1211
def test_simple_16(self):
1212
self.assertSearchKey16('8C736521', ('foo',))
1213
self.assertSearchKey16('8C736521\x008C736521', ('foo', 'foo'))
1214
self.assertSearchKey16('8C736521\x0076FF8CAA', ('foo', 'bar'))
1215
self.assertSearchKey16('ED82CD11', ('abcd',))
1217
def test_simple_255(self):
1218
self.assertSearchKey255('\x8cse!', ('foo',))
1219
self.assertSearchKey255('\x8cse!\x00\x8cse!', ('foo', 'foo'))
1220
self.assertSearchKey255('\x8cse!\x00v\xff\x8c\xaa', ('foo', 'bar'))
1221
# The standard mapping for these would include '\n', so it should be
1223
self.assertSearchKey255('\xfdm\x93_\x00P_\x1bL', ('<', 'V'))
1225
def test_255_does_not_include_newline(self):
1226
# When mapping via _search_key_255, we should never have the '\n'
1227
# character, but all other 255 values should be present
1229
for char_in in range(256):
1230
search_key = chk_map._search_key_255((chr(char_in),))
1231
chars_used.update(search_key)
1232
all_chars = set([chr(x) for x in range(256)])
1233
unused_chars = all_chars.symmetric_difference(chars_used)
1234
self.assertEqual(set('\n'), unused_chars)
1237
class TestLeafNode(TestCaseWithStore):
1239
def test_current_size_empty(self):
1241
self.assertEqual(16, node._current_size())
1243
def test_current_size_size_changed(self):
1245
node.set_maximum_size(10)
1246
self.assertEqual(17, node._current_size())
1248
def test_current_size_width_changed(self):
1250
node._key_width = 10
1251
self.assertEqual(17, node._current_size())
1253
def test_current_size_items(self):
1255
base_size = node._current_size()
1256
node.map(None, ("foo bar",), "baz")
1257
self.assertEqual(base_size + 14, node._current_size())
1259
def test_deserialise_empty(self):
1260
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
1261
self.assertEqual(0, len(node))
1262
self.assertEqual(10, node.maximum_size)
1263
self.assertEqual(("sha1:1234",), node.key())
1264
self.assertIs(None, node._search_prefix)
1265
self.assertIs(None, node._common_serialised_prefix)
1267
def test_deserialise_items(self):
1268
node = LeafNode.deserialise(
1269
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1271
self.assertEqual(2, len(node))
1272
self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")],
1273
sorted(node.iteritems(None)))
1275
def test_deserialise_item_with_null_width_1(self):
1276
node = LeafNode.deserialise(
1277
"chkleaf:\n0\n1\n2\n\nfoo\x001\nbar\x00baz\nquux\x001\nblarh\n",
1279
self.assertEqual(2, len(node))
1280
self.assertEqual([(("foo",), "bar\x00baz"), (("quux",), "blarh")],
1281
sorted(node.iteritems(None)))
1283
def test_deserialise_item_with_null_width_2(self):
1284
node = LeafNode.deserialise(
1285
"chkleaf:\n0\n2\n2\n\nfoo\x001\x001\nbar\x00baz\n"
1286
"quux\x00\x001\nblarh\n",
1288
self.assertEqual(2, len(node))
1289
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("quux", ""), "blarh")],
1290
sorted(node.iteritems(None)))
1292
def test_iteritems_selected_one_of_two_items(self):
1293
node = LeafNode.deserialise(
1294
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1296
self.assertEqual(2, len(node))
1297
self.assertEqual([(("quux",), "blarh")],
1298
sorted(node.iteritems(None, [("quux",), ("qaz",)])))
1300
def test_deserialise_item_with_common_prefix(self):
1301
node = LeafNode.deserialise(
1302
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x001\nbar\x00baz\n2\x001\nblarh\n",
1304
self.assertEqual(2, len(node))
1305
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("foo", "2"), "blarh")],
1306
sorted(node.iteritems(None)))
1307
self.assertIs(chk_map._unknown, node._search_prefix)
1308
self.assertEqual('foo\x00', node._common_serialised_prefix)
1310
def test_deserialise_multi_line(self):
1311
node = LeafNode.deserialise(
1312
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x002\nbar\nbaz\n2\x002\nblarh\n\n",
1314
self.assertEqual(2, len(node))
1315
self.assertEqual([(("foo", "1"), "bar\nbaz"),
1316
(("foo", "2"), "blarh\n"),
1317
], sorted(node.iteritems(None)))
1318
self.assertIs(chk_map._unknown, node._search_prefix)
1319
self.assertEqual('foo\x00', node._common_serialised_prefix)
1321
def test_key_new(self):
1323
self.assertEqual(None, node.key())
1325
def test_key_after_map(self):
1326
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",))
1327
node.map(None, ("foo bar",), "baz quux")
1328
self.assertEqual(None, node.key())
1330
def test_key_after_unmap(self):
1331
node = LeafNode.deserialise(
1332
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n",
1334
node.unmap(None, ("foo bar",))
1335
self.assertEqual(None, node.key())
1337
def test_map_exceeding_max_size_only_entry_new(self):
1339
node.set_maximum_size(10)
1340
result = node.map(None, ("foo bar",), "baz quux")
1341
self.assertEqual(("foo bar", [("", node)]), result)
1342
self.assertTrue(10 < node._current_size())
1344
def test_map_exceeding_max_size_second_entry_early_difference_new(self):
1346
node.set_maximum_size(10)
1347
node.map(None, ("foo bar",), "baz quux")
1348
prefix, result = list(node.map(None, ("blue",), "red"))
1349
self.assertEqual("", prefix)
1350
self.assertEqual(2, len(result))
1351
split_chars = set([result[0][0], result[1][0]])
1352
self.assertEqual(set(["f", "b"]), split_chars)
1353
nodes = dict(result)
1355
self.assertEqual({("foo bar",): "baz quux"}, self.to_dict(node, None))
1356
self.assertEqual(10, node.maximum_size)
1357
self.assertEqual(1, node._key_width)
1359
self.assertEqual({("blue",): "red"}, self.to_dict(node, None))
1360
self.assertEqual(10, node.maximum_size)
1361
self.assertEqual(1, node._key_width)
1363
def test_map_first(self):
1365
result = node.map(None, ("foo bar",), "baz quux")
1366
self.assertEqual(("foo bar", [("", node)]), result)
1367
self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node, None))
1368
self.assertEqual(1, len(node))
1370
def test_map_second(self):
1372
node.map(None, ("foo bar",), "baz quux")
1373
result = node.map(None, ("bingo",), "bango")
1374
self.assertEqual(("", [("", node)]), result)
1375
self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"},
1376
self.to_dict(node, None))
1377
self.assertEqual(2, len(node))
1379
def test_map_replacement(self):
1381
node.map(None, ("foo bar",), "baz quux")
1382
result = node.map(None, ("foo bar",), "bango")
1383
self.assertEqual(("foo bar", [("", node)]), result)
1384
self.assertEqual({("foo bar",): "bango"},
1385
self.to_dict(node, None))
1386
self.assertEqual(1, len(node))
1388
def test_serialise_empty(self):
1389
store = self.get_chk_bytes()
1391
node.set_maximum_size(10)
1392
expected_key = ("sha1:f34c3f0634ea3f85953dffa887620c0a5b1f4a51",)
1393
self.assertEqual([expected_key],
1394
list(node.serialise(store)))
1395
self.assertEqual("chkleaf:\n10\n1\n0\n\n", self.read_bytes(store, expected_key))
1396
self.assertEqual(expected_key, node.key())
1398
def test_serialise_items(self):
1399
store = self.get_chk_bytes()
1401
node.set_maximum_size(10)
1402
node.map(None, ("foo bar",), "baz quux")
1403
expected_key = ("sha1:f89fac7edfc6bdb1b1b54a556012ff0c646ef5e0",)
1404
self.assertEqual('foo bar', node._common_serialised_prefix)
1405
self.assertEqual([expected_key],
1406
list(node.serialise(store)))
1407
self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\n\x001\nbaz quux\n",
1408
self.read_bytes(store, expected_key))
1409
self.assertEqual(expected_key, node.key())
1411
def test_unique_serialised_prefix_empty_new(self):
1413
self.assertIs(None, node._compute_search_prefix())
1415
def test_unique_serialised_prefix_one_item_new(self):
1417
node.map(None, ("foo bar", "baz"), "baz quux")
1418
self.assertEqual("foo bar\x00baz", node._compute_search_prefix())
1420
def test_unmap_missing(self):
1422
self.assertRaises(KeyError, node.unmap, None, ("foo bar",))
1424
def test_unmap_present(self):
1426
node.map(None, ("foo bar",), "baz quux")
1427
result = node.unmap(None, ("foo bar",))
1428
self.assertEqual(node, result)
1429
self.assertEqual({}, self.to_dict(node, None))
1430
self.assertEqual(0, len(node))
1432
def test_map_maintains_common_prefixes(self):
1435
node.map(None, ("foo bar", "baz"), "baz quux")
1436
self.assertEqual('foo bar\x00baz', node._search_prefix)
1437
self.assertEqual('foo bar\x00baz', node._common_serialised_prefix)
1438
node.map(None, ("foo bar", "bing"), "baz quux")
1439
self.assertEqual('foo bar\x00b', node._search_prefix)
1440
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1441
node.map(None, ("fool", "baby"), "baz quux")
1442
self.assertEqual('foo', node._search_prefix)
1443
self.assertEqual('foo', node._common_serialised_prefix)
1444
node.map(None, ("foo bar", "baz"), "replaced")
1445
self.assertEqual('foo', node._search_prefix)
1446
self.assertEqual('foo', node._common_serialised_prefix)
1447
node.map(None, ("very", "different"), "value")
1448
self.assertEqual('', node._search_prefix)
1449
self.assertEqual('', node._common_serialised_prefix)
1451
def test_unmap_maintains_common_prefixes(self):
1454
node.map(None, ("foo bar", "baz"), "baz quux")
1455
node.map(None, ("foo bar", "bing"), "baz quux")
1456
node.map(None, ("fool", "baby"), "baz quux")
1457
node.map(None, ("very", "different"), "value")
1458
self.assertEqual('', node._search_prefix)
1459
self.assertEqual('', node._common_serialised_prefix)
1460
node.unmap(None, ("very", "different"))
1461
self.assertEqual("foo", node._search_prefix)
1462
self.assertEqual("foo", node._common_serialised_prefix)
1463
node.unmap(None, ("fool", "baby"))
1464
self.assertEqual('foo bar\x00b', node._search_prefix)
1465
self.assertEqual('foo bar\x00b', node._common_serialised_prefix)
1466
node.unmap(None, ("foo bar", "baz"))
1467
self.assertEqual('foo bar\x00bing', node._search_prefix)
1468
self.assertEqual('foo bar\x00bing', node._common_serialised_prefix)
1469
node.unmap(None, ("foo bar", "bing"))
1470
self.assertEqual(None, node._search_prefix)
1471
self.assertEqual(None, node._common_serialised_prefix)
1474
class TestInternalNode(TestCaseWithStore):
1476
def test_add_node_empty_new(self):
1477
node = InternalNode('fo')
1479
child.set_maximum_size(100)
1480
child.map(None, ("foo",), "bar")
1481
node.add_node("foo", child)
1482
# Note that node isn't strictly valid now as a tree (only one child),
1483
# but thats ok for this test.
1484
# The first child defines the node's width:
1485
self.assertEqual(3, node._node_width)
1486
# We should be able to iterate over the contents without doing IO.
1487
self.assertEqual({('foo',): 'bar'}, self.to_dict(node, None))
1488
# The length should be known:
1489
self.assertEqual(1, len(node))
1490
# serialising the node should serialise the child and the node.
1491
chk_bytes = self.get_chk_bytes()
1492
keys = list(node.serialise(chk_bytes))
1493
child_key = child.serialise(chk_bytes)[0]
1495
[child_key, ('sha1:cf67e9997d8228a907c1f5bfb25a8bd9cd916fac',)],
1497
# We should be able to access deserialised content.
1498
bytes = self.read_bytes(chk_bytes, keys[1])
1499
node = chk_map._deserialise(bytes, keys[1], None)
1500
self.assertEqual(1, len(node))
1501
self.assertEqual({('foo',): 'bar'}, self.to_dict(node, chk_bytes))
1502
self.assertEqual(3, node._node_width)
1504
def test_add_node_resets_key_new(self):
1505
node = InternalNode('fo')
1507
child.set_maximum_size(100)
1508
child.map(None, ("foo",), "bar")
1509
node.add_node("foo", child)
1510
chk_bytes = self.get_chk_bytes()
1511
keys = list(node.serialise(chk_bytes))
1512
self.assertEqual(keys[1], node._key)
1513
node.add_node("fos", child)
1514
self.assertEqual(None, node._key)
1516
# def test_add_node_empty_oversized_one_ok_new(self):
1517
# def test_add_node_one_oversized_second_kept_minimum_fan(self):
1518
# def test_add_node_two_oversized_third_kept_minimum_fan(self):
1519
# def test_add_node_one_oversized_second_splits_errors(self):
1521
def test__iter_nodes_no_key_filter(self):
1522
node = InternalNode('')
1524
child.set_maximum_size(100)
1525
child.map(None, ("foo",), "bar")
1526
node.add_node("f", child)
1528
child.set_maximum_size(100)
1529
child.map(None, ("bar",), "baz")
1530
node.add_node("b", child)
1532
for child, node_key_filter in node._iter_nodes(None, key_filter=None):
1533
self.assertEqual(None, node_key_filter)
1535
def test__iter_nodes_splits_key_filter(self):
1536
node = InternalNode('')
1538
child.set_maximum_size(100)
1539
child.map(None, ("foo",), "bar")
1540
node.add_node("f", child)
1542
child.set_maximum_size(100)
1543
child.map(None, ("bar",), "baz")
1544
node.add_node("b", child)
1546
# foo and bar both match exactly one leaf node, but 'cat' should not
1547
# match any, and should not be placed in one.
1548
key_filter = (('foo',), ('bar',), ('cat',))
1549
for child, node_key_filter in node._iter_nodes(None,
1550
key_filter=key_filter):
1551
# each child could only match one key filter, so make sure it was
1553
self.assertEqual(1, len(node_key_filter))
1555
def test__iter_nodes_with_multiple_matches(self):
1556
node = InternalNode('')
1558
child.set_maximum_size(100)
1559
child.map(None, ("foo",), "val")
1560
child.map(None, ("fob",), "val")
1561
node.add_node("f", child)
1563
child.set_maximum_size(100)
1564
child.map(None, ("bar",), "val")
1565
child.map(None, ("baz",), "val")
1566
node.add_node("b", child)
1568
# Note that 'ram' doesn't match anything, so it should be freely
1570
key_filter = (('foo',), ('fob',), ('bar',), ('baz',), ('ram',))
1571
for child, node_key_filter in node._iter_nodes(None,
1572
key_filter=key_filter):
1573
# each child could match two key filters, so make sure they were
1575
self.assertEqual(2, len(node_key_filter))
1577
def make_fo_fa_node(self):
1578
node = InternalNode('f')
1580
child.set_maximum_size(100)
1581
child.map(None, ("foo",), "val")
1582
child.map(None, ("fob",), "val")
1583
node.add_node('fo', child)
1585
child.set_maximum_size(100)
1586
child.map(None, ("far",), "val")
1587
child.map(None, ("faz",), "val")
1588
node.add_node("fa", child)
1591
def test__iter_nodes_single_entry(self):
1592
node = self.make_fo_fa_node()
1593
key_filter = [('foo',)]
1594
nodes = list(node._iter_nodes(None, key_filter=key_filter))
1595
self.assertEqual(1, len(nodes))
1596
self.assertEqual(key_filter, nodes[0][1])
1598
def test__iter_nodes_single_entry_misses(self):
1599
node = self.make_fo_fa_node()
1600
key_filter = [('bar',)]
1601
nodes = list(node._iter_nodes(None, key_filter=key_filter))
1602
self.assertEqual(0, len(nodes))
1604
def test__iter_nodes_mixed_key_width(self):
1605
node = self.make_fo_fa_node()
1606
key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('b',)]
1607
nodes = list(node._iter_nodes(None, key_filter=key_filter))
1608
self.assertEqual(1, len(nodes))
1609
matches = key_filter[:]
1610
matches.remove(('b',))
1611
self.assertEqual(sorted(matches), sorted(nodes[0][1]))
1613
def test__iter_nodes_match_all(self):
1614
node = self.make_fo_fa_node()
1615
key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('f',)]
1616
nodes = list(node._iter_nodes(None, key_filter=key_filter))
1617
self.assertEqual(2, len(nodes))
1619
def test__iter_nodes_fixed_widths_and_misses(self):
1620
node = self.make_fo_fa_node()
1621
# foo and faa should both match one child, baz should miss
1622
key_filter = [('foo',), ('faa',), ('baz',)]
1623
nodes = list(node._iter_nodes(None, key_filter=key_filter))
1624
self.assertEqual(2, len(nodes))
1625
for node, matches in nodes:
1626
self.assertEqual(1, len(matches))
1628
def test_iteritems_empty_new(self):
1629
node = InternalNode()
1630
self.assertEqual([], sorted(node.iteritems(None)))
1632
def test_iteritems_two_children(self):
1633
node = InternalNode()
1635
leaf1.map(None, ('foo bar',), 'quux')
1637
leaf2.map(None, ('strange',), 'beast')
1638
node.add_node("f", leaf1)
1639
node.add_node("s", leaf2)
1640
self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')],
1641
sorted(node.iteritems(None)))
1643
def test_iteritems_two_children_partial(self):
1644
node = InternalNode()
1646
leaf1.map(None, ('foo bar',), 'quux')
1648
leaf2.map(None, ('strange',), 'beast')
1649
node.add_node("f", leaf1)
1650
# This sets up a path that should not be followed - it will error if
1651
# the code tries to.
1652
node._items['f'] = None
1653
node.add_node("s", leaf2)
1654
self.assertEqual([(('strange',), 'beast')],
1655
sorted(node.iteritems(None, [('strange',), ('weird',)])))
1657
def test_iteritems_two_children_with_hash(self):
1658
search_key_func = chk_map.search_key_registry.get('hash-255-way')
1659
node = InternalNode(search_key_func=search_key_func)
1660
leaf1 = LeafNode(search_key_func=search_key_func)
1661
leaf1.map(None, ('foo bar',), 'quux')
1662
leaf2 = LeafNode(search_key_func=search_key_func)
1663
leaf2.map(None, ('strange',), 'beast')
1664
self.assertEqual('\xbeF\x014', search_key_func(('foo bar',)))
1665
self.assertEqual('\x85\xfa\xf7K', search_key_func(('strange',)))
1666
node.add_node("\xbe", leaf1)
1667
# This sets up a path that should not be followed - it will error if
1668
# the code tries to.
1669
node._items['\xbe'] = None
1670
node.add_node("\x85", leaf2)
1671
self.assertEqual([(('strange',), 'beast')],
1672
sorted(node.iteritems(None, [('strange',), ('weird',)])))
1674
def test_iteritems_partial_empty(self):
1675
node = InternalNode()
1676
self.assertEqual([], sorted(node.iteritems([('missing',)])))
1678
def test_map_to_new_child_new(self):
1679
chkmap = self._get_map({('k1',):'foo', ('k2',):'bar'}, maximum_size=10)
1680
chkmap._ensure_root()
1681
node = chkmap._root_node
1682
# Ensure test validity: nothing paged in below the root.
1684
len([value for value in node._items.values()
1685
if type(value) == tuple]))
1686
# now, mapping to k3 should add a k3 leaf
1687
prefix, nodes = node.map(None, ('k3',), 'quux')
1688
self.assertEqual("k", prefix)
1689
self.assertEqual([("", node)], nodes)
1690
# check new child details
1691
child = node._items['k3']
1692
self.assertIsInstance(child, LeafNode)
1693
self.assertEqual(1, len(child))
1694
self.assertEqual({('k3',): 'quux'}, self.to_dict(child, None))
1695
self.assertEqual(None, child._key)
1696
self.assertEqual(10, child.maximum_size)
1697
self.assertEqual(1, child._key_width)
1698
# Check overall structure:
1699
self.assertEqual(3, len(chkmap))
1700
self.assertEqual({('k1',): 'foo', ('k2',): 'bar', ('k3',): 'quux'},
1701
self.to_dict(chkmap))
1702
# serialising should only serialise the new data - k3 and the internal
1704
keys = list(node.serialise(chkmap._store))
1705
child_key = child.serialise(chkmap._store)[0]
1706
self.assertEqual([child_key, keys[1]], keys)
1708
def test_map_to_child_child_splits_new(self):
1709
chkmap = self._get_map({('k1',):'foo', ('k22',):'bar'}, maximum_size=10)
1710
# Check for the canonical root value for this tree:
1711
self.assertEqualDiff("'' InternalNode\n"
1716
, chkmap._dump_tree())
1717
# _dump_tree pages everything in, so reload using just the root
1718
chkmap = CHKMap(chkmap._store, chkmap._root_node)
1719
chkmap._ensure_root()
1720
node = chkmap._root_node
1721
# Ensure test validity: nothing paged in below the root.
1723
len([value for value in node._items.values()
1724
if type(value) == tuple]))
1725
# now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
1726
# k23, which for simplicity in the current implementation generates
1727
# a new internal node between node, and k22/k23.
1728
prefix, nodes = node.map(chkmap._store, ('k23',), 'quux')
1729
self.assertEqual("k", prefix)
1730
self.assertEqual([("", node)], nodes)
1731
# check new child details
1732
child = node._items['k2']
1733
self.assertIsInstance(child, InternalNode)
1734
self.assertEqual(2, len(child))
1735
self.assertEqual({('k22',): 'bar', ('k23',): 'quux'},
1736
self.to_dict(child, None))
1737
self.assertEqual(None, child._key)
1738
self.assertEqual(10, child.maximum_size)
1739
self.assertEqual(1, child._key_width)
1740
self.assertEqual(3, child._node_width)
1741
# Check overall structure:
1742
self.assertEqual(3, len(chkmap))
1743
self.assertEqual({('k1',): 'foo', ('k22',): 'bar', ('k23',): 'quux'},
1744
self.to_dict(chkmap))
1745
# serialising should only serialise the new data - although k22 hasn't
1746
# changed because its a special corner case (splitting on with only one
1747
# key leaves one node unaltered), in general k22 is serialised, so we
1748
# expect k22, k23, the new internal node, and node, to be serialised.
1749
keys = list(node.serialise(chkmap._store))
1750
child_key = child._key
1751
k22_key = child._items['k22']._key
1752
k23_key = child._items['k23']._key
1753
self.assertEqual([k22_key, k23_key, child_key, node.key()], keys)
1754
self.assertEqualDiff("'' InternalNode\n"
1757
" 'k2' InternalNode\n"
1761
" ('k23',) 'quux'\n"
1762
, chkmap._dump_tree())
1764
def test__search_prefix_filter_with_hash(self):
1765
search_key_func = chk_map.search_key_registry.get('hash-16-way')
1766
node = InternalNode(search_key_func=search_key_func)
1768
node._node_width = 4
1769
self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
1770
self.assertEqual('E8B7', node._search_prefix_filter(('a', 'b')))
1771
self.assertEqual('E8B7', node._search_prefix_filter(('a',)))
1773
def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
1774
chkmap = self._get_map(
1775
{('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
1776
# Check we have the expected tree.
1777
self.assertEqualDiff("'' InternalNode\n"
1780
" 'k2' InternalNode\n"
1784
" ('k23',) 'quux'\n"
1785
, chkmap._dump_tree())
1786
chkmap = CHKMap(chkmap._store, chkmap._root_node)
1787
chkmap._ensure_root()
1788
node = chkmap._root_node
1789
# unmapping k23 should give us a root, with k1 and k22 as direct
1791
result = node.unmap(chkmap._store, ('k23',))
1792
# check the pointed-at object within node - k2 should now point at the
1793
# k22 leaf (which has been paged in to see if we can collapse the tree)
1794
child = node._items['k2']
1795
self.assertIsInstance(child, LeafNode)
1796
self.assertEqual(1, len(child))
1797
self.assertEqual({('k22',): 'bar'},
1798
self.to_dict(child, None))
1799
# Check overall structure is instact:
1800
self.assertEqual(2, len(chkmap))
1801
self.assertEqual({('k1',): 'foo', ('k22',): 'bar'},
1802
self.to_dict(chkmap))
1803
# serialising should only serialise the new data - the root node.
1804
keys = list(node.serialise(chkmap._store))
1805
self.assertEqual([keys[-1]], keys)
1806
chkmap = CHKMap(chkmap._store, keys[-1])
1807
self.assertEqualDiff("'' InternalNode\n"
1812
, chkmap._dump_tree())
1814
def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self):
1815
chkmap = self._get_map(
1816
{('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)
1817
self.assertEqualDiff("'' InternalNode\n"
1820
" 'k2' InternalNode\n"
1824
" ('k23',) 'quux'\n"
1825
, chkmap._dump_tree())
1826
orig_root = chkmap._root_node
1827
chkmap = CHKMap(chkmap._store, orig_root)
1828
chkmap._ensure_root()
1829
node = chkmap._root_node
1830
k2_ptr = node._items['k2']
1831
# unmapping k1 should give us a root, with k22 and k23 as direct
1832
# children, and should not have needed to page in the subtree.
1833
result = node.unmap(chkmap._store, ('k1',))
1834
self.assertEqual(k2_ptr, result)
1835
chkmap = CHKMap(chkmap._store, orig_root)
1836
# Unmapping at the CHKMap level should switch to the new root
1837
chkmap.unmap(('k1',))
1838
self.assertEqual(k2_ptr, chkmap._root_node)
1839
self.assertEqualDiff("'' InternalNode\n"
1843
" ('k23',) 'quux'\n"
1844
, chkmap._dump_tree())
1848
# map -> fits - done
1849
# map -> doesn't fit - shrink from left till fits
1850
# key data to return: the common prefix, new nodes.
1852
# unmap -> how to tell if siblings can be combined.
1853
# combing leaf nodes means expanding the prefix to the left; so gather the size of
1854
# all the leaf nodes addressed by expanding the prefix by 1; if any adjacent node
1855
# is an internal node, we know that that is a dense subtree - can't combine.
1856
# otherwise as soon as the sum of serialised values exceeds the split threshold
1857
# we know we can't combine - stop.
1858
# unmap -> key return data - space in node, common prefix length? and key count
1860
# variable length prefixes? -> later start with fixed width to get something going
1861
# map -> fits - update pointer to leaf
1862
# return [prefix and node] - seems sound.
1863
# map -> doesn't fit - find unique prefix and shift right
1864
# create internal nodes for all the partitions, return list of unique
1865
# prefixes and nodes.
1866
# map -> new prefix - create a leaf
1867
# unmap -> if child key count 0, remove
1868
# unmap -> return space in node, common prefix length? (why?), key count
1870
# map, if 1 node returned, use it, otherwise make an internal and populate.
1871
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
1873
# map inits as empty leafnode.
1879
# AA, AB, AC, AD, BA
1880
# packed internal node - ideal:
1881
# AA, AB, AC, AD, BA
1882
# single byte fanout - A,B, AA,AB,AC,AD, BA
1885
# AB - split, but we want to end up with AB, BA, in one node, with
1889
class TestIterInterestingNodes(TestCaseWithStore):
1891
def get_chk_bytes(self):
1892
if getattr(self, '_chk_bytes', None) is None:
1893
self._chk_bytes = super(TestIterInterestingNodes,
1894
self).get_chk_bytes()
1895
return self._chk_bytes
1897
def get_map_key(self, a_dict, maximum_size=10):
1898
c_map = self._get_map(a_dict, maximum_size=maximum_size,
1899
chk_bytes=self.get_chk_bytes())
1902
def assertIterInteresting(self, expected, interesting_keys,
1903
uninteresting_keys):
1904
"""Check the result of iter_interesting_nodes.
1906
:param expected: A list of (record_keys, interesting_chk_pages,
1907
interesting key value pairs)
1909
store = self.get_chk_bytes()
1910
iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys,
1912
nodes = list(iter_nodes)
1913
for count, (exp, act) in enumerate(izip(expected, nodes)):
1914
exp_record, exp_items = exp
1916
exp_tuple = (exp_record, sorted(exp_items))
1918
act_tuple = (None, sorted(items))
1920
act_tuple = (record.key, sorted(items))
1921
self.assertEqual(exp_tuple, act_tuple,
1922
'entry %d did not match expected' % count)
1923
self.assertEqual(len(expected), len(nodes))
1925
def test_empty_to_one_keys(self):
1926
target = self.get_map_key({('a',): 'content'})
1927
self.assertIterInteresting(
1928
[(target, [(('a',), 'content')]),
1931
def test_none_to_one_key(self):
1932
basis = self.get_map_key({})
1933
target = self.get_map_key({('a',): 'content'})
1934
self.assertIterInteresting(
1935
[(None, [(('a',), 'content')]),
1937
], [target], [basis])
1939
def test_one_to_none_key(self):
1940
basis = self.get_map_key({('a',): 'content'})
1941
target = self.get_map_key({})
1942
self.assertIterInteresting(
1946
def test_common_pages(self):
1947
basis = self.get_map_key({('a',): 'content',
1951
target = self.get_map_key({('a',): 'content',
1952
('b',): 'other content',
1955
target_map = CHKMap(self.get_chk_bytes(), target)
1956
self.assertEqualDiff(
1959
" ('a',) 'content'\n"
1961
" ('b',) 'other content'\n"
1963
" ('c',) 'content'\n",
1964
target_map._dump_tree())
1965
b_key = target_map._root_node._items['b'].key()
1966
# This should return the root node, and the node for the 'b' key
1967
self.assertIterInteresting(
1969
(b_key, [(('b',), 'other content')])],
1972
def test_common_sub_page(self):
1973
basis = self.get_map_key({('aaa',): 'common',
1976
target = self.get_map_key({('aaa',): 'common',
1980
target_map = CHKMap(self.get_chk_bytes(), target)
1981
self.assertEqualDiff(
1983
" 'a' InternalNode\n"
1985
" ('aaa',) 'common'\n"
1989
" ('c',) 'common'\n",
1990
target_map._dump_tree())
1991
# The key for the internal aa node
1992
a_key = target_map._root_node._items['a'].key()
1993
# The key for the leaf aab node
1994
aab_key = target_map._root_node._items['a']._items['aab'].key()
1995
self.assertIterInteresting(
1998
(aab_key, [(('aab',), 'new')])],
2001
def test_common_leaf(self):
2002
basis = self.get_map_key({})
2003
target1 = self.get_map_key({('aaa',): 'common'})
2004
target2 = self.get_map_key({('aaa',): 'common',
2007
target3 = self.get_map_key({('aaa',): 'common',
2011
# The LeafNode containing 'aaa': 'common' occurs at 3 different levels.
2012
# Once as a root node, once as a second layer, and once as a third
2013
# layer. It should only be returned one time regardless
2014
target1_map = CHKMap(self.get_chk_bytes(), target1)
2015
self.assertEqualDiff(
2017
" ('aaa',) 'common'\n",
2018
target1_map._dump_tree())
2019
target2_map = CHKMap(self.get_chk_bytes(), target2)
2020
self.assertEqualDiff(
2023
" ('aaa',) 'common'\n"
2025
" ('bbb',) 'new'\n",
2026
target2_map._dump_tree())
2027
target3_map = CHKMap(self.get_chk_bytes(), target3)
2028
self.assertEqualDiff(
2030
" 'a' InternalNode\n"
2032
" ('aaa',) 'common'\n"
2034
" ('aac',) 'other'\n"
2036
" ('bbb',) 'new'\n",
2037
target3_map._dump_tree())
2038
aaa_key = target1_map._root_node.key()
2039
b_key = target2_map._root_node._items['b'].key()
2040
a_key = target3_map._root_node._items['a'].key()
2041
aac_key = target3_map._root_node._items['a']._items['aac'].key()
2042
self.assertIterInteresting(
2043
[(None, [(('aaa',), 'common')]),
2047
(b_key, [(('bbb',), 'new')]),
2049
(aac_key, [(('aac',), 'other')]),
2050
], [target1, target2, target3], [basis])
2052
self.assertIterInteresting(
2055
(b_key, [(('bbb',), 'new')]),
2057
(aac_key, [(('aac',), 'other')]),
2058
], [target2, target3], [target1])
2060
# This may be a case that we relax. A root node is a deep child of the
2061
# excluded set. The cost is buffering root nodes until we have
2062
# determined all possible exclusions. (Because a prefix of '', cannot
2064
self.assertIterInteresting(
2065
[], [target1], [target3])
2067
def test_multiple_maps(self):
2068
basis1 = self.get_map_key({('aaa',): 'common',
2071
basis2 = self.get_map_key({('bbb',): 'common',
2074
target1 = self.get_map_key({('aaa',): 'common',
2075
('aac',): 'target1',
2078
target2 = self.get_map_key({('aaa',): 'common',
2079
('bba',): 'target2',
2082
target1_map = CHKMap(self.get_chk_bytes(), target1)
2083
self.assertEqualDiff(
2085
" 'a' InternalNode\n"
2087
" ('aaa',) 'common'\n"
2089
" ('aac',) 'target1'\n"
2091
" ('bbb',) 'common'\n",
2092
target1_map._dump_tree())
2093
# The key for the target1 internal a node
2094
a_key = target1_map._root_node._items['a'].key()
2095
# The key for the leaf aac node
2096
aac_key = target1_map._root_node._items['a']._items['aac'].key()
2098
target2_map = CHKMap(self.get_chk_bytes(), target2)
2099
self.assertEqualDiff(
2102
" ('aaa',) 'common'\n"
2103
" 'b' InternalNode\n"
2105
" ('bba',) 'target2'\n"
2107
" ('bbb',) 'common'\n",
2108
target2_map._dump_tree())
2109
# The key for the target2 internal bb node
2110
b_key = target2_map._root_node._items['b'].key()
2111
# The key for the leaf bba node
2112
bba_key = target2_map._root_node._items['b']._items['bba'].key()
2113
self.assertIterInteresting(
2118
(aac_key, [(('aac',), 'target1')]),
2119
(bba_key, [(('bba',), 'target2')]),
2120
], [target1, target2], [basis1, basis2])
2122
def test_multiple_maps_overlapping_common_new(self):
2123
# Test that when a node found through the interesting_keys iteration
2124
# for *some roots* and also via the uninteresting keys iteration, that
2125
# it is still scanned for uninteresting refs and items, because its
2126
# not truely new. This requires 2 levels of InternalNodes to expose,
2127
# because of the way the bootstrap in _find_children_info works.
2128
# This suggests that the code is probably amenable to/benefit from
2130
# How does this test work?
2131
# 1) We need a second level InternalNode present in a basis tree.
2132
# 2) We need a left side new tree that uses that InternalNode
2133
# 3) We need a right side new tree that does not use that InternalNode
2134
# at all but that has an unchanged *value* that was reachable inside
2136
basis = self.get_map_key({
2137
# InternalNode, unchanged in left:
2140
# Forces an internalNode at 'a'
2143
left = self.get_map_key({
2144
# All of basis unchanged
2148
# And a new top level node so the root key is different
2151
right = self.get_map_key({
2152
# A value that is unchanged from basis and thus should be filtered
2156
basis_map = CHKMap(self.get_chk_bytes(), basis)
2157
self.assertEqualDiff(
2159
" 'a' InternalNode\n"
2161
" ('aaa',) 'left'\n"
2163
" ('abb',) 'right'\n"
2165
" ('ccc',) 'common'\n",
2166
basis_map._dump_tree())
2167
# Get left expected data
2168
left_map = CHKMap(self.get_chk_bytes(), left)
2169
self.assertEqualDiff(
2171
" 'a' InternalNode\n"
2173
" ('aaa',) 'left'\n"
2175
" ('abb',) 'right'\n"
2177
" ('ccc',) 'common'\n"
2179
" ('ddd',) 'change'\n",
2180
left_map._dump_tree())
2181
# Keys from left side target
2182
l_d_key = left_map._root_node._items['d'].key()
2183
# Get right expected data
2184
right_map = CHKMap(self.get_chk_bytes(), right)
2185
self.assertEqualDiff(
2187
" ('abb',) 'right'\n",
2188
right_map._dump_tree())
2189
# Keys from the right side target - none, the root is enough.
2191
self.expectFailure("we don't properly filter different depths",
2192
self.assertIterInteresting,
2195
(l_d_key, [(('ddd',), 'change')]),
2196
], [left, right], [basis])
2197
self.assertIterInteresting(
2200
(l_d_key, [(('ddd',), 'change')]),
2201
], [left, right], [basis])
2203
def test_multiple_maps_similar(self):
2204
# We want to have a depth=2 tree, with multiple entries in each leaf
2206
basis = self.get_map_key({
2207
('aaa',): 'unchanged',
2208
('abb',): 'will change left',
2209
('caa',): 'unchanged',
2210
('cbb',): 'will change right',
2212
left = self.get_map_key({
2213
('aaa',): 'unchanged',
2214
('abb',): 'changed left',
2215
('caa',): 'unchanged',
2216
('cbb',): 'will change right',
2218
right = self.get_map_key({
2219
('aaa',): 'unchanged',
2220
('abb',): 'will change left',
2221
('caa',): 'unchanged',
2222
('cbb',): 'changed right',
2224
basis_map = CHKMap(self.get_chk_bytes(), basis)
2225
self.assertEqualDiff(
2228
" ('aaa',) 'unchanged'\n"
2229
" ('abb',) 'will change left'\n"
2231
" ('caa',) 'unchanged'\n"
2232
" ('cbb',) 'will change right'\n",
2233
basis_map._dump_tree())
2234
# Get left expected data
2235
left_map = CHKMap(self.get_chk_bytes(), left)
2236
self.assertEqualDiff(
2239
" ('aaa',) 'unchanged'\n"
2240
" ('abb',) 'changed left'\n"
2242
" ('caa',) 'unchanged'\n"
2243
" ('cbb',) 'will change right'\n",
2244
left_map._dump_tree())
2245
# Keys from left side target
2246
l_a_key = left_map._root_node._items['a'].key()
2247
l_c_key = left_map._root_node._items['c'].key()
2248
# Get right expected data
2249
right_map = CHKMap(self.get_chk_bytes(), right)
2250
self.assertEqualDiff(
2253
" ('aaa',) 'unchanged'\n"
2254
" ('abb',) 'will change left'\n"
2256
" ('caa',) 'unchanged'\n"
2257
" ('cbb',) 'changed right'\n",
2258
right_map._dump_tree())
2259
r_a_key = right_map._root_node._items['a'].key()
2260
r_c_key = right_map._root_node._items['c'].key()
2261
self.assertIterInteresting(
2264
(l_a_key, [(('abb',), 'changed left')]),
2265
(r_c_key, [(('cbb',), 'changed right')]),
2266
], [left, right], [basis])