4241.6.1
by Ian Clatworthy
chk_map code from brisbane-core |
1 |
# Copyright (C) 2008 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Tests for maps built on a CHK versionedfiles facility."""
|
|
18 |
||
19 |
from itertools import izip |
|
20 |
||
21 |
from bzrlib import ( |
|
22 |
chk_map, |
|
23 |
osutils, |
|
24 |
tests, |
|
25 |
)
|
|
26 |
from bzrlib.chk_map import ( |
|
27 |
CHKMap, |
|
28 |
InternalNode, |
|
29 |
LeafNode, |
|
30 |
Node, |
|
31 |
)
|
|
32 |
||
33 |
||
34 |
class TestNode(tests.TestCase): |
|
35 |
||
36 |
def assertCommonPrefix(self, expected_common, prefix, key): |
|
37 |
common = Node.common_prefix(prefix, key) |
|
38 |
self.assertTrue(len(common) <= len(prefix)) |
|
39 |
self.assertTrue(len(common) <= len(key)) |
|
40 |
self.assertStartsWith(prefix, common) |
|
41 |
self.assertStartsWith(key, common) |
|
42 |
self.assertEquals(expected_common, common) |
|
43 |
||
44 |
def test_common_prefix(self): |
|
45 |
self.assertCommonPrefix('beg', 'beg', 'begin') |
|
46 |
||
47 |
def test_no_common_prefix(self): |
|
48 |
self.assertCommonPrefix('', 'begin', 'end') |
|
49 |
||
50 |
def test_equal(self): |
|
51 |
self.assertCommonPrefix('begin', 'begin', 'begin') |
|
52 |
||
53 |
def test_not_a_prefix(self): |
|
54 |
self.assertCommonPrefix('b', 'begin', 'b') |
|
55 |
||
56 |
||
57 |
class TestCaseWithStore(tests.TestCaseWithTransport): |
|
58 |
||
59 |
def get_chk_bytes(self): |
|
4241.6.8
by Robert Collins, John Arbash Meinel, Ian Clatworthy, Vincent Ladeuil
Add --development6-rich-root, disabling the legacy and unneeded development2 format, and activating the tests for CHK features disabled pending this format. (Robert Collins, John Arbash Meinel, Ian Clatworthy, Vincent Ladeuil) |
60 |
# The easiest way to get a CHK store is a development6 repository and
|
4241.6.1
by Ian Clatworthy
chk_map code from brisbane-core |
61 |
# then work with the chk_bytes attribute directly.
|
4241.6.8
by Robert Collins, John Arbash Meinel, Ian Clatworthy, Vincent Ladeuil
Add --development6-rich-root, disabling the legacy and unneeded development2 format, and activating the tests for CHK features disabled pending this format. (Robert Collins, John Arbash Meinel, Ian Clatworthy, Vincent Ladeuil) |
62 |
repo = self.make_repository(".", format="development6-rich-root") |
4241.6.1
by Ian Clatworthy
chk_map code from brisbane-core |
63 |
repo.lock_write() |
64 |
self.addCleanup(repo.unlock) |
|
65 |
repo.start_write_group() |
|
66 |
self.addCleanup(repo.abort_write_group) |
|
67 |
return repo.chk_bytes |
|
68 |
||
69 |
def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1, |
|
70 |
search_key_func=None): |
|
71 |
if chk_bytes is None: |
|
72 |
chk_bytes = self.get_chk_bytes() |
|
73 |
root_key = CHKMap.from_dict(chk_bytes, a_dict, |
|
74 |
maximum_size=maximum_size, key_width=key_width, |
|
75 |
search_key_func=search_key_func) |
|
76 |
chkmap = CHKMap(chk_bytes, root_key, search_key_func=search_key_func) |
|
77 |
return chkmap |
|
78 |
||
79 |
def read_bytes(self, chk_bytes, key): |
|
80 |
stream = chk_bytes.get_record_stream([key], 'unordered', True) |
|
81 |
record = stream.next() |
|
82 |
if record.storage_kind == 'absent': |
|
83 |
self.fail('Store does not contain the key %s' % (key,)) |
|
84 |
return record.get_bytes_as("fulltext") |
|
85 |
||
86 |
def to_dict(self, node, *args): |
|
87 |
return dict(node.iteritems(*args)) |
|
88 |
||
89 |
||
90 |
class TestMap(TestCaseWithStore): |
|
91 |
||
92 |
def assertHasABMap(self, chk_bytes): |
|
93 |
ab_leaf_bytes = 'chkleaf:\n0\n1\n1\na\n\x001\nb\n' |
|
94 |
ab_sha1 = osutils.sha_string(ab_leaf_bytes) |
|
95 |
self.assertEqual('90986195696b177c8895d48fdb4b7f2366f798a0', ab_sha1) |
|
96 |
root_key = ('sha1:' + ab_sha1,) |
|
97 |
self.assertEqual(ab_leaf_bytes, self.read_bytes(chk_bytes, root_key)) |
|
98 |
return root_key |
|
99 |
||
100 |
def assertHasEmptyMap(self, chk_bytes): |
|
101 |
empty_leaf_bytes = 'chkleaf:\n0\n1\n0\n\n' |
|
102 |
empty_sha1 = osutils.sha_string(empty_leaf_bytes) |
|
103 |
self.assertEqual('8571e09bf1bcc5b9621ce31b3d4c93d6e9a1ed26', empty_sha1) |
|
104 |
root_key = ('sha1:' + empty_sha1,) |
|
105 |
self.assertEqual(empty_leaf_bytes, self.read_bytes(chk_bytes, root_key)) |
|
106 |
return root_key |
|
107 |
||
108 |
def assertMapLayoutEqual(self, map_one, map_two): |
|
109 |
"""Assert that the internal structure is identical between the maps."""
|
|
110 |
map_one._ensure_root() |
|
111 |
node_one_stack = [map_one._root_node] |
|
112 |
map_two._ensure_root() |
|
113 |
node_two_stack = [map_two._root_node] |
|
114 |
while node_one_stack: |
|
115 |
node_one = node_one_stack.pop() |
|
116 |
node_two = node_two_stack.pop() |
|
117 |
if node_one.__class__ != node_two.__class__: |
|
118 |
self.assertEqualDiff(map_one._dump_tree(include_keys=True), |
|
119 |
map_two._dump_tree(include_keys=True)) |
|
120 |
self.assertEqual(node_one._search_prefix, |
|
121 |
node_two._search_prefix) |
|
122 |
if isinstance(node_one, InternalNode): |
|
123 |
# Internal nodes must have identical references
|
|
124 |
self.assertEqual(sorted(node_one._items.keys()), |
|
125 |
sorted(node_two._items.keys())) |
|
126 |
node_one_stack.extend([n for n, _ in |
|
127 |
node_one._iter_nodes(map_one._store)]) |
|
128 |
node_two_stack.extend([n for n, _ in |
|
129 |
node_two._iter_nodes(map_two._store)]) |
|
130 |
else: |
|
131 |
# Leaf nodes must have identical contents
|
|
132 |
self.assertEqual(node_one._items, node_two._items) |
|
133 |
self.assertEquals([], node_two_stack) |
|
134 |
||
135 |
def assertCanonicalForm(self, chkmap): |
|
136 |
"""Assert that the chkmap is in 'canonical' form.
|
|
137 |
||
138 |
We do this by adding all of the key value pairs from scratch, both in
|
|
139 |
forward order and reverse order, and assert that the final tree layout
|
|
140 |
is identical.
|
|
141 |
"""
|
|
142 |
items = list(chkmap.iteritems()) |
|
143 |
map_forward = chk_map.CHKMap(None, None) |
|
144 |
map_forward._root_node.set_maximum_size(chkmap._root_node.maximum_size) |
|
145 |
for key, value in items: |
|
146 |
map_forward.map(key, value) |
|
147 |
self.assertMapLayoutEqual(map_forward, chkmap) |
|
148 |
map_reverse = chk_map.CHKMap(None, None) |
|
149 |
map_reverse._root_node.set_maximum_size(chkmap._root_node.maximum_size) |
|
150 |
for key, value in reversed(items): |
|
151 |
map_reverse.map(key, value) |
|
152 |
self.assertMapLayoutEqual(map_reverse, chkmap) |
|
153 |
||
154 |
def test_assert_map_layout_equal(self): |
|
155 |
store = self.get_chk_bytes() |
|
156 |
map_one = CHKMap(store, None) |
|
157 |
map_one._root_node.set_maximum_size(20) |
|
158 |
map_two = CHKMap(store, None) |
|
159 |
map_two._root_node.set_maximum_size(20) |
|
160 |
self.assertMapLayoutEqual(map_one, map_two) |
|
161 |
map_one.map('aaa', 'value') |
|
162 |
self.assertRaises(AssertionError, |
|
163 |
self.assertMapLayoutEqual, map_one, map_two) |
|
164 |
map_two.map('aaa', 'value') |
|
165 |
self.assertMapLayoutEqual(map_one, map_two) |
|
166 |
# Split the tree, so we ensure that internal nodes and leaf nodes are
|
|
167 |
# properly checked
|
|
168 |
map_one.map('aab', 'value') |
|
169 |
self.assertIsInstance(map_one._root_node, InternalNode) |
|
170 |
self.assertRaises(AssertionError, |
|
171 |
self.assertMapLayoutEqual, map_one, map_two) |
|
172 |
map_two.map('aab', 'value') |
|
173 |
self.assertMapLayoutEqual(map_one, map_two) |
|
174 |
map_one.map('aac', 'value') |
|
175 |
self.assertRaises(AssertionError, |
|
176 |
self.assertMapLayoutEqual, map_one, map_two) |
|
177 |
self.assertCanonicalForm(map_one) |
|
178 |
||
179 |
def test_from_dict_empty(self): |
|
180 |
chk_bytes = self.get_chk_bytes() |
|
181 |
root_key = CHKMap.from_dict(chk_bytes, {}) |
|
182 |
# Check the data was saved and inserted correctly.
|
|
183 |
expected_root_key = self.assertHasEmptyMap(chk_bytes) |
|
184 |
self.assertEqual(expected_root_key, root_key) |
|
185 |
||
186 |
def test_from_dict_ab(self): |
|
187 |
chk_bytes = self.get_chk_bytes() |
|
188 |
root_key = CHKMap.from_dict(chk_bytes, {"a": "b"}) |
|
189 |
# Check the data was saved and inserted correctly.
|
|
190 |
expected_root_key = self.assertHasABMap(chk_bytes) |
|
191 |
self.assertEqual(expected_root_key, root_key) |
|
192 |
||
193 |
def test_apply_empty_ab(self): |
|
194 |
# applying a delta (None, "a", "b") to an empty chkmap generates the
|
|
195 |
# same map as from_dict_ab.
|
|
196 |
chk_bytes = self.get_chk_bytes() |
|
197 |
root_key = CHKMap.from_dict(chk_bytes, {}) |
|
198 |
chkmap = CHKMap(chk_bytes, root_key) |
|
199 |
new_root = chkmap.apply_delta([(None, "a", "b")]) |
|
200 |
# Check the data was saved and inserted correctly.
|
|
201 |
expected_root_key = self.assertHasABMap(chk_bytes) |
|
202 |
self.assertEqual(expected_root_key, new_root) |
|
203 |
# The update should have left us with an in memory root node, with an
|
|
204 |
# updated key.
|
|
205 |
self.assertEqual(new_root, chkmap._root_node._key) |
|
206 |
||
207 |
def test_apply_ab_empty(self): |
|
208 |
# applying a delta ("a", None, None) to a map with 'a' in it generates
|
|
209 |
# an empty map.
|
|
210 |
chk_bytes = self.get_chk_bytes() |
|
211 |
root_key = CHKMap.from_dict(chk_bytes, {("a",):"b"}) |
|
212 |
chkmap = CHKMap(chk_bytes, root_key) |
|
213 |
new_root = chkmap.apply_delta([(("a",), None, None)]) |
|
214 |
# Check the data was saved and inserted correctly.
|
|
215 |
expected_root_key = self.assertHasEmptyMap(chk_bytes) |
|
216 |
self.assertEqual(expected_root_key, new_root) |
|
217 |
# The update should have left us with an in memory root node, with an
|
|
218 |
# updated key.
|
|
219 |
self.assertEqual(new_root, chkmap._root_node._key) |
|
220 |
||
221 |
def test_apply_delta_is_deterministic(self): |
|
222 |
chk_bytes = self.get_chk_bytes() |
|
223 |
chkmap1 = CHKMap(chk_bytes, None) |
|
224 |
chkmap1._root_node.set_maximum_size(10) |
|
225 |
chkmap1.apply_delta([(None, ('aaa',), 'common'), |
|
226 |
(None, ('bba',), 'target2'), |
|
227 |
(None, ('bbb',), 'common')]) |
|
228 |
root_key1 = chkmap1._save() |
|
229 |
self.assertCanonicalForm(chkmap1) |
|
230 |
||
231 |
chkmap2 = CHKMap(chk_bytes, None) |
|
232 |
chkmap2._root_node.set_maximum_size(10) |
|
233 |
chkmap2.apply_delta([(None, ('bbb',), 'common'), |
|
234 |
(None, ('bba',), 'target2'), |
|
235 |
(None, ('aaa',), 'common')]) |
|
236 |
root_key2 = chkmap2._save() |
|
237 |
self.assertEqualDiff(chkmap1._dump_tree(include_keys=True), |
|
238 |
chkmap2._dump_tree(include_keys=True)) |
|
239 |
self.assertEqual(root_key1, root_key2) |
|
240 |
self.assertCanonicalForm(chkmap2) |
|
241 |
||
242 |
def test_stable_splitting(self): |
|
243 |
store = self.get_chk_bytes() |
|
244 |
chkmap = CHKMap(store, None) |
|
245 |
# Should fit 2 keys per LeafNode
|
|
246 |
chkmap._root_node.set_maximum_size(35) |
|
247 |
chkmap.map(('aaa',), 'v') |
|
248 |
self.assertEqualDiff("'' LeafNode\n" |
|
249 |
" ('aaa',) 'v'\n", |
|
250 |
chkmap._dump_tree()) |
|
251 |
chkmap.map(('aab',), 'v') |
|
252 |
self.assertEqualDiff("'' LeafNode\n" |
|
253 |
" ('aaa',) 'v'\n" |
|
254 |
" ('aab',) 'v'\n", |
|
255 |
chkmap._dump_tree()) |
|
256 |
self.assertCanonicalForm(chkmap) |
|
257 |
||
258 |
# Creates a new internal node, and splits the others into leaves
|
|
259 |
chkmap.map(('aac',), 'v') |
|
260 |
self.assertEqualDiff("'' InternalNode\n" |
|
261 |
" 'aaa' LeafNode\n" |
|
262 |
" ('aaa',) 'v'\n" |
|
263 |
" 'aab' LeafNode\n" |
|
264 |
" ('aab',) 'v'\n" |
|
265 |
" 'aac' LeafNode\n" |
|
266 |
" ('aac',) 'v'\n", |
|
267 |
chkmap._dump_tree()) |
|
268 |
self.assertCanonicalForm(chkmap) |
|
269 |
||
270 |
# Splits again, because it can't fit in the current structure
|
|
271 |
chkmap.map(('bbb',), 'v') |
|
272 |
self.assertEqualDiff("'' InternalNode\n" |
|
273 |
" 'a' InternalNode\n" |
|
274 |
" 'aaa' LeafNode\n" |
|
275 |
" ('aaa',) 'v'\n" |
|
276 |
" 'aab' LeafNode\n" |
|
277 |
" ('aab',) 'v'\n" |
|
278 |
" 'aac' LeafNode\n" |
|
279 |
" ('aac',) 'v'\n" |
|
280 |
" 'b' LeafNode\n" |
|
281 |
" ('bbb',) 'v'\n", |
|
282 |
chkmap._dump_tree()) |
|
283 |
self.assertCanonicalForm(chkmap) |
|
284 |
||
285 |
def test_map_splits_with_longer_key(self): |
|
286 |
store = self.get_chk_bytes() |
|
287 |
chkmap = CHKMap(store, None) |
|
288 |
# Should fit 1 key per LeafNode
|
|
289 |
chkmap._root_node.set_maximum_size(10) |
|
290 |
chkmap.map(('aaa',), 'v') |
|
291 |
chkmap.map(('aaaa',), 'v') |
|
292 |
self.assertCanonicalForm(chkmap) |
|
293 |
self.assertIsInstance(chkmap._root_node, InternalNode) |
|
294 |
||
295 |
def test_with_linefeed_in_key(self): |
|
296 |
store = self.get_chk_bytes() |
|
297 |
chkmap = CHKMap(store, None) |
|
298 |
# Should fit 1 key per LeafNode
|
|
299 |
chkmap._root_node.set_maximum_size(10) |
|
300 |
chkmap.map(('a\ra',), 'val1') |
|
301 |
chkmap.map(('a\rb',), 'val2') |
|
302 |
chkmap.map(('ac',), 'val3') |
|
303 |
self.assertCanonicalForm(chkmap) |
|
304 |
self.assertEqualDiff("'' InternalNode\n" |
|
305 |
" 'a\\r' InternalNode\n" |
|
306 |
" 'a\\ra' LeafNode\n" |
|
307 |
" ('a\\ra',) 'val1'\n" |
|
308 |
" 'a\\rb' LeafNode\n" |
|
309 |
" ('a\\rb',) 'val2'\n" |
|
310 |
" 'ac' LeafNode\n" |
|
311 |
" ('ac',) 'val3'\n", |
|
312 |
chkmap._dump_tree()) |
|
313 |
# We should also successfully serialise and deserialise these items
|
|
314 |
root_key = chkmap._save() |
|
315 |
chkmap = CHKMap(store, root_key) |
|
316 |
self.assertEqualDiff("'' InternalNode\n" |
|
317 |
" 'a\\r' InternalNode\n" |
|
318 |
" 'a\\ra' LeafNode\n" |
|
319 |
" ('a\\ra',) 'val1'\n" |
|
320 |
" 'a\\rb' LeafNode\n" |
|
321 |
" ('a\\rb',) 'val2'\n" |
|
322 |
" 'ac' LeafNode\n" |
|
323 |
" ('ac',) 'val3'\n", |
|
324 |
chkmap._dump_tree()) |
|
325 |
||
326 |
def test_deep_splitting(self): |
|
327 |
store = self.get_chk_bytes() |
|
328 |
chkmap = CHKMap(store, None) |
|
329 |
# Should fit 2 keys per LeafNode
|
|
330 |
chkmap._root_node.set_maximum_size(40) |
|
331 |
chkmap.map(('aaaaaaaa',), 'v') |
|
332 |
chkmap.map(('aaaaabaa',), 'v') |
|
333 |
self.assertEqualDiff("'' LeafNode\n" |
|
334 |
" ('aaaaaaaa',) 'v'\n" |
|
335 |
" ('aaaaabaa',) 'v'\n", |
|
336 |
chkmap._dump_tree()) |
|
337 |
chkmap.map(('aaabaaaa',), 'v') |
|
338 |
chkmap.map(('aaababaa',), 'v') |
|
339 |
self.assertEqualDiff("'' InternalNode\n" |
|
340 |
" 'aaaa' LeafNode\n" |
|
341 |
" ('aaaaaaaa',) 'v'\n" |
|
342 |
" ('aaaaabaa',) 'v'\n" |
|
343 |
" 'aaab' LeafNode\n" |
|
344 |
" ('aaabaaaa',) 'v'\n" |
|
345 |
" ('aaababaa',) 'v'\n", |
|
346 |
chkmap._dump_tree()) |
|
347 |
chkmap.map(('aaabacaa',), 'v') |
|
348 |
chkmap.map(('aaabadaa',), 'v') |
|
349 |
self.assertEqualDiff("'' InternalNode\n" |
|
350 |
" 'aaaa' LeafNode\n" |
|
351 |
" ('aaaaaaaa',) 'v'\n" |
|
352 |
" ('aaaaabaa',) 'v'\n" |
|
353 |
" 'aaab' InternalNode\n" |
|
354 |
" 'aaabaa' LeafNode\n" |
|
355 |
" ('aaabaaaa',) 'v'\n" |
|
356 |
" 'aaabab' LeafNode\n" |
|
357 |
" ('aaababaa',) 'v'\n" |
|
358 |
" 'aaabac' LeafNode\n" |
|
359 |
" ('aaabacaa',) 'v'\n" |
|
360 |
" 'aaabad' LeafNode\n" |
|
361 |
" ('aaabadaa',) 'v'\n", |
|
362 |
chkmap._dump_tree()) |
|
363 |
chkmap.map(('aaababba',), 'val') |
|
364 |
chkmap.map(('aaababca',), 'val') |
|
365 |
self.assertEqualDiff("'' InternalNode\n" |
|
366 |
" 'aaaa' LeafNode\n" |
|
367 |
" ('aaaaaaaa',) 'v'\n" |
|
368 |
" ('aaaaabaa',) 'v'\n" |
|
369 |
" 'aaab' InternalNode\n" |
|
370 |
" 'aaabaa' LeafNode\n" |
|
371 |
" ('aaabaaaa',) 'v'\n" |
|
372 |
" 'aaabab' InternalNode\n" |
|
373 |
" 'aaababa' LeafNode\n" |
|
374 |
" ('aaababaa',) 'v'\n" |
|
375 |
" 'aaababb' LeafNode\n" |
|
376 |
" ('aaababba',) 'val'\n" |
|
377 |
" 'aaababc' LeafNode\n" |
|
378 |
" ('aaababca',) 'val'\n" |
|
379 |
" 'aaabac' LeafNode\n" |
|
380 |
" ('aaabacaa',) 'v'\n" |
|
381 |
" 'aaabad' LeafNode\n" |
|
382 |
" ('aaabadaa',) 'v'\n", |
|
383 |
chkmap._dump_tree()) |
|
384 |
# Now we add a node that should fit around an existing InternalNode,
|
|
385 |
# but has a slightly different key prefix, which causes a new
|
|
386 |
# InternalNode split
|
|
387 |
chkmap.map(('aaabDaaa',), 'v') |
|
388 |
self.assertEqualDiff("'' InternalNode\n" |
|
389 |
" 'aaaa' LeafNode\n" |
|
390 |
" ('aaaaaaaa',) 'v'\n" |
|
391 |
" ('aaaaabaa',) 'v'\n" |
|
392 |
" 'aaab' InternalNode\n" |
|
393 |
" 'aaabD' LeafNode\n" |
|
394 |
" ('aaabDaaa',) 'v'\n" |
|
395 |
" 'aaaba' InternalNode\n" |
|
396 |
" 'aaabaa' LeafNode\n" |
|
397 |
" ('aaabaaaa',) 'v'\n" |
|
398 |
" 'aaabab' InternalNode\n" |
|
399 |
" 'aaababa' LeafNode\n" |
|
400 |
" ('aaababaa',) 'v'\n" |
|
401 |
" 'aaababb' LeafNode\n" |
|
402 |
" ('aaababba',) 'val'\n" |
|
403 |
" 'aaababc' LeafNode\n" |
|
404 |
" ('aaababca',) 'val'\n" |
|
405 |
" 'aaabac' LeafNode\n" |
|
406 |
" ('aaabacaa',) 'v'\n" |
|
407 |
" 'aaabad' LeafNode\n" |
|
408 |
" ('aaabadaa',) 'v'\n", |
|
409 |
chkmap._dump_tree()) |
|
410 |
||
411 |
def test_map_collapses_if_size_changes(self): |
|
412 |
store = self.get_chk_bytes() |
|
413 |
chkmap = CHKMap(store, None) |
|
414 |
# Should fit 2 keys per LeafNode
|
|
415 |
chkmap._root_node.set_maximum_size(35) |
|
416 |
chkmap.map(('aaa',), 'v') |
|
417 |
chkmap.map(('aab',), 'very long value that splits') |
|
418 |
self.assertEqualDiff("'' InternalNode\n" |
|
419 |
" 'aaa' LeafNode\n" |
|
420 |
" ('aaa',) 'v'\n" |
|
421 |
" 'aab' LeafNode\n" |
|
422 |
" ('aab',) 'very long value that splits'\n", |
|
423 |
chkmap._dump_tree()) |
|
424 |
self.assertCanonicalForm(chkmap) |
|
425 |
# Now changing the value to something small should cause a rebuild
|
|
426 |
chkmap.map(('aab',), 'v') |
|
427 |
self.assertEqualDiff("'' LeafNode\n" |
|
428 |
" ('aaa',) 'v'\n" |
|
429 |
" ('aab',) 'v'\n", |
|
430 |
chkmap._dump_tree()) |
|
431 |
self.assertCanonicalForm(chkmap) |
|
432 |
||
433 |
def test_map_double_deep_collapses(self): |
|
434 |
store = self.get_chk_bytes() |
|
435 |
chkmap = CHKMap(store, None) |
|
436 |
# Should fit 3 small keys per LeafNode
|
|
437 |
chkmap._root_node.set_maximum_size(40) |
|
438 |
chkmap.map(('aaa',), 'v') |
|
439 |
chkmap.map(('aab',), 'very long value that splits') |
|
440 |
chkmap.map(('abc',), 'v') |
|
441 |
self.assertEqualDiff("'' InternalNode\n" |
|
442 |
" 'aa' InternalNode\n" |
|
443 |
" 'aaa' LeafNode\n" |
|
444 |
" ('aaa',) 'v'\n" |
|
445 |
" 'aab' LeafNode\n" |
|
446 |
" ('aab',) 'very long value that splits'\n" |
|
447 |
" 'ab' LeafNode\n" |
|
448 |
" ('abc',) 'v'\n", |
|
449 |
chkmap._dump_tree()) |
|
450 |
chkmap.map(('aab',), 'v') |
|
451 |
self.assertCanonicalForm(chkmap) |
|
452 |
self.assertEqualDiff("'' LeafNode\n" |
|
453 |
" ('aaa',) 'v'\n" |
|
454 |
" ('aab',) 'v'\n" |
|
455 |
" ('abc',) 'v'\n", |
|
456 |
chkmap._dump_tree()) |
|
457 |
||
458 |
def test_stable_unmap(self): |
|
459 |
store = self.get_chk_bytes() |
|
460 |
chkmap = CHKMap(store, None) |
|
461 |
# Should fit 2 keys per LeafNode
|
|
462 |
chkmap._root_node.set_maximum_size(35) |
|
463 |
chkmap.map(('aaa',), 'v') |
|
464 |
chkmap.map(('aab',), 'v') |
|
465 |
self.assertEqualDiff("'' LeafNode\n" |
|
466 |
" ('aaa',) 'v'\n" |
|
467 |
" ('aab',) 'v'\n", |
|
468 |
chkmap._dump_tree()) |
|
469 |
# Creates a new internal node, and splits the others into leaves
|
|
470 |
chkmap.map(('aac',), 'v') |
|
471 |
self.assertEqualDiff("'' InternalNode\n" |
|
472 |
" 'aaa' LeafNode\n" |
|
473 |
" ('aaa',) 'v'\n" |
|
474 |
" 'aab' LeafNode\n" |
|
475 |
" ('aab',) 'v'\n" |
|
476 |
" 'aac' LeafNode\n" |
|
477 |
" ('aac',) 'v'\n", |
|
478 |
chkmap._dump_tree()) |
|
479 |
self.assertCanonicalForm(chkmap) |
|
480 |
# Now lets unmap one of the keys, and assert that we collapse the
|
|
481 |
# structures.
|
|
482 |
chkmap.unmap(('aac',)) |
|
483 |
self.assertEqualDiff("'' LeafNode\n" |
|
484 |
" ('aaa',) 'v'\n" |
|
485 |
" ('aab',) 'v'\n", |
|
486 |
chkmap._dump_tree()) |
|
487 |
self.assertCanonicalForm(chkmap) |
|
488 |
||
489 |
def test_unmap_double_deep(self): |
|
490 |
store = self.get_chk_bytes() |
|
491 |
chkmap = CHKMap(store, None) |
|
492 |
# Should fit 3 keys per LeafNode
|
|
493 |
chkmap._root_node.set_maximum_size(40) |
|
494 |
chkmap.map(('aaa',), 'v') |
|
495 |
chkmap.map(('aaab',), 'v') |
|
496 |
chkmap.map(('aab',), 'very long value') |
|
497 |
chkmap.map(('abc',), 'v') |
|
498 |
self.assertEqualDiff("'' InternalNode\n" |
|
499 |
" 'aa' InternalNode\n" |
|
500 |
" 'aaa' LeafNode\n" |
|
501 |
" ('aaa',) 'v'\n" |
|
502 |
" ('aaab',) 'v'\n" |
|
503 |
" 'aab' LeafNode\n" |
|
504 |
" ('aab',) 'very long value'\n" |
|
505 |
" 'ab' LeafNode\n" |
|
506 |
" ('abc',) 'v'\n", |
|
507 |
chkmap._dump_tree()) |
|
508 |
# Removing the 'aab' key should cause everything to collapse back to a
|
|
509 |
# single node
|
|
510 |
chkmap.unmap(('aab',)) |
|
511 |
self.assertEqualDiff("'' LeafNode\n" |
|
512 |
" ('aaa',) 'v'\n" |
|
513 |
" ('aaab',) 'v'\n" |
|
514 |
" ('abc',) 'v'\n", |
|
515 |
chkmap._dump_tree()) |
|
516 |
||
517 |
def test_unmap_double_deep_non_empty_leaf(self): |
|
518 |
store = self.get_chk_bytes() |
|
519 |
chkmap = CHKMap(store, None) |
|
520 |
# Should fit 3 keys per LeafNode
|
|
521 |
chkmap._root_node.set_maximum_size(40) |
|
522 |
chkmap.map(('aaa',), 'v') |
|
523 |
chkmap.map(('aab',), 'long value') |
|
524 |
chkmap.map(('aabb',), 'v') |
|
525 |
chkmap.map(('abc',), 'v') |
|
526 |
self.assertEqualDiff("'' InternalNode\n" |
|
527 |
" 'aa' InternalNode\n" |
|
528 |
" 'aaa' LeafNode\n" |
|
529 |
" ('aaa',) 'v'\n" |
|
530 |
" 'aab' LeafNode\n" |
|
531 |
" ('aab',) 'long value'\n" |
|
532 |
" ('aabb',) 'v'\n" |
|
533 |
" 'ab' LeafNode\n" |
|
534 |
" ('abc',) 'v'\n", |
|
535 |
chkmap._dump_tree()) |
|
536 |
# Removing the 'aab' key should cause everything to collapse back to a
|
|
537 |
# single node
|
|
538 |
chkmap.unmap(('aab',)) |
|
539 |
self.assertEqualDiff("'' LeafNode\n" |
|
540 |
" ('aaa',) 'v'\n" |
|
541 |
" ('aabb',) 'v'\n" |
|
542 |
" ('abc',) 'v'\n", |
|
543 |
chkmap._dump_tree()) |
|
544 |
||
545 |
def test_unmap_with_known_internal_node_doesnt_page(self): |
|
546 |
store = self.get_chk_bytes() |
|
547 |
chkmap = CHKMap(store, None) |
|
548 |
# Should fit 3 keys per LeafNode
|
|
549 |
chkmap._root_node.set_maximum_size(30) |
|
550 |
chkmap.map(('aaa',), 'v') |
|
551 |
chkmap.map(('aab',), 'v') |
|
552 |
chkmap.map(('aac',), 'v') |
|
553 |
chkmap.map(('abc',), 'v') |
|
554 |
chkmap.map(('acd',), 'v') |
|
555 |
self.assertEqualDiff("'' InternalNode\n" |
|
556 |
" 'aa' InternalNode\n" |
|
557 |
" 'aaa' LeafNode\n" |
|
558 |
" ('aaa',) 'v'\n" |
|
559 |
" 'aab' LeafNode\n" |
|
560 |
" ('aab',) 'v'\n" |
|
561 |
" 'aac' LeafNode\n" |
|
562 |
" ('aac',) 'v'\n" |
|
563 |
" 'ab' LeafNode\n" |
|
564 |
" ('abc',) 'v'\n" |
|
565 |
" 'ac' LeafNode\n" |
|
566 |
" ('acd',) 'v'\n", |
|
567 |
chkmap._dump_tree()) |
|
568 |
# Save everything to the map, and start over
|
|
569 |
chkmap = CHKMap(store, chkmap._save()) |
|
570 |
# Mapping an 'aa' key loads the internal node, but should not map the
|
|
571 |
# 'ab' and 'ac' nodes
|
|
572 |
chkmap.map(('aad',), 'v') |
|
573 |
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode) |
|
574 |
self.assertIsInstance(chkmap._root_node._items['ab'], tuple) |
|
575 |
self.assertIsInstance(chkmap._root_node._items['ac'], tuple) |
|
576 |
# Unmapping 'acd' can notice that 'aa' is an InternalNode and not have
|
|
577 |
# to map in 'ab'
|
|
578 |
chkmap.unmap(('acd',)) |
|
579 |
self.assertIsInstance(chkmap._root_node._items['aa'], InternalNode) |
|
580 |
self.assertIsInstance(chkmap._root_node._items['ab'], tuple) |
|
581 |
||
582 |
def test_unmap_without_fitting_doesnt_page_in(self): |
|
583 |
store = self.get_chk_bytes() |
|
584 |
chkmap = CHKMap(store, None) |
|
585 |
# Should fit 2 keys per LeafNode
|
|
586 |
chkmap._root_node.set_maximum_size(20) |
|
587 |
chkmap.map(('aaa',), 'v') |
|
588 |
chkmap.map(('aab',), 'v') |
|
589 |
self.assertEqualDiff("'' InternalNode\n" |
|
590 |
" 'aaa' LeafNode\n" |
|
591 |
" ('aaa',) 'v'\n" |
|
592 |
" 'aab' LeafNode\n" |
|
593 |
" ('aab',) 'v'\n", |
|
594 |
chkmap._dump_tree()) |
|
595 |
# Save everything to the map, and start over
|
|
596 |
chkmap = CHKMap(store, chkmap._save()) |
|
597 |
chkmap.map(('aac',), 'v') |
|
598 |
chkmap.map(('aad',), 'v') |
|
599 |
chkmap.map(('aae',), 'v') |
|
600 |
chkmap.map(('aaf',), 'v') |
|
601 |
# At this point, the previous nodes should not be paged in, but the
|
|
602 |
# newly added nodes would be
|
|
603 |
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple) |
|
604 |
self.assertIsInstance(chkmap._root_node._items['aab'], tuple) |
|
605 |
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode) |
|
606 |
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode) |
|
607 |
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode) |
|
608 |
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode) |
|
609 |
# Now unmapping one of the new nodes will use only the already-paged-in
|
|
610 |
# nodes to determine that we don't need to do more.
|
|
611 |
chkmap.unmap(('aaf',)) |
|
612 |
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple) |
|
613 |
self.assertIsInstance(chkmap._root_node._items['aab'], tuple) |
|
614 |
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode) |
|
615 |
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode) |
|
616 |
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode) |
|
617 |
||
618 |
def test_unmap_pages_in_if_necessary(self): |
|
619 |
store = self.get_chk_bytes() |
|
620 |
chkmap = CHKMap(store, None) |
|
621 |
# Should fit 2 keys per LeafNode
|
|
622 |
chkmap._root_node.set_maximum_size(30) |
|
623 |
chkmap.map(('aaa',), 'val') |
|
624 |
chkmap.map(('aab',), 'val') |
|
625 |
chkmap.map(('aac',), 'val') |
|
626 |
self.assertEqualDiff("'' InternalNode\n" |
|
627 |
" 'aaa' LeafNode\n" |
|
628 |
" ('aaa',) 'val'\n" |
|
629 |
" 'aab' LeafNode\n" |
|
630 |
" ('aab',) 'val'\n" |
|
631 |
" 'aac' LeafNode\n" |
|
632 |
" ('aac',) 'val'\n", |
|
633 |
chkmap._dump_tree()) |
|
634 |
root_key = chkmap._save() |
|
635 |
# Save everything to the map, and start over
|
|
636 |
chkmap = CHKMap(store, root_key) |
|
637 |
chkmap.map(('aad',), 'v') |
|
638 |
# At this point, the previous nodes should not be paged in, but the
|
|
639 |
# newly added node would be
|
|
640 |
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple) |
|
641 |
self.assertIsInstance(chkmap._root_node._items['aab'], tuple) |
|
642 |
self.assertIsInstance(chkmap._root_node._items['aac'], tuple) |
|
643 |
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode) |
|
644 |
# Unmapping the new node will check the existing nodes to see if they
|
|
645 |
# would fit.
|
|
646 |
# Clear the page cache so we ensure we have to read all the children
|
|
647 |
chk_map._page_cache.clear() |
|
648 |
chkmap.unmap(('aad',)) |
|
649 |
self.assertIsInstance(chkmap._root_node._items['aaa'], LeafNode) |
|
650 |
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode) |
|
651 |
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode) |
|
652 |
||
653 |
def test_unmap_pages_in_from_page_cache(self): |
|
654 |
store = self.get_chk_bytes() |
|
655 |
chkmap = CHKMap(store, None) |
|
656 |
# Should fit 2 keys per LeafNode
|
|
657 |
chkmap._root_node.set_maximum_size(30) |
|
658 |
chkmap.map(('aaa',), 'val') |
|
659 |
chkmap.map(('aab',), 'val') |
|
660 |
chkmap.map(('aac',), 'val') |
|
661 |
root_key = chkmap._save() |
|
662 |
# Save everything to the map, and start over
|
|
663 |
chkmap = CHKMap(store, root_key) |
|
664 |
chkmap.map(('aad',), 'val') |
|
665 |
self.assertEqualDiff("'' InternalNode\n" |
|
666 |
" 'aaa' LeafNode\n" |
|
667 |
" ('aaa',) 'val'\n" |
|
668 |
" 'aab' LeafNode\n" |
|
669 |
" ('aab',) 'val'\n" |
|
670 |
" 'aac' LeafNode\n" |
|
671 |
" ('aac',) 'val'\n" |
|
672 |
" 'aad' LeafNode\n" |
|
673 |
" ('aad',) 'val'\n", |
|
674 |
chkmap._dump_tree()) |
|
675 |
# Save everything to the map, start over after _dump_tree
|
|
676 |
chkmap = CHKMap(store, root_key) |
|
677 |
chkmap.map(('aad',), 'v') |
|
678 |
# At this point, the previous nodes should not be paged in, but the
|
|
679 |
# newly added node would be
|
|
680 |
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple) |
|
681 |
self.assertIsInstance(chkmap._root_node._items['aab'], tuple) |
|
682 |
self.assertIsInstance(chkmap._root_node._items['aac'], tuple) |
|
683 |
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode) |
|
684 |
# Now clear the page cache, and only include 2 of the children in the
|
|
685 |
# cache
|
|
686 |
aab_key = chkmap._root_node._items['aab'] |
|
687 |
aab_bytes = chk_map._page_cache[aab_key] |
|
688 |
aac_key = chkmap._root_node._items['aac'] |
|
689 |
aac_bytes = chk_map._page_cache[aac_key] |
|
690 |
chk_map._page_cache.clear() |
|
691 |
chk_map._page_cache[aab_key] = aab_bytes |
|
692 |
chk_map._page_cache[aac_key] = aac_bytes |
|
693 |
||
694 |
# Unmapping the new node will check the nodes from the page cache
|
|
695 |
# first, and not have to read in 'aaa'
|
|
696 |
chkmap.unmap(('aad',)) |
|
697 |
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple) |
|
698 |
self.assertIsInstance(chkmap._root_node._items['aab'], LeafNode) |
|
699 |
self.assertIsInstance(chkmap._root_node._items['aac'], LeafNode) |
|
700 |
||
701 |
def test_unmap_uses_existing_items(self): |
|
702 |
store = self.get_chk_bytes() |
|
703 |
chkmap = CHKMap(store, None) |
|
704 |
# Should fit 2 keys per LeafNode
|
|
705 |
chkmap._root_node.set_maximum_size(30) |
|
706 |
chkmap.map(('aaa',), 'val') |
|
707 |
chkmap.map(('aab',), 'val') |
|
708 |
chkmap.map(('aac',), 'val') |
|
709 |
root_key = chkmap._save() |
|
710 |
# Save everything to the map, and start over
|
|
711 |
chkmap = CHKMap(store, root_key) |
|
712 |
chkmap.map(('aad',), 'val') |
|
713 |
chkmap.map(('aae',), 'val') |
|
714 |
chkmap.map(('aaf',), 'val') |
|
715 |
# At this point, the previous nodes should not be paged in, but the
|
|
716 |
# newly added node would be
|
|
717 |
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple) |
|
718 |
self.assertIsInstance(chkmap._root_node._items['aab'], tuple) |
|
719 |
self.assertIsInstance(chkmap._root_node._items['aac'], tuple) |
|
720 |
self.assertIsInstance(chkmap._root_node._items['aad'], LeafNode) |
|
721 |
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode) |
|
722 |
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode) |
|
723 |
||
724 |
# Unmapping a new node will see the other nodes that are already in
|
|
725 |
# memory, and not need to page in anything else
|
|
726 |
chkmap.unmap(('aad',)) |
|
727 |
self.assertIsInstance(chkmap._root_node._items['aaa'], tuple) |
|
728 |
self.assertIsInstance(chkmap._root_node._items['aab'], tuple) |
|
729 |
self.assertIsInstance(chkmap._root_node._items['aac'], tuple) |
|
730 |
self.assertIsInstance(chkmap._root_node._items['aae'], LeafNode) |
|
731 |
self.assertIsInstance(chkmap._root_node._items['aaf'], LeafNode) |
|
732 |
||
733 |
def test_iter_changes_empty_ab(self): |
|
734 |
# Asking for changes between an empty dict to a dict with keys returns
|
|
735 |
# all the keys.
|
|
736 |
basis = self._get_map({}, maximum_size=10) |
|
737 |
target = self._get_map( |
|
738 |
{('a',): 'content here', ('b',): 'more content'}, |
|
739 |
chk_bytes=basis._store, maximum_size=10) |
|
740 |
self.assertEqual([(('a',), None, 'content here'), |
|
741 |
(('b',), None, 'more content')], |
|
742 |
sorted(list(target.iter_changes(basis)))) |
|
743 |
||
744 |
def test_iter_changes_ab_empty(self): |
|
745 |
# Asking for changes between a dict with keys to an empty dict returns
|
|
746 |
# all the keys.
|
|
747 |
basis = self._get_map({('a',): 'content here', ('b',): 'more content'}, |
|
748 |
maximum_size=10) |
|
749 |
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10) |
|
750 |
self.assertEqual([(('a',), 'content here', None), |
|
751 |
(('b',), 'more content', None)], |
|
752 |
sorted(list(target.iter_changes(basis)))) |
|
753 |
||
754 |
def test_iter_changes_empty_empty_is_empty(self): |
|
755 |
basis = self._get_map({}, maximum_size=10) |
|
756 |
target = self._get_map({}, chk_bytes=basis._store, maximum_size=10) |
|
757 |
self.assertEqual([], sorted(list(target.iter_changes(basis)))) |
|
758 |
||
759 |
def test_iter_changes_ab_ab_is_empty(self): |
|
760 |
basis = self._get_map({('a',): 'content here', ('b',): 'more content'}, |
|
761 |
maximum_size=10) |
|
762 |
target = self._get_map( |
|
763 |
{('a',): 'content here', ('b',): 'more content'}, |
|
764 |
chk_bytes=basis._store, maximum_size=10) |
|
765 |
self.assertEqual([], sorted(list(target.iter_changes(basis)))) |
|
766 |
||
767 |
def test_iter_changes_ab_ab_nodes_not_loaded(self): |
|
768 |
basis = self._get_map({('a',): 'content here', ('b',): 'more content'}, |
|
769 |
maximum_size=10) |
|
770 |
target = self._get_map( |
|
771 |
{('a',): 'content here', ('b',): 'more content'}, |
|
772 |
chk_bytes=basis._store, maximum_size=10) |
|
773 |
list(target.iter_changes(basis)) |
|
774 |
self.assertIsInstance(target._root_node, tuple) |
|
775 |
self.assertIsInstance(basis._root_node, tuple) |
|
776 |
||
777 |
def test_iter_changes_ab_ab_changed_values_shown(self): |
|
778 |
basis = self._get_map({('a',): 'content here', ('b',): 'more content'}, |
|
779 |
maximum_size=10) |
|
780 |
target = self._get_map( |
|
781 |
{('a',): 'content here', ('b',): 'different content'}, |
|
782 |
chk_bytes=basis._store, maximum_size=10) |
|
783 |
result = sorted(list(target.iter_changes(basis))) |
|
784 |
self.assertEqual([(('b',), 'more content', 'different content')], |
|
785 |
result) |
|
786 |
||
787 |
def test_iter_changes_mixed_node_length(self): |
|
788 |
# When one side has different node lengths than the other, common
|
|
789 |
# but different keys still need to be show, and new-and-old included
|
|
790 |
# appropriately.
|
|
791 |
# aaa - common unaltered
|
|
792 |
# aab - common altered
|
|
793 |
# b - basis only
|
|
794 |
# at - target only
|
|
795 |
# we expect:
|
|
796 |
# aaa to be not loaded (later test)
|
|
797 |
# aab, b, at to be returned.
|
|
798 |
# basis splits at byte 0,1,2, aaa is commonb is basis only
|
|
799 |
basis_dict = {('aaa',): 'foo bar', |
|
800 |
('aab',): 'common altered a', ('b',): 'foo bar b'} |
|
801 |
# target splits at byte 1,2, at is target only
|
|
802 |
target_dict = {('aaa',): 'foo bar', |
|
803 |
('aab',): 'common altered b', ('at',): 'foo bar t'} |
|
804 |
changes = [ |
|
805 |
(('aab',), 'common altered a', 'common altered b'), |
|
806 |
(('at',), None, 'foo bar t'), |
|
807 |
(('b',), 'foo bar b', None), |
|
808 |
]
|
|
809 |
basis = self._get_map(basis_dict, maximum_size=10) |
|
810 |
target = self._get_map(target_dict, maximum_size=10, |
|
811 |
chk_bytes=basis._store) |
|
812 |
self.assertEqual(changes, sorted(list(target.iter_changes(basis)))) |
|
813 |
||
814 |
def test_iter_changes_common_pages_not_loaded(self): |
|
815 |
# aaa - common unaltered
|
|
816 |
# aab - common altered
|
|
817 |
# b - basis only
|
|
818 |
# at - target only
|
|
819 |
# we expect:
|
|
820 |
# aaa to be not loaded
|
|
821 |
# aaa not to be in result.
|
|
822 |
basis_dict = {('aaa',): 'foo bar', |
|
823 |
('aab',): 'common altered a', ('b',): 'foo bar b'} |
|
824 |
# target splits at byte 1, at is target only
|
|
825 |
target_dict = {('aaa',): 'foo bar', |
|
826 |
('aab',): 'common altered b', ('at',): 'foo bar t'} |
|
827 |
basis = self._get_map(basis_dict, maximum_size=10) |
|
828 |
target = self._get_map(target_dict, maximum_size=10, |
|
829 |
chk_bytes=basis._store) |
|
830 |
basis_get = basis._store.get_record_stream |
|
831 |
def get_record_stream(keys, order, fulltext): |
|
832 |
if ('sha1:1adf7c0d1b9140ab5f33bb64c6275fa78b1580b7',) in keys: |
|
833 |
self.fail("'aaa' pointer was followed %r" % keys) |
|
834 |
return basis_get(keys, order, fulltext) |
|
835 |
basis._store.get_record_stream = get_record_stream |
|
836 |
result = sorted(list(target.iter_changes(basis))) |
|
837 |
for change in result: |
|
838 |
if change[0] == ('aaa',): |
|
839 |
self.fail("Found unexpected change: %s" % change) |
|
840 |
||
841 |
def test_iter_changes_unchanged_keys_in_multi_key_leafs_ignored(self): |
|
842 |
# Within a leaf there are no hash's to exclude keys, make sure multi
|
|
843 |
# value leaf nodes are handled well.
|
|
844 |
basis_dict = {('aaa',): 'foo bar', |
|
845 |
('aab',): 'common altered a', ('b',): 'foo bar b'} |
|
846 |
target_dict = {('aaa',): 'foo bar', |
|
847 |
('aab',): 'common altered b', ('at',): 'foo bar t'} |
|
848 |
changes = [ |
|
849 |
(('aab',), 'common altered a', 'common altered b'), |
|
850 |
(('at',), None, 'foo bar t'), |
|
851 |
(('b',), 'foo bar b', None), |
|
852 |
]
|
|
853 |
basis = self._get_map(basis_dict) |
|
854 |
target = self._get_map(target_dict, chk_bytes=basis._store) |
|
855 |
self.assertEqual(changes, sorted(list(target.iter_changes(basis)))) |
|
856 |
||
857 |
def test_iteritems_empty(self): |
|
858 |
chk_bytes = self.get_chk_bytes() |
|
859 |
root_key = CHKMap.from_dict(chk_bytes, {}) |
|
860 |
chkmap = CHKMap(chk_bytes, root_key) |
|
861 |
self.assertEqual([], list(chkmap.iteritems())) |
|
862 |
||
863 |
def test_iteritems_two_items(self): |
|
864 |
chk_bytes = self.get_chk_bytes() |
|
865 |
root_key = CHKMap.from_dict(chk_bytes, |
|
866 |
{"a":"content here", "b":"more content"}) |
|
867 |
chkmap = CHKMap(chk_bytes, root_key) |
|
868 |
self.assertEqual([(("a",), "content here"), (("b",), "more content")], |
|
869 |
sorted(list(chkmap.iteritems()))) |
|
870 |
||
871 |
def test_iteritems_selected_one_of_two_items(self): |
|
872 |
chkmap = self._get_map( {("a",):"content here", ("b",):"more content"}) |
|
873 |
self.assertEqual({("a",): "content here"}, |
|
874 |
self.to_dict(chkmap, [("a",)])) |
|
875 |
||
876 |
def test_iteritems_keys_prefixed_by_2_width_nodes(self): |
|
877 |
chkmap = self._get_map( |
|
878 |
{("a","a"):"content here", ("a", "b",):"more content", |
|
879 |
("b", ""): 'boring content'}, |
|
880 |
maximum_size=10, key_width=2) |
|
881 |
self.assertEqual( |
|
882 |
{("a", "a"): "content here", ("a", "b"): 'more content'}, |
|
883 |
self.to_dict(chkmap, [("a",)])) |
|
884 |
||
885 |
def test_iteritems_keys_prefixed_by_2_width_nodes_hashed(self): |
|
886 |
search_key_func = chk_map.search_key_registry.get('hash-16-way') |
|
887 |
self.assertEqual('E8B7BE43\x00E8B7BE43', search_key_func(('a', 'a'))) |
|
888 |
self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b'))) |
|
889 |
self.assertEqual('71BEEFF9\x0000000000', search_key_func(('b', ''))) |
|
890 |
chkmap = self._get_map( |
|
891 |
{("a","a"):"content here", ("a", "b",):"more content", |
|
892 |
("b", ""): 'boring content'}, |
|
893 |
maximum_size=10, key_width=2, search_key_func=search_key_func) |
|
894 |
self.assertEqual( |
|
895 |
{("a", "a"): "content here", ("a", "b"): 'more content'}, |
|
896 |
self.to_dict(chkmap, [("a",)])) |
|
897 |
||
898 |
def test_iteritems_keys_prefixed_by_2_width_one_leaf(self): |
|
899 |
chkmap = self._get_map( |
|
900 |
{("a","a"):"content here", ("a", "b",):"more content", |
|
901 |
("b", ""): 'boring content'}, key_width=2) |
|
902 |
self.assertEqual( |
|
903 |
{("a", "a"): "content here", ("a", "b"): 'more content'}, |
|
904 |
self.to_dict(chkmap, [("a",)])) |
|
905 |
||
906 |
def test___len__empty(self): |
|
907 |
chkmap = self._get_map({}) |
|
908 |
self.assertEqual(0, len(chkmap)) |
|
909 |
||
910 |
def test___len__2(self): |
|
911 |
chkmap = self._get_map({("foo",):"bar", ("gam",):"quux"}) |
|
912 |
self.assertEqual(2, len(chkmap)) |
|
913 |
||
914 |
def test_max_size_100_bytes_new(self): |
|
915 |
# When there is a 100 byte upper node limit, a tree is formed.
|
|
916 |
chkmap = self._get_map({("k1"*50,):"v1", ("k2"*50,):"v2"}, maximum_size=100) |
|
917 |
# We expect three nodes:
|
|
918 |
# A root, with two children, and with two key prefixes - k1 to one, and
|
|
919 |
# k2 to the other as our node splitting is only just being developed.
|
|
920 |
# The maximum size should be embedded
|
|
921 |
chkmap._ensure_root() |
|
922 |
self.assertEqual(100, chkmap._root_node.maximum_size) |
|
923 |
self.assertEqual(1, chkmap._root_node._key_width) |
|
924 |
# There should be two child nodes, and prefix of 2(bytes):
|
|
925 |
self.assertEqual(2, len(chkmap._root_node._items)) |
|
926 |
self.assertEqual("k", chkmap._root_node._compute_search_prefix()) |
|
927 |
# The actual nodes pointed at will change as serialisers change; so
|
|
928 |
# here we test that the key prefix is correct; then load the nodes and
|
|
929 |
# check they have the right pointed at key; whether they have the
|
|
930 |
# pointed at value inline or not is also unrelated to this test so we
|
|
931 |
# don't check that in detail - rather we just check the aggregate
|
|
932 |
# value.
|
|
933 |
nodes = sorted(chkmap._root_node._items.items()) |
|
934 |
ptr1 = nodes[0] |
|
935 |
ptr2 = nodes[1] |
|
936 |
self.assertEqual('k1', ptr1[0]) |
|
937 |
self.assertEqual('k2', ptr2[0]) |
|
938 |
node1 = chk_map._deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1], None) |
|
939 |
self.assertIsInstance(node1, LeafNode) |
|
940 |
self.assertEqual(1, len(node1)) |
|
941 |
self.assertEqual({('k1'*50,): 'v1'}, self.to_dict(node1, chkmap._store)) |
|
942 |
node2 = chk_map._deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1], None) |
|
943 |
self.assertIsInstance(node2, LeafNode) |
|
944 |
self.assertEqual(1, len(node2)) |
|
945 |
self.assertEqual({('k2'*50,): 'v2'}, self.to_dict(node2, chkmap._store)) |
|
946 |
# Having checked we have a good structure, check that the content is
|
|
947 |
# still accessible.
|
|
948 |
self.assertEqual(2, len(chkmap)) |
|
949 |
self.assertEqual({("k1"*50,): "v1", ("k2"*50,): "v2"}, |
|
950 |
self.to_dict(chkmap)) |
|
951 |
||
952 |
def test_init_root_is_LeafNode_new(self): |
|
953 |
chk_bytes = self.get_chk_bytes() |
|
954 |
chkmap = CHKMap(chk_bytes, None) |
|
955 |
self.assertIsInstance(chkmap._root_node, LeafNode) |
|
956 |
self.assertEqual({}, self.to_dict(chkmap)) |
|
957 |
self.assertEqual(0, len(chkmap)) |
|
958 |
||
959 |
def test_init_and_save_new(self): |
|
960 |
chk_bytes = self.get_chk_bytes() |
|
961 |
chkmap = CHKMap(chk_bytes, None) |
|
962 |
key = chkmap._save() |
|
963 |
leaf_node = LeafNode() |
|
964 |
self.assertEqual([key], leaf_node.serialise(chk_bytes)) |
|
965 |
||
966 |
def test_map_first_item_new(self): |
|
967 |
chk_bytes = self.get_chk_bytes() |
|
968 |
chkmap = CHKMap(chk_bytes, None) |
|
969 |
chkmap.map(("foo,",), "bar") |
|
970 |
self.assertEqual({('foo,',): 'bar'}, self.to_dict(chkmap)) |
|
971 |
self.assertEqual(1, len(chkmap)) |
|
972 |
key = chkmap._save() |
|
973 |
leaf_node = LeafNode() |
|
974 |
leaf_node.map(chk_bytes, ("foo,",), "bar") |
|
975 |
self.assertEqual([key], leaf_node.serialise(chk_bytes)) |
|
976 |
||
977 |
def test_unmap_last_item_root_is_leaf_new(self): |
|
978 |
chkmap = self._get_map({("k1"*50,): "v1", ("k2"*50,): "v2"}) |
|
979 |
chkmap.unmap(("k1"*50,)) |
|
980 |
chkmap.unmap(("k2"*50,)) |
|
981 |
self.assertEqual(0, len(chkmap)) |
|
982 |
self.assertEqual({}, self.to_dict(chkmap)) |
|
983 |
key = chkmap._save() |
|
984 |
leaf_node = LeafNode() |
|
985 |
self.assertEqual([key], leaf_node.serialise(chkmap._store)) |
|
986 |
||
987 |
def test__dump_tree(self): |
|
988 |
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2", |
|
989 |
("bbb",): "value3",}, |
|
990 |
maximum_size=15) |
|
991 |
self.assertEqualDiff("'' InternalNode\n" |
|
992 |
" 'a' InternalNode\n" |
|
993 |
" 'aaa' LeafNode\n" |
|
994 |
" ('aaa',) 'value1'\n" |
|
995 |
" 'aab' LeafNode\n" |
|
996 |
" ('aab',) 'value2'\n" |
|
997 |
" 'b' LeafNode\n" |
|
998 |
" ('bbb',) 'value3'\n", |
|
999 |
chkmap._dump_tree()) |
|
1000 |
self.assertEqualDiff("'' InternalNode\n" |
|
1001 |
" 'a' InternalNode\n" |
|
1002 |
" 'aaa' LeafNode\n" |
|
1003 |
" ('aaa',) 'value1'\n" |
|
1004 |
" 'aab' LeafNode\n" |
|
1005 |
" ('aab',) 'value2'\n" |
|
1006 |
" 'b' LeafNode\n" |
|
1007 |
" ('bbb',) 'value3'\n", |
|
1008 |
chkmap._dump_tree()) |
|
1009 |
self.assertEqualDiff( |
|
1010 |
"'' InternalNode sha1:0690d471eb0a624f359797d0ee4672bd68f4e236\n" |
|
1011 |
" 'a' InternalNode sha1:1514c35503da9418d8fd90c1bed553077cb53673\n" |
|
1012 |
" 'aaa' LeafNode sha1:4cc5970454d40b4ce297a7f13ddb76f63b88fefb\n" |
|
1013 |
" ('aaa',) 'value1'\n" |
|
1014 |
" 'aab' LeafNode sha1:1d68bc90914ef8a3edbcc8bb28b00cb4fea4b5e2\n" |
|
1015 |
" ('aab',) 'value2'\n" |
|
1016 |
" 'b' LeafNode sha1:3686831435b5596515353364eab0399dc45d49e7\n" |
|
1017 |
" ('bbb',) 'value3'\n", |
|
1018 |
chkmap._dump_tree(include_keys=True)) |
|
1019 |
||
1020 |
def test__dump_tree_in_progress(self): |
|
1021 |
chkmap = self._get_map({("aaa",): "value1", ("aab",): "value2"}, |
|
1022 |
maximum_size=10) |
|
1023 |
chkmap.map(('bbb',), 'value3') |
|
1024 |
self.assertEqualDiff("'' InternalNode\n" |
|
1025 |
" 'a' InternalNode\n" |
|
1026 |
" 'aaa' LeafNode\n" |
|
1027 |
" ('aaa',) 'value1'\n" |
|
1028 |
" 'aab' LeafNode\n" |
|
1029 |
" ('aab',) 'value2'\n" |
|
1030 |
" 'b' LeafNode\n" |
|
1031 |
" ('bbb',) 'value3'\n", |
|
1032 |
chkmap._dump_tree()) |
|
1033 |
# For things that are updated by adding 'bbb', we don't have a sha key
|
|
1034 |
# for them yet, so they are listed as None
|
|
1035 |
self.assertEqualDiff( |
|
1036 |
"'' InternalNode None\n" |
|
1037 |
" 'a' InternalNode sha1:6b0d881dd739a66f733c178b24da64395edfaafd\n" |
|
1038 |
" 'aaa' LeafNode sha1:40b39a08d895babce17b20ae5f62d187eaa4f63a\n" |
|
1039 |
" ('aaa',) 'value1'\n" |
|
1040 |
" 'aab' LeafNode sha1:ad1dc7c4e801302c95bf1ba7b20bc45e548cd51a\n" |
|
1041 |
" ('aab',) 'value2'\n" |
|
1042 |
" 'b' LeafNode None\n" |
|
1043 |
" ('bbb',) 'value3'\n", |
|
1044 |
chkmap._dump_tree(include_keys=True)) |
|
1045 |
||
1046 |
||
1047 |
def _search_key_single(key): |
|
1048 |
"""A search key function that maps all nodes to the same value"""
|
|
1049 |
return 'value' |
|
1050 |
||
1051 |
def _test_search_key(key): |
|
1052 |
return 'test:' + '\x00'.join(key) |
|
1053 |
||
1054 |
||
1055 |
class TestMapSearchKeys(TestCaseWithStore): |
|
1056 |
||
1057 |
def test_default_chk_map_uses_flat_search_key(self): |
|
1058 |
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None) |
|
1059 |
self.assertEqual('1', |
|
1060 |
chkmap._search_key_func(('1',))) |
|
1061 |
self.assertEqual('1\x002', |
|
1062 |
chkmap._search_key_func(('1', '2'))) |
|
1063 |
self.assertEqual('1\x002\x003', |
|
1064 |
chkmap._search_key_func(('1', '2', '3'))) |
|
1065 |
||
1066 |
def test_search_key_is_passed_to_root_node(self): |
|
1067 |
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None, |
|
1068 |
search_key_func=_test_search_key) |
|
1069 |
self.assertIs(_test_search_key, chkmap._search_key_func) |
|
1070 |
self.assertEqual('test:1\x002\x003', |
|
1071 |
chkmap._search_key_func(('1', '2', '3'))) |
|
1072 |
self.assertEqual('test:1\x002\x003', |
|
1073 |
chkmap._root_node._search_key(('1', '2', '3'))) |
|
1074 |
||
1075 |
def test_search_key_passed_via__ensure_root(self): |
|
1076 |
chk_bytes = self.get_chk_bytes() |
|
1077 |
chkmap = chk_map.CHKMap(chk_bytes, None, |
|
1078 |
search_key_func=_test_search_key) |
|
1079 |
root_key = chkmap._save() |
|
1080 |
chkmap = chk_map.CHKMap(chk_bytes, root_key, |
|
1081 |
search_key_func=_test_search_key) |
|
1082 |
chkmap._ensure_root() |
|
1083 |
self.assertEqual('test:1\x002\x003', |
|
1084 |
chkmap._root_node._search_key(('1', '2', '3'))) |
|
1085 |
||
1086 |
def test_search_key_with_internal_node(self): |
|
1087 |
chk_bytes = self.get_chk_bytes() |
|
1088 |
chkmap = chk_map.CHKMap(chk_bytes, None, |
|
1089 |
search_key_func=_test_search_key) |
|
1090 |
chkmap._root_node.set_maximum_size(10) |
|
1091 |
chkmap.map(('1',), 'foo') |
|
1092 |
chkmap.map(('2',), 'bar') |
|
1093 |
chkmap.map(('3',), 'baz') |
|
1094 |
self.assertEqualDiff("'' InternalNode\n" |
|
1095 |
" 'test:1' LeafNode\n" |
|
1096 |
" ('1',) 'foo'\n" |
|
1097 |
" 'test:2' LeafNode\n" |
|
1098 |
" ('2',) 'bar'\n" |
|
1099 |
" 'test:3' LeafNode\n" |
|
1100 |
" ('3',) 'baz'\n" |
|
1101 |
, chkmap._dump_tree()) |
|
1102 |
root_key = chkmap._save() |
|
1103 |
chkmap = chk_map.CHKMap(chk_bytes, root_key, |
|
1104 |
search_key_func=_test_search_key) |
|
1105 |
self.assertEqualDiff("'' InternalNode\n" |
|
1106 |
" 'test:1' LeafNode\n" |
|
1107 |
" ('1',) 'foo'\n" |
|
1108 |
" 'test:2' LeafNode\n" |
|
1109 |
" ('2',) 'bar'\n" |
|
1110 |
" 'test:3' LeafNode\n" |
|
1111 |
" ('3',) 'baz'\n" |
|
1112 |
, chkmap._dump_tree()) |
|
1113 |
||
1114 |
def test_search_key_16(self): |
|
1115 |
chk_bytes = self.get_chk_bytes() |
|
1116 |
chkmap = chk_map.CHKMap(chk_bytes, None, |
|
1117 |
search_key_func=chk_map._search_key_16) |
|
1118 |
chkmap._root_node.set_maximum_size(10) |
|
1119 |
chkmap.map(('1',), 'foo') |
|
1120 |
chkmap.map(('2',), 'bar') |
|
1121 |
chkmap.map(('3',), 'baz') |
|
1122 |
self.assertEqualDiff("'' InternalNode\n" |
|
1123 |
" '1' LeafNode\n" |
|
1124 |
" ('2',) 'bar'\n" |
|
1125 |
" '6' LeafNode\n" |
|
1126 |
" ('3',) 'baz'\n" |
|
1127 |
" '8' LeafNode\n" |
|
1128 |
" ('1',) 'foo'\n" |
|
1129 |
, chkmap._dump_tree()) |
|
1130 |
root_key = chkmap._save() |
|
1131 |
chkmap = chk_map.CHKMap(chk_bytes, root_key, |
|
1132 |
search_key_func=chk_map._search_key_16) |
|
1133 |
# We can get the values back correctly
|
|
1134 |
self.assertEqual([(('1',), 'foo')], |
|
1135 |
list(chkmap.iteritems([('1',)]))) |
|
1136 |
self.assertEqualDiff("'' InternalNode\n" |
|
1137 |
" '1' LeafNode\n" |
|
1138 |
" ('2',) 'bar'\n" |
|
1139 |
" '6' LeafNode\n" |
|
1140 |
" ('3',) 'baz'\n" |
|
1141 |
" '8' LeafNode\n" |
|
1142 |
" ('1',) 'foo'\n" |
|
1143 |
, chkmap._dump_tree()) |
|
1144 |
||
1145 |
def test_search_key_255(self): |
|
1146 |
chk_bytes = self.get_chk_bytes() |
|
1147 |
chkmap = chk_map.CHKMap(chk_bytes, None, |
|
1148 |
search_key_func=chk_map._search_key_255) |
|
1149 |
chkmap._root_node.set_maximum_size(10) |
|
1150 |
chkmap.map(('1',), 'foo') |
|
1151 |
chkmap.map(('2',), 'bar') |
|
1152 |
chkmap.map(('3',), 'baz') |
|
1153 |
self.assertEqualDiff("'' InternalNode\n" |
|
1154 |
" '\\x1a' LeafNode\n" |
|
1155 |
" ('2',) 'bar'\n" |
|
1156 |
" 'm' LeafNode\n" |
|
1157 |
" ('3',) 'baz'\n" |
|
1158 |
" '\\x83' LeafNode\n" |
|
1159 |
" ('1',) 'foo'\n" |
|
1160 |
, chkmap._dump_tree()) |
|
1161 |
root_key = chkmap._save() |
|
1162 |
chkmap = chk_map.CHKMap(chk_bytes, root_key, |
|
1163 |
search_key_func=chk_map._search_key_255) |
|
1164 |
# We can get the values back correctly
|
|
1165 |
self.assertEqual([(('1',), 'foo')], |
|
1166 |
list(chkmap.iteritems([('1',)]))) |
|
1167 |
self.assertEqualDiff("'' InternalNode\n" |
|
1168 |
" '\\x1a' LeafNode\n" |
|
1169 |
" ('2',) 'bar'\n" |
|
1170 |
" 'm' LeafNode\n" |
|
1171 |
" ('3',) 'baz'\n" |
|
1172 |
" '\\x83' LeafNode\n" |
|
1173 |
" ('1',) 'foo'\n" |
|
1174 |
, chkmap._dump_tree()) |
|
1175 |
||
1176 |
def test_search_key_collisions(self): |
|
1177 |
chkmap = chk_map.CHKMap(self.get_chk_bytes(), None, |
|
1178 |
search_key_func=_search_key_single) |
|
1179 |
# The node will want to expand, but it cannot, because it knows that
|
|
1180 |
# all the keys must map to this node
|
|
1181 |
chkmap._root_node.set_maximum_size(20) |
|
1182 |
chkmap.map(('1',), 'foo') |
|
1183 |
chkmap.map(('2',), 'bar') |
|
1184 |
chkmap.map(('3',), 'baz') |
|
1185 |
self.assertEqualDiff("'' LeafNode\n" |
|
1186 |
" ('1',) 'foo'\n" |
|
1187 |
" ('2',) 'bar'\n" |
|
1188 |
" ('3',) 'baz'\n" |
|
1189 |
, chkmap._dump_tree()) |
|
1190 |
||
1191 |
||
1192 |
class TestSearchKeyFuncs(tests.TestCase): |
|
1193 |
||
1194 |
def assertSearchKey16(self, expected, key): |
|
1195 |
self.assertEqual(expected, chk_map._search_key_16(key)) |
|
1196 |
||
1197 |
def assertSearchKey255(self, expected, key): |
|
1198 |
actual = chk_map._search_key_255(key) |
|
1199 |
self.assertEqual(expected, actual, 'actual: %r' % (actual,)) |
|
1200 |
||
1201 |
def test_simple_16(self): |
|
1202 |
self.assertSearchKey16('8C736521', ('foo',)) |
|
1203 |
self.assertSearchKey16('8C736521\x008C736521', ('foo', 'foo')) |
|
1204 |
self.assertSearchKey16('8C736521\x0076FF8CAA', ('foo', 'bar')) |
|
1205 |
self.assertSearchKey16('ED82CD11', ('abcd',)) |
|
1206 |
||
1207 |
def test_simple_255(self): |
|
1208 |
self.assertSearchKey255('\x8cse!', ('foo',)) |
|
1209 |
self.assertSearchKey255('\x8cse!\x00\x8cse!', ('foo', 'foo')) |
|
1210 |
self.assertSearchKey255('\x8cse!\x00v\xff\x8c\xaa', ('foo', 'bar')) |
|
1211 |
# The standard mapping for these would include '\n', so it should be
|
|
1212 |
# mapped to '_'
|
|
1213 |
self.assertSearchKey255('\xfdm\x93_\x00P_\x1bL', ('<', 'V')) |
|
1214 |
||
1215 |
def test_255_does_not_include_newline(self): |
|
1216 |
# When mapping via _search_key_255, we should never have the '\n'
|
|
1217 |
# character, but all other 255 values should be present
|
|
1218 |
chars_used = set() |
|
1219 |
for char_in in range(256): |
|
1220 |
search_key = chk_map._search_key_255((chr(char_in),)) |
|
1221 |
chars_used.update(search_key) |
|
1222 |
all_chars = set([chr(x) for x in range(256)]) |
|
1223 |
unused_chars = all_chars.symmetric_difference(chars_used) |
|
1224 |
self.assertEqual(set('\n'), unused_chars) |
|
1225 |
||
1226 |
||
1227 |
class TestLeafNode(TestCaseWithStore): |
|
1228 |
||
1229 |
def test_current_size_empty(self): |
|
1230 |
node = LeafNode() |
|
1231 |
self.assertEqual(16, node._current_size()) |
|
1232 |
||
1233 |
def test_current_size_size_changed(self): |
|
1234 |
node = LeafNode() |
|
1235 |
node.set_maximum_size(10) |
|
1236 |
self.assertEqual(17, node._current_size()) |
|
1237 |
||
1238 |
def test_current_size_width_changed(self): |
|
1239 |
node = LeafNode() |
|
1240 |
node._key_width = 10 |
|
1241 |
self.assertEqual(17, node._current_size()) |
|
1242 |
||
1243 |
def test_current_size_items(self): |
|
1244 |
node = LeafNode() |
|
1245 |
base_size = node._current_size() |
|
1246 |
node.map(None, ("foo bar",), "baz") |
|
1247 |
self.assertEqual(base_size + 14, node._current_size()) |
|
1248 |
||
1249 |
def test_deserialise_empty(self): |
|
1250 |
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",)) |
|
1251 |
self.assertEqual(0, len(node)) |
|
1252 |
self.assertEqual(10, node.maximum_size) |
|
1253 |
self.assertEqual(("sha1:1234",), node.key()) |
|
1254 |
self.assertIs(None, node._search_prefix) |
|
1255 |
self.assertIs(None, node._common_serialised_prefix) |
|
1256 |
||
1257 |
def test_deserialise_items(self): |
|
1258 |
node = LeafNode.deserialise( |
|
1259 |
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n", |
|
1260 |
("sha1:1234",)) |
|
1261 |
self.assertEqual(2, len(node)) |
|
1262 |
self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")], |
|
1263 |
sorted(node.iteritems(None))) |
|
1264 |
||
1265 |
def test_deserialise_item_with_null_width_1(self): |
|
1266 |
node = LeafNode.deserialise( |
|
1267 |
"chkleaf:\n0\n1\n2\n\nfoo\x001\nbar\x00baz\nquux\x001\nblarh\n", |
|
1268 |
("sha1:1234",)) |
|
1269 |
self.assertEqual(2, len(node)) |
|
1270 |
self.assertEqual([(("foo",), "bar\x00baz"), (("quux",), "blarh")], |
|
1271 |
sorted(node.iteritems(None))) |
|
1272 |
||
1273 |
def test_deserialise_item_with_null_width_2(self): |
|
1274 |
node = LeafNode.deserialise( |
|
1275 |
"chkleaf:\n0\n2\n2\n\nfoo\x001\x001\nbar\x00baz\n" |
|
1276 |
"quux\x00\x001\nblarh\n", |
|
1277 |
("sha1:1234",)) |
|
1278 |
self.assertEqual(2, len(node)) |
|
1279 |
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("quux", ""), "blarh")], |
|
1280 |
sorted(node.iteritems(None))) |
|
1281 |
||
1282 |
def test_iteritems_selected_one_of_two_items(self): |
|
1283 |
node = LeafNode.deserialise( |
|
1284 |
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n", |
|
1285 |
("sha1:1234",)) |
|
1286 |
self.assertEqual(2, len(node)) |
|
1287 |
self.assertEqual([(("quux",), "blarh")], |
|
1288 |
sorted(node.iteritems(None, [("quux",), ("qaz",)]))) |
|
1289 |
||
1290 |
def test_deserialise_item_with_common_prefix(self): |
|
1291 |
node = LeafNode.deserialise( |
|
1292 |
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x001\nbar\x00baz\n2\x001\nblarh\n", |
|
1293 |
("sha1:1234",)) |
|
1294 |
self.assertEqual(2, len(node)) |
|
1295 |
self.assertEqual([(("foo", "1"), "bar\x00baz"), (("foo", "2"), "blarh")], |
|
1296 |
sorted(node.iteritems(None))) |
|
1297 |
self.assertIs(chk_map._unknown, node._search_prefix) |
|
1298 |
self.assertEqual('foo\x00', node._common_serialised_prefix) |
|
1299 |
||
1300 |
def test_deserialise_multi_line(self): |
|
1301 |
node = LeafNode.deserialise( |
|
1302 |
"chkleaf:\n0\n2\n2\nfoo\x00\n1\x002\nbar\nbaz\n2\x002\nblarh\n\n", |
|
1303 |
("sha1:1234",)) |
|
1304 |
self.assertEqual(2, len(node)) |
|
1305 |
self.assertEqual([(("foo", "1"), "bar\nbaz"), |
|
1306 |
(("foo", "2"), "blarh\n"), |
|
1307 |
], sorted(node.iteritems(None))) |
|
1308 |
self.assertIs(chk_map._unknown, node._search_prefix) |
|
1309 |
self.assertEqual('foo\x00', node._common_serialised_prefix) |
|
1310 |
||
1311 |
def test_key_new(self): |
|
1312 |
node = LeafNode() |
|
1313 |
self.assertEqual(None, node.key()) |
|
1314 |
||
1315 |
def test_key_after_map(self): |
|
1316 |
node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n\n", ("sha1:1234",)) |
|
1317 |
node.map(None, ("foo bar",), "baz quux") |
|
1318 |
self.assertEqual(None, node.key()) |
|
1319 |
||
1320 |
def test_key_after_unmap(self): |
|
1321 |
node = LeafNode.deserialise( |
|
1322 |
"chkleaf:\n0\n1\n2\n\nfoo bar\x001\nbaz\nquux\x001\nblarh\n", |
|
1323 |
("sha1:1234",)) |
|
1324 |
node.unmap(None, ("foo bar",)) |
|
1325 |
self.assertEqual(None, node.key()) |
|
1326 |
||
1327 |
def test_map_exceeding_max_size_only_entry_new(self): |
|
1328 |
node = LeafNode() |
|
1329 |
node.set_maximum_size(10) |
|
1330 |
result = node.map(None, ("foo bar",), "baz quux") |
|
1331 |
self.assertEqual(("foo bar", [("", node)]), result) |
|
1332 |
self.assertTrue(10 < node._current_size()) |
|
1333 |
||
1334 |
def test_map_exceeding_max_size_second_entry_early_difference_new(self): |
|
1335 |
node = LeafNode() |
|
1336 |
node.set_maximum_size(10) |
|
1337 |
node.map(None, ("foo bar",), "baz quux") |
|
1338 |
prefix, result = list(node.map(None, ("blue",), "red")) |
|
1339 |
self.assertEqual("", prefix) |
|
1340 |
self.assertEqual(2, len(result)) |
|
1341 |
split_chars = set([result[0][0], result[1][0]]) |
|
1342 |
self.assertEqual(set(["f", "b"]), split_chars) |
|
1343 |
nodes = dict(result) |
|
1344 |
node = nodes["f"] |
|
1345 |
self.assertEqual({("foo bar",): "baz quux"}, self.to_dict(node, None)) |
|
1346 |
self.assertEqual(10, node.maximum_size) |
|
1347 |
self.assertEqual(1, node._key_width) |
|
1348 |
node = nodes["b"] |
|
1349 |
self.assertEqual({("blue",): "red"}, self.to_dict(node, None)) |
|
1350 |
self.assertEqual(10, node.maximum_size) |
|
1351 |
self.assertEqual(1, node._key_width) |
|
1352 |
||
1353 |
def test_map_first(self): |
|
1354 |
node = LeafNode() |
|
1355 |
result = node.map(None, ("foo bar",), "baz quux") |
|
1356 |
self.assertEqual(("foo bar", [("", node)]), result) |
|
1357 |
self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node, None)) |
|
1358 |
self.assertEqual(1, len(node)) |
|
1359 |
||
1360 |
def test_map_second(self): |
|
1361 |
node = LeafNode() |
|
1362 |
node.map(None, ("foo bar",), "baz quux") |
|
1363 |
result = node.map(None, ("bingo",), "bango") |
|
1364 |
self.assertEqual(("", [("", node)]), result) |
|
1365 |
self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"}, |
|
1366 |
self.to_dict(node, None)) |
|
1367 |
self.assertEqual(2, len(node)) |
|
1368 |
||
1369 |
def test_map_replacement(self): |
|
1370 |
node = LeafNode() |
|
1371 |
node.map(None, ("foo bar",), "baz quux") |
|
1372 |
result = node.map(None, ("foo bar",), "bango") |
|
1373 |
self.assertEqual(("foo bar", [("", node)]), result) |
|
1374 |
self.assertEqual({("foo bar",): "bango"}, |
|
1375 |
self.to_dict(node, None)) |
|
1376 |
self.assertEqual(1, len(node)) |
|
1377 |
||
1378 |
def test_serialise_empty(self): |
|
1379 |
store = self.get_chk_bytes() |
|
1380 |
node = LeafNode() |
|
1381 |
node.set_maximum_size(10) |
|
1382 |
expected_key = ("sha1:f34c3f0634ea3f85953dffa887620c0a5b1f4a51",) |
|
1383 |
self.assertEqual([expected_key], |
|
1384 |
list(node.serialise(store))) |
|
1385 |
self.assertEqual("chkleaf:\n10\n1\n0\n\n", self.read_bytes(store, expected_key)) |
|
1386 |
self.assertEqual(expected_key, node.key()) |
|
1387 |
||
1388 |
def test_serialise_items(self): |
|
1389 |
store = self.get_chk_bytes() |
|
1390 |
node = LeafNode() |
|
1391 |
node.set_maximum_size(10) |
|
1392 |
node.map(None, ("foo bar",), "baz quux") |
|
1393 |
expected_key = ("sha1:f89fac7edfc6bdb1b1b54a556012ff0c646ef5e0",) |
|
1394 |
self.assertEqual('foo bar', node._common_serialised_prefix) |
|
1395 |
self.assertEqual([expected_key], |
|
1396 |
list(node.serialise(store))) |
|
1397 |
self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\n\x001\nbaz quux\n", |
|
1398 |
self.read_bytes(store, expected_key)) |
|
1399 |
self.assertEqual(expected_key, node.key()) |
|
1400 |
||
1401 |
def test_unique_serialised_prefix_empty_new(self): |
|
1402 |
node = LeafNode() |
|
1403 |
self.assertIs(None, node._compute_search_prefix()) |
|
1404 |
||
1405 |
def test_unique_serialised_prefix_one_item_new(self): |
|
1406 |
node = LeafNode() |
|
1407 |
node.map(None, ("foo bar", "baz"), "baz quux") |
|
1408 |
self.assertEqual("foo bar\x00baz", node._compute_search_prefix()) |
|
1409 |
||
1410 |
def test_unmap_missing(self): |
|
1411 |
node = LeafNode() |
|
1412 |
self.assertRaises(KeyError, node.unmap, None, ("foo bar",)) |
|
1413 |
||
1414 |
def test_unmap_present(self): |
|
1415 |
node = LeafNode() |
|
1416 |
node.map(None, ("foo bar",), "baz quux") |
|
1417 |
result = node.unmap(None, ("foo bar",)) |
|
1418 |
self.assertEqual(node, result) |
|
1419 |
self.assertEqual({}, self.to_dict(node, None)) |
|
1420 |
self.assertEqual(0, len(node)) |
|
1421 |
||
1422 |
def test_map_maintains_common_prefixes(self): |
|
1423 |
node = LeafNode() |
|
1424 |
node._key_width = 2 |
|
1425 |
node.map(None, ("foo bar", "baz"), "baz quux") |
|
1426 |
self.assertEqual('foo bar\x00baz', node._search_prefix) |
|
1427 |
self.assertEqual('foo bar\x00baz', node._common_serialised_prefix) |
|
1428 |
node.map(None, ("foo bar", "bing"), "baz quux") |
|
1429 |
self.assertEqual('foo bar\x00b', node._search_prefix) |
|
1430 |
self.assertEqual('foo bar\x00b', node._common_serialised_prefix) |
|
1431 |
node.map(None, ("fool", "baby"), "baz quux") |
|
1432 |
self.assertEqual('foo', node._search_prefix) |
|
1433 |
self.assertEqual('foo', node._common_serialised_prefix) |
|
1434 |
node.map(None, ("foo bar", "baz"), "replaced") |
|
1435 |
self.assertEqual('foo', node._search_prefix) |
|
1436 |
self.assertEqual('foo', node._common_serialised_prefix) |
|
1437 |
node.map(None, ("very", "different"), "value") |
|
1438 |
self.assertEqual('', node._search_prefix) |
|
1439 |
self.assertEqual('', node._common_serialised_prefix) |
|
1440 |
||
1441 |
def test_unmap_maintains_common_prefixes(self): |
|
1442 |
node = LeafNode() |
|
1443 |
node._key_width = 2 |
|
1444 |
node.map(None, ("foo bar", "baz"), "baz quux") |
|
1445 |
node.map(None, ("foo bar", "bing"), "baz quux") |
|
1446 |
node.map(None, ("fool", "baby"), "baz quux") |
|
1447 |
node.map(None, ("very", "different"), "value") |
|
1448 |
self.assertEqual('', node._search_prefix) |
|
1449 |
self.assertEqual('', node._common_serialised_prefix) |
|
1450 |
node.unmap(None, ("very", "different")) |
|
1451 |
self.assertEqual("foo", node._search_prefix) |
|
1452 |
self.assertEqual("foo", node._common_serialised_prefix) |
|
1453 |
node.unmap(None, ("fool", "baby")) |
|
1454 |
self.assertEqual('foo bar\x00b', node._search_prefix) |
|
1455 |
self.assertEqual('foo bar\x00b', node._common_serialised_prefix) |
|
1456 |
node.unmap(None, ("foo bar", "baz")) |
|
1457 |
self.assertEqual('foo bar\x00bing', node._search_prefix) |
|
1458 |
self.assertEqual('foo bar\x00bing', node._common_serialised_prefix) |
|
1459 |
node.unmap(None, ("foo bar", "bing")) |
|
1460 |
self.assertEqual(None, node._search_prefix) |
|
1461 |
self.assertEqual(None, node._common_serialised_prefix) |
|
1462 |
||
1463 |
||
1464 |
class TestInternalNode(TestCaseWithStore): |
|
1465 |
||
1466 |
def test_add_node_empty_new(self): |
|
1467 |
node = InternalNode('fo') |
|
1468 |
child = LeafNode() |
|
1469 |
child.set_maximum_size(100) |
|
1470 |
child.map(None, ("foo",), "bar") |
|
1471 |
node.add_node("foo", child) |
|
1472 |
# Note that node isn't strictly valid now as a tree (only one child),
|
|
1473 |
# but thats ok for this test.
|
|
1474 |
# The first child defines the node's width:
|
|
1475 |
self.assertEqual(3, node._node_width) |
|
1476 |
# We should be able to iterate over the contents without doing IO.
|
|
1477 |
self.assertEqual({('foo',): 'bar'}, self.to_dict(node, None)) |
|
1478 |
# The length should be known:
|
|
1479 |
self.assertEqual(1, len(node)) |
|
1480 |
# serialising the node should serialise the child and the node.
|
|
1481 |
chk_bytes = self.get_chk_bytes() |
|
1482 |
keys = list(node.serialise(chk_bytes)) |
|
1483 |
child_key = child.serialise(chk_bytes)[0] |
|
1484 |
self.assertEqual( |
|
1485 |
[child_key, ('sha1:cf67e9997d8228a907c1f5bfb25a8bd9cd916fac',)], |
|
1486 |
keys) |
|
1487 |
# We should be able to access deserialised content.
|
|
1488 |
bytes = self.read_bytes(chk_bytes, keys[1]) |
|
1489 |
node = chk_map._deserialise(bytes, keys[1], None) |
|
1490 |
self.assertEqual(1, len(node)) |
|
1491 |
self.assertEqual({('foo',): 'bar'}, self.to_dict(node, chk_bytes)) |
|
1492 |
self.assertEqual(3, node._node_width) |
|
1493 |
||
1494 |
def test_add_node_resets_key_new(self): |
|
1495 |
node = InternalNode('fo') |
|
1496 |
child = LeafNode() |
|
1497 |
child.set_maximum_size(100) |
|
1498 |
child.map(None, ("foo",), "bar") |
|
1499 |
node.add_node("foo", child) |
|
1500 |
chk_bytes = self.get_chk_bytes() |
|
1501 |
keys = list(node.serialise(chk_bytes)) |
|
1502 |
self.assertEqual(keys[1], node._key) |
|
1503 |
node.add_node("fos", child) |
|
1504 |
self.assertEqual(None, node._key) |
|
1505 |
||
1506 |
# def test_add_node_empty_oversized_one_ok_new(self):
|
|
1507 |
# def test_add_node_one_oversized_second_kept_minimum_fan(self):
|
|
1508 |
# def test_add_node_two_oversized_third_kept_minimum_fan(self):
|
|
1509 |
# def test_add_node_one_oversized_second_splits_errors(self):
|
|
1510 |
||
1511 |
def test__iter_nodes_no_key_filter(self): |
|
1512 |
node = InternalNode('') |
|
1513 |
child = LeafNode() |
|
1514 |
child.set_maximum_size(100) |
|
1515 |
child.map(None, ("foo",), "bar") |
|
1516 |
node.add_node("f", child) |
|
1517 |
child = LeafNode() |
|
1518 |
child.set_maximum_size(100) |
|
1519 |
child.map(None, ("bar",), "baz") |
|
1520 |
node.add_node("b", child) |
|
1521 |
||
1522 |
for child, node_key_filter in node._iter_nodes(None, key_filter=None): |
|
1523 |
self.assertEqual(None, node_key_filter) |
|
1524 |
||
1525 |
def test__iter_nodes_splits_key_filter(self): |
|
1526 |
node = InternalNode('') |
|
1527 |
child = LeafNode() |
|
1528 |
child.set_maximum_size(100) |
|
1529 |
child.map(None, ("foo",), "bar") |
|
1530 |
node.add_node("f", child) |
|
1531 |
child = LeafNode() |
|
1532 |
child.set_maximum_size(100) |
|
1533 |
child.map(None, ("bar",), "baz") |
|
1534 |
node.add_node("b", child) |
|
1535 |
||
1536 |
# foo and bar both match exactly one leaf node, but 'cat' should not
|
|
1537 |
# match any, and should not be placed in one.
|
|
1538 |
key_filter = (('foo',), ('bar',), ('cat',)) |
|
1539 |
for child, node_key_filter in node._iter_nodes(None, |
|
1540 |
key_filter=key_filter): |
|
1541 |
# each child could only match one key filter, so make sure it was
|
|
1542 |
# properly filtered
|
|
1543 |
self.assertEqual(1, len(node_key_filter)) |
|
1544 |
||
1545 |
def test__iter_nodes_with_multiple_matches(self): |
|
1546 |
node = InternalNode('') |
|
1547 |
child = LeafNode() |
|
1548 |
child.set_maximum_size(100) |
|
1549 |
child.map(None, ("foo",), "val") |
|
1550 |
child.map(None, ("fob",), "val") |
|
1551 |
node.add_node("f", child) |
|
1552 |
child = LeafNode() |
|
1553 |
child.set_maximum_size(100) |
|
1554 |
child.map(None, ("bar",), "val") |
|
1555 |
child.map(None, ("baz",), "val") |
|
1556 |
node.add_node("b", child) |
|
1557 |
||
1558 |
key_filter = (('foo',), ('fob',), ('bar',), ('baz',)) |
|
1559 |
for child, node_key_filter in node._iter_nodes(None, |
|
1560 |
key_filter=key_filter): |
|
1561 |
# each child could matches two key filters, so make sure they were
|
|
1562 |
# both included.
|
|
1563 |
self.assertEqual(2, len(node_key_filter)) |
|
1564 |
||
1565 |
def test_iteritems_empty_new(self): |
|
1566 |
node = InternalNode() |
|
1567 |
self.assertEqual([], sorted(node.iteritems(None))) |
|
1568 |
||
1569 |
def test_iteritems_two_children(self): |
|
1570 |
node = InternalNode() |
|
1571 |
leaf1 = LeafNode() |
|
1572 |
leaf1.map(None, ('foo bar',), 'quux') |
|
1573 |
leaf2 = LeafNode() |
|
1574 |
leaf2.map(None, ('strange',), 'beast') |
|
1575 |
node.add_node("f", leaf1) |
|
1576 |
node.add_node("s", leaf2) |
|
1577 |
self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')], |
|
1578 |
sorted(node.iteritems(None))) |
|
1579 |
||
1580 |
def test_iteritems_two_children_partial(self): |
|
1581 |
node = InternalNode() |
|
1582 |
leaf1 = LeafNode() |
|
1583 |
leaf1.map(None, ('foo bar',), 'quux') |
|
1584 |
leaf2 = LeafNode() |
|
1585 |
leaf2.map(None, ('strange',), 'beast') |
|
1586 |
node.add_node("f", leaf1) |
|
1587 |
# This sets up a path that should not be followed - it will error if
|
|
1588 |
# the code tries to.
|
|
1589 |
node._items['f'] = None |
|
1590 |
node.add_node("s", leaf2) |
|
1591 |
self.assertEqual([(('strange',), 'beast')], |
|
1592 |
sorted(node.iteritems(None, [('strange',), ('weird',)]))) |
|
1593 |
||
1594 |
def test_iteritems_two_children_with_hash(self): |
|
1595 |
search_key_func = chk_map.search_key_registry.get('hash-255-way') |
|
1596 |
node = InternalNode(search_key_func=search_key_func) |
|
1597 |
leaf1 = LeafNode(search_key_func=search_key_func) |
|
1598 |
leaf1.map(None, ('foo bar',), 'quux') |
|
1599 |
leaf2 = LeafNode(search_key_func=search_key_func) |
|
1600 |
leaf2.map(None, ('strange',), 'beast') |
|
1601 |
self.assertEqual('\xbeF\x014', search_key_func(('foo bar',))) |
|
1602 |
self.assertEqual('\x85\xfa\xf7K', search_key_func(('strange',))) |
|
1603 |
node.add_node("\xbe", leaf1) |
|
1604 |
# This sets up a path that should not be followed - it will error if
|
|
1605 |
# the code tries to.
|
|
1606 |
node._items['\xbe'] = None |
|
1607 |
node.add_node("\x85", leaf2) |
|
1608 |
self.assertEqual([(('strange',), 'beast')], |
|
1609 |
sorted(node.iteritems(None, [('strange',), ('weird',)]))) |
|
1610 |
||
1611 |
def test_iteritems_partial_empty(self): |
|
1612 |
node = InternalNode() |
|
1613 |
self.assertEqual([], sorted(node.iteritems([('missing',)]))) |
|
1614 |
||
1615 |
def test_map_to_new_child_new(self): |
|
1616 |
chkmap = self._get_map({('k1',):'foo', ('k2',):'bar'}, maximum_size=10) |
|
1617 |
chkmap._ensure_root() |
|
1618 |
node = chkmap._root_node |
|
1619 |
# Ensure test validity: nothing paged in below the root.
|
|
1620 |
self.assertEqual(2, |
|
1621 |
len([value for value in node._items.values() |
|
1622 |
if type(value) == tuple])) |
|
1623 |
# now, mapping to k3 should add a k3 leaf
|
|
1624 |
prefix, nodes = node.map(None, ('k3',), 'quux') |
|
1625 |
self.assertEqual("k", prefix) |
|
1626 |
self.assertEqual([("", node)], nodes) |
|
1627 |
# check new child details
|
|
1628 |
child = node._items['k3'] |
|
1629 |
self.assertIsInstance(child, LeafNode) |
|
1630 |
self.assertEqual(1, len(child)) |
|
1631 |
self.assertEqual({('k3',): 'quux'}, self.to_dict(child, None)) |
|
1632 |
self.assertEqual(None, child._key) |
|
1633 |
self.assertEqual(10, child.maximum_size) |
|
1634 |
self.assertEqual(1, child._key_width) |
|
1635 |
# Check overall structure:
|
|
1636 |
self.assertEqual(3, len(chkmap)) |
|
1637 |
self.assertEqual({('k1',): 'foo', ('k2',): 'bar', ('k3',): 'quux'}, |
|
1638 |
self.to_dict(chkmap)) |
|
1639 |
# serialising should only serialise the new data - k3 and the internal
|
|
1640 |
# node.
|
|
1641 |
keys = list(node.serialise(chkmap._store)) |
|
1642 |
child_key = child.serialise(chkmap._store)[0] |
|
1643 |
self.assertEqual([child_key, keys[1]], keys) |
|
1644 |
||
1645 |
def test_map_to_child_child_splits_new(self): |
|
1646 |
chkmap = self._get_map({('k1',):'foo', ('k22',):'bar'}, maximum_size=10) |
|
1647 |
# Check for the canonical root value for this tree:
|
|
1648 |
self.assertEqualDiff("'' InternalNode\n" |
|
1649 |
" 'k1' LeafNode\n" |
|
1650 |
" ('k1',) 'foo'\n" |
|
1651 |
" 'k2' LeafNode\n" |
|
1652 |
" ('k22',) 'bar'\n" |
|
1653 |
, chkmap._dump_tree()) |
|
1654 |
# _dump_tree pages everything in, so reload using just the root
|
|
1655 |
chkmap = CHKMap(chkmap._store, chkmap._root_node) |
|
1656 |
chkmap._ensure_root() |
|
1657 |
node = chkmap._root_node |
|
1658 |
# Ensure test validity: nothing paged in below the root.
|
|
1659 |
self.assertEqual(2, |
|
1660 |
len([value for value in node._items.values() |
|
1661 |
if type(value) == tuple])) |
|
1662 |
# now, mapping to k23 causes k22 ('k2' in node) to split into k22 and
|
|
1663 |
# k23, which for simplicity in the current implementation generates
|
|
1664 |
# a new internal node between node, and k22/k23.
|
|
1665 |
prefix, nodes = node.map(chkmap._store, ('k23',), 'quux') |
|
1666 |
self.assertEqual("k", prefix) |
|
1667 |
self.assertEqual([("", node)], nodes) |
|
1668 |
# check new child details
|
|
1669 |
child = node._items['k2'] |
|
1670 |
self.assertIsInstance(child, InternalNode) |
|
1671 |
self.assertEqual(2, len(child)) |
|
1672 |
self.assertEqual({('k22',): 'bar', ('k23',): 'quux'}, |
|
1673 |
self.to_dict(child, None)) |
|
1674 |
self.assertEqual(None, child._key) |
|
1675 |
self.assertEqual(10, child.maximum_size) |
|
1676 |
self.assertEqual(1, child._key_width) |
|
1677 |
self.assertEqual(3, child._node_width) |
|
1678 |
# Check overall structure:
|
|
1679 |
self.assertEqual(3, len(chkmap)) |
|
1680 |
self.assertEqual({('k1',): 'foo', ('k22',): 'bar', ('k23',): 'quux'}, |
|
1681 |
self.to_dict(chkmap)) |
|
1682 |
# serialising should only serialise the new data - although k22 hasn't
|
|
1683 |
# changed because its a special corner case (splitting on with only one
|
|
1684 |
# key leaves one node unaltered), in general k22 is serialised, so we
|
|
1685 |
# expect k22, k23, the new internal node, and node, to be serialised.
|
|
1686 |
keys = list(node.serialise(chkmap._store)) |
|
1687 |
child_key = child._key |
|
1688 |
k22_key = child._items['k22']._key |
|
1689 |
k23_key = child._items['k23']._key |
|
1690 |
self.assertEqual([k22_key, k23_key, child_key, node.key()], keys) |
|
1691 |
self.assertEqualDiff("'' InternalNode\n" |
|
1692 |
" 'k1' LeafNode\n" |
|
1693 |
" ('k1',) 'foo'\n" |
|
1694 |
" 'k2' InternalNode\n" |
|
1695 |
" 'k22' LeafNode\n" |
|
1696 |
" ('k22',) 'bar'\n" |
|
1697 |
" 'k23' LeafNode\n" |
|
1698 |
" ('k23',) 'quux'\n" |
|
1699 |
, chkmap._dump_tree()) |
|
1700 |
||
1701 |
def test__search_prefix_filter_with_hash(self): |
|
1702 |
search_key_func = chk_map.search_key_registry.get('hash-16-way') |
|
1703 |
node = InternalNode(search_key_func=search_key_func) |
|
1704 |
node._key_width = 2 |
|
1705 |
node._node_width = 4 |
|
1706 |
self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b'))) |
|
1707 |
self.assertEqual('E8B7', node._search_prefix_filter(('a', 'b'))) |
|
1708 |
self.assertEqual('E8B7', node._search_prefix_filter(('a',))) |
|
1709 |
||
1710 |
def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self): |
|
1711 |
chkmap = self._get_map( |
|
1712 |
{('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10) |
|
1713 |
# Check we have the expected tree.
|
|
1714 |
self.assertEqualDiff("'' InternalNode\n" |
|
1715 |
" 'k1' LeafNode\n" |
|
1716 |
" ('k1',) 'foo'\n" |
|
1717 |
" 'k2' InternalNode\n" |
|
1718 |
" 'k22' LeafNode\n" |
|
1719 |
" ('k22',) 'bar'\n" |
|
1720 |
" 'k23' LeafNode\n" |
|
1721 |
" ('k23',) 'quux'\n" |
|
1722 |
, chkmap._dump_tree()) |
|
1723 |
chkmap = CHKMap(chkmap._store, chkmap._root_node) |
|
1724 |
chkmap._ensure_root() |
|
1725 |
node = chkmap._root_node |
|
1726 |
# unmapping k23 should give us a root, with k1 and k22 as direct
|
|
1727 |
# children.
|
|
1728 |
result = node.unmap(chkmap._store, ('k23',)) |
|
1729 |
# check the pointed-at object within node - k2 should now point at the
|
|
1730 |
# k22 leaf (which has been paged in to see if we can collapse the tree)
|
|
1731 |
child = node._items['k2'] |
|
1732 |
self.assertIsInstance(child, LeafNode) |
|
1733 |
self.assertEqual(1, len(child)) |
|
1734 |
self.assertEqual({('k22',): 'bar'}, |
|
1735 |
self.to_dict(child, None)) |
|
1736 |
# Check overall structure is instact:
|
|
1737 |
self.assertEqual(2, len(chkmap)) |
|
1738 |
self.assertEqual({('k1',): 'foo', ('k22',): 'bar'}, |
|
1739 |
self.to_dict(chkmap)) |
|
1740 |
# serialising should only serialise the new data - the root node.
|
|
1741 |
keys = list(node.serialise(chkmap._store)) |
|
1742 |
self.assertEqual([keys[-1]], keys) |
|
1743 |
chkmap = CHKMap(chkmap._store, keys[-1]) |
|
1744 |
self.assertEqualDiff("'' InternalNode\n" |
|
1745 |
" 'k1' LeafNode\n" |
|
1746 |
" ('k1',) 'foo'\n" |
|
1747 |
" 'k2' LeafNode\n" |
|
1748 |
" ('k22',) 'bar'\n" |
|
1749 |
, chkmap._dump_tree()) |
|
1750 |
||
1751 |
def test_unmap_k1_from_k1_k22_k23_gives_k22_k23_tree_new(self): |
|
1752 |
chkmap = self._get_map( |
|
1753 |
{('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10) |
|
1754 |
self.assertEqualDiff("'' InternalNode\n" |
|
1755 |
" 'k1' LeafNode\n" |
|
1756 |
" ('k1',) 'foo'\n" |
|
1757 |
" 'k2' InternalNode\n" |
|
1758 |
" 'k22' LeafNode\n" |
|
1759 |
" ('k22',) 'bar'\n" |
|
1760 |
" 'k23' LeafNode\n" |
|
1761 |
" ('k23',) 'quux'\n" |
|
1762 |
, chkmap._dump_tree()) |
|
1763 |
orig_root = chkmap._root_node |
|
1764 |
chkmap = CHKMap(chkmap._store, orig_root) |
|
1765 |
chkmap._ensure_root() |
|
1766 |
node = chkmap._root_node |
|
1767 |
k2_ptr = node._items['k2'] |
|
1768 |
# unmapping k1 should give us a root, with k22 and k23 as direct
|
|
1769 |
# children, and should not have needed to page in the subtree.
|
|
1770 |
result = node.unmap(chkmap._store, ('k1',)) |
|
1771 |
self.assertEqual(k2_ptr, result) |
|
1772 |
chkmap = CHKMap(chkmap._store, orig_root) |
|
1773 |
# Unmapping at the CHKMap level should switch to the new root
|
|
1774 |
chkmap.unmap(('k1',)) |
|
1775 |
self.assertEqual(k2_ptr, chkmap._root_node) |
|
1776 |
self.assertEqualDiff("'' InternalNode\n" |
|
1777 |
" 'k22' LeafNode\n" |
|
1778 |
" ('k22',) 'bar'\n" |
|
1779 |
" 'k23' LeafNode\n" |
|
1780 |
" ('k23',) 'quux'\n" |
|
1781 |
, chkmap._dump_tree()) |
|
1782 |
||
1783 |
||
1784 |
# leaf:
|
|
1785 |
# map -> fits - done
|
|
1786 |
# map -> doesn't fit - shrink from left till fits
|
|
1787 |
# key data to return: the common prefix, new nodes.
|
|
1788 |
||
1789 |
# unmap -> how to tell if siblings can be combined.
|
|
1790 |
# combing leaf nodes means expanding the prefix to the left; so gather the size of
|
|
1791 |
# all the leaf nodes addressed by expanding the prefix by 1; if any adjacent node
|
|
1792 |
# is an internal node, we know that that is a dense subtree - can't combine.
|
|
1793 |
# otherwise as soon as the sum of serialised values exceeds the split threshold
|
|
1794 |
# we know we can't combine - stop.
|
|
1795 |
# unmap -> key return data - space in node, common prefix length? and key count
|
|
1796 |
# internal:
|
|
1797 |
# variable length prefixes? -> later start with fixed width to get something going
|
|
1798 |
# map -> fits - update pointer to leaf
|
|
1799 |
# return [prefix and node] - seems sound.
|
|
1800 |
# map -> doesn't fit - find unique prefix and shift right
|
|
1801 |
# create internal nodes for all the partitions, return list of unique
|
|
1802 |
# prefixes and nodes.
|
|
1803 |
# map -> new prefix - create a leaf
|
|
1804 |
# unmap -> if child key count 0, remove
|
|
1805 |
# unmap -> return space in node, common prefix length? (why?), key count
|
|
1806 |
# map:
|
|
1807 |
# map, if 1 node returned, use it, otherwise make an internal and populate.
|
|
1808 |
# map - unmap - if empty, use empty leafnode (avoids special cases in driver
|
|
1809 |
# code)
|
|
1810 |
# map inits as empty leafnode.
|
|
1811 |
# tools:
|
|
1812 |
# visualiser
|
|
1813 |
||
1814 |
||
1815 |
# how to handle:
|
|
1816 |
# AA, AB, AC, AD, BA
|
|
1817 |
# packed internal node - ideal:
|
|
1818 |
# AA, AB, AC, AD, BA
|
|
1819 |
# single byte fanout - A,B, AA,AB,AC,AD, BA
|
|
1820 |
# build order's:
|
|
1821 |
# BA
|
|
1822 |
# AB - split, but we want to end up with AB, BA, in one node, with
|
|
1823 |
# 1-4K get0
|
|
1824 |
||
1825 |
||
1826 |
class TestIterInterestingNodes(TestCaseWithStore): |
|
1827 |
||
1828 |
def get_chk_bytes(self): |
|
1829 |
if getattr(self, '_chk_bytes', None) is None: |
|
1830 |
self._chk_bytes = super(TestIterInterestingNodes, |
|
1831 |
self).get_chk_bytes() |
|
1832 |
return self._chk_bytes |
|
1833 |
||
1834 |
def get_map_key(self, a_dict): |
|
1835 |
c_map = self._get_map(a_dict, maximum_size=10, |
|
1836 |
chk_bytes=self.get_chk_bytes()) |
|
1837 |
return c_map.key() |
|
1838 |
||
1839 |
def assertIterInteresting(self, expected, interesting_keys, |
|
1840 |
uninteresting_keys): |
|
1841 |
"""Check the result of iter_interesting_nodes.
|
|
1842 |
||
1843 |
:param expected: A list of (record_keys, interesting_chk_pages,
|
|
1844 |
interesting key value pairs)
|
|
1845 |
"""
|
|
1846 |
store = self.get_chk_bytes() |
|
1847 |
iter_nodes = chk_map.iter_interesting_nodes(store, interesting_keys, |
|
1848 |
uninteresting_keys) |
|
1849 |
nodes = list(iter_nodes) |
|
1850 |
for count, (exp, act) in enumerate(izip(expected, nodes)): |
|
1851 |
exp_record, exp_items = exp |
|
1852 |
record, items = act |
|
1853 |
exp_tuple = (exp_record, sorted(exp_items)) |
|
1854 |
if record is None: |
|
1855 |
act_tuple = (None, sorted(items)) |
|
1856 |
else: |
|
1857 |
act_tuple = (record.key, sorted(items)) |
|
1858 |
self.assertEqual(exp_tuple, act_tuple, |
|
1859 |
'entry %d did not match expected' % count) |
|
1860 |
self.assertEqual(len(expected), len(nodes)) |
|
1861 |
||
1862 |
def test_empty_to_one_keys(self): |
|
1863 |
target = self.get_map_key({('a',): 'content'}) |
|
1864 |
self.assertIterInteresting( |
|
1865 |
[(target, [(('a',), 'content')]), |
|
1866 |
], [target], []) |
|
1867 |
||
1868 |
def test_none_to_one_key(self): |
|
1869 |
basis = self.get_map_key({}) |
|
1870 |
target = self.get_map_key({('a',): 'content'}) |
|
1871 |
self.assertIterInteresting( |
|
1872 |
[(None, [(('a',), 'content')]), |
|
1873 |
(target, []), |
|
1874 |
], [target], [basis]) |
|
1875 |
||
1876 |
def test_one_to_none_key(self): |
|
1877 |
basis = self.get_map_key({('a',): 'content'}) |
|
1878 |
target = self.get_map_key({}) |
|
1879 |
self.assertIterInteresting( |
|
1880 |
[(target, [])], |
|
1881 |
[target], [basis]) |
|
1882 |
||
1883 |
def test_common_pages(self): |
|
1884 |
basis = self.get_map_key({('a',): 'content', |
|
1885 |
('b',): 'content', |
|
1886 |
('c',): 'content', |
|
1887 |
})
|
|
1888 |
target = self.get_map_key({('a',): 'content', |
|
1889 |
('b',): 'other content', |
|
1890 |
('c',): 'content', |
|
1891 |
})
|
|
1892 |
target_map = CHKMap(self.get_chk_bytes(), target) |
|
1893 |
self.assertEqualDiff( |
|
1894 |
"'' InternalNode\n" |
|
1895 |
" 'a' LeafNode\n" |
|
1896 |
" ('a',) 'content'\n" |
|
1897 |
" 'b' LeafNode\n" |
|
1898 |
" ('b',) 'other content'\n" |
|
1899 |
" 'c' LeafNode\n" |
|
1900 |
" ('c',) 'content'\n", |
|
1901 |
target_map._dump_tree()) |
|
1902 |
b_key = target_map._root_node._items['b'].key() |
|
1903 |
# This should return the root node, and the node for the 'b' key
|
|
1904 |
self.assertIterInteresting( |
|
1905 |
[(target, []), |
|
1906 |
(b_key, [(('b',), 'other content')])], |
|
1907 |
[target], [basis]) |
|
1908 |
||
1909 |
def test_common_sub_page(self): |
|
1910 |
basis = self.get_map_key({('aaa',): 'common', |
|
1911 |
('c',): 'common', |
|
1912 |
})
|
|
1913 |
target = self.get_map_key({('aaa',): 'common', |
|
1914 |
('aab',): 'new', |
|
1915 |
('c',): 'common', |
|
1916 |
})
|
|
1917 |
target_map = CHKMap(self.get_chk_bytes(), target) |
|
1918 |
self.assertEqualDiff( |
|
1919 |
"'' InternalNode\n" |
|
1920 |
" 'a' InternalNode\n" |
|
1921 |
" 'aaa' LeafNode\n" |
|
1922 |
" ('aaa',) 'common'\n" |
|
1923 |
" 'aab' LeafNode\n" |
|
1924 |
" ('aab',) 'new'\n" |
|
1925 |
" 'c' LeafNode\n" |
|
1926 |
" ('c',) 'common'\n", |
|
1927 |
target_map._dump_tree()) |
|
1928 |
# The key for the internal aa node
|
|
1929 |
a_key = target_map._root_node._items['a'].key() |
|
1930 |
# The key for the leaf aab node
|
|
1931 |
aab_key = target_map._root_node._items['a']._items['aab'].key() |
|
1932 |
self.assertIterInteresting( |
|
1933 |
[(target, []), |
|
1934 |
(a_key, []), |
|
1935 |
(aab_key, [(('aab',), 'new')])], |
|
1936 |
[target], [basis]) |
|
1937 |
||
1938 |
def test_common_leaf(self): |
|
1939 |
basis = self.get_map_key({}) |
|
1940 |
target1 = self.get_map_key({('aaa',): 'common'}) |
|
1941 |
target2 = self.get_map_key({('aaa',): 'common', |
|
1942 |
('bbb',): 'new', |
|
1943 |
})
|
|
1944 |
target3 = self.get_map_key({('aaa',): 'common', |
|
1945 |
('aac',): 'other', |
|
1946 |
('bbb',): 'new', |
|
1947 |
})
|
|
1948 |
# The LeafNode containing 'aaa': 'common' occurs at 3 different levels.
|
|
1949 |
# Once as a root node, once as a second layer, and once as a third
|
|
1950 |
# layer. It should only be returned one time regardless
|
|
1951 |
target1_map = CHKMap(self.get_chk_bytes(), target1) |
|
1952 |
self.assertEqualDiff( |
|
1953 |
"'' LeafNode\n" |
|
1954 |
" ('aaa',) 'common'\n", |
|
1955 |
target1_map._dump_tree()) |
|
1956 |
target2_map = CHKMap(self.get_chk_bytes(), target2) |
|
1957 |
self.assertEqualDiff( |
|
1958 |
"'' InternalNode\n" |
|
1959 |
" 'a' LeafNode\n" |
|
1960 |
" ('aaa',) 'common'\n" |
|
1961 |
" 'b' LeafNode\n" |
|
1962 |
" ('bbb',) 'new'\n", |
|
1963 |
target2_map._dump_tree()) |
|
1964 |
target3_map = CHKMap(self.get_chk_bytes(), target3) |
|
1965 |
self.assertEqualDiff( |
|
1966 |
"'' InternalNode\n" |
|
1967 |
" 'a' InternalNode\n" |
|
1968 |
" 'aaa' LeafNode\n" |
|
1969 |
" ('aaa',) 'common'\n" |
|
1970 |
" 'aac' LeafNode\n" |
|
1971 |
" ('aac',) 'other'\n" |
|
1972 |
" 'b' LeafNode\n" |
|
1973 |
" ('bbb',) 'new'\n", |
|
1974 |
target3_map._dump_tree()) |
|
1975 |
aaa_key = target1_map._root_node.key() |
|
1976 |
b_key = target2_map._root_node._items['b'].key() |
|
1977 |
a_key = target3_map._root_node._items['a'].key() |
|
1978 |
aac_key = target3_map._root_node._items['a']._items['aac'].key() |
|
1979 |
self.assertIterInteresting( |
|
1980 |
[(None, [(('aaa',), 'common')]), |
|
1981 |
(target1, []), |
|
1982 |
(target2, []), |
|
1983 |
(target3, []), |
|
1984 |
(b_key, [(('bbb',), 'new')]), |
|
1985 |
(a_key, []), |
|
1986 |
(aac_key, [(('aac',), 'other')]), |
|
1987 |
], [target1, target2, target3], [basis]) |
|
1988 |
||
1989 |
self.assertIterInteresting( |
|
1990 |
[(target2, []), |
|
1991 |
(target3, []), |
|
1992 |
(b_key, [(('bbb',), 'new')]), |
|
1993 |
(a_key, []), |
|
1994 |
(aac_key, [(('aac',), 'other')]), |
|
1995 |
], [target2, target3], [target1]) |
|
1996 |
||
1997 |
# This may be a case that we relax. A root node is a deep child of the
|
|
1998 |
# excluded set. The cost is buffering root nodes until we have
|
|
1999 |
# determined all possible exclusions. (Because a prefix of '', cannot
|
|
2000 |
# be excluded.)
|
|
2001 |
self.assertIterInteresting( |
|
2002 |
[], [target1], [target3]) |
|
2003 |
||
2004 |
def test_multiple_maps(self): |
|
2005 |
basis1 = self.get_map_key({('aaa',): 'common', |
|
2006 |
('aab',): 'basis1', |
|
2007 |
})
|
|
2008 |
basis2 = self.get_map_key({('bbb',): 'common', |
|
2009 |
('bbc',): 'basis2', |
|
2010 |
})
|
|
2011 |
target1 = self.get_map_key({('aaa',): 'common', |
|
2012 |
('aac',): 'target1', |
|
2013 |
('bbb',): 'common', |
|
2014 |
})
|
|
2015 |
target2 = self.get_map_key({('aaa',): 'common', |
|
2016 |
('bba',): 'target2', |
|
2017 |
('bbb',): 'common', |
|
2018 |
})
|
|
2019 |
target1_map = CHKMap(self.get_chk_bytes(), target1) |
|
2020 |
self.assertEqualDiff( |
|
2021 |
"'' InternalNode\n" |
|
2022 |
" 'a' InternalNode\n" |
|
2023 |
" 'aaa' LeafNode\n" |
|
2024 |
" ('aaa',) 'common'\n" |
|
2025 |
" 'aac' LeafNode\n" |
|
2026 |
" ('aac',) 'target1'\n" |
|
2027 |
" 'b' LeafNode\n" |
|
2028 |
" ('bbb',) 'common'\n", |
|
2029 |
target1_map._dump_tree()) |
|
2030 |
# The key for the target1 internal a node
|
|
2031 |
a_key = target1_map._root_node._items['a'].key() |
|
2032 |
# The key for the leaf aac node
|
|
2033 |
aac_key = target1_map._root_node._items['a']._items['aac'].key() |
|
2034 |
||
2035 |
target2_map = CHKMap(self.get_chk_bytes(), target2) |
|
2036 |
self.assertEqualDiff( |
|
2037 |
"'' InternalNode\n" |
|
2038 |
" 'a' LeafNode\n" |
|
2039 |
" ('aaa',) 'common'\n" |
|
2040 |
" 'b' InternalNode\n" |
|
2041 |
" 'bba' LeafNode\n" |
|
2042 |
" ('bba',) 'target2'\n" |
|
2043 |
" 'bbb' LeafNode\n" |
|
2044 |
" ('bbb',) 'common'\n", |
|
2045 |
target2_map._dump_tree()) |
|
2046 |
# The key for the target2 internal bb node
|
|
2047 |
b_key = target2_map._root_node._items['b'].key() |
|
2048 |
# The key for the leaf bba node
|
|
2049 |
bba_key = target2_map._root_node._items['b']._items['bba'].key() |
|
2050 |
self.assertIterInteresting( |
|
2051 |
[(target1, []), |
|
2052 |
(target2, []), |
|
2053 |
(a_key, []), |
|
2054 |
(b_key, []), |
|
2055 |
(aac_key, [(('aac',), 'target1')]), |
|
2056 |
(bba_key, [(('bba',), 'target2')]), |
|
2057 |
], [target1, target2], [basis1, basis2]) |