204
204
:return: The root chk of the resulting CHKMap.
206
root_key = klass._create_directly(store, initial_value,
207
maximum_size=maximum_size, key_width=key_width,
208
search_key_func=search_key_func)
212
def _create_via_map(klass, store, initial_value, maximum_size=0,
                    key_width=1, search_key_func=None):
    """Create a CHKMap for initial_value by applying a delta to an empty map.

    NOTE(review): the 'klass' first parameter indicates this is meant to be
    a classmethod of the map class -- confirm the decorator in the full file.

    :param store: The store into which serialised nodes are written.
    :param initial_value: A dict of key -> value pairs to populate the map.
    :param maximum_size: Maximum serialised size of a node before it splits.
    :param key_width: The number of components in each key.
    :param search_key_func: Optional function mapping a key to a search key.
    :return: The root chk of the resulting CHKMap.
    """
    # Use klass(...) rather than naming the class directly so subclasses
    # create instances of themselves.
    result = klass(store, None, search_key_func=search_key_func)
    result._root_node.set_maximum_size(maximum_size)
    result._root_node._key_width = key_width
    # Build an insertion-only delta: (None, key, value) means "add key".
    delta = []
    for key, value in initial_value.items():
        delta.append((None, key, value))
    root_key = result.apply_delta(delta)
    return root_key
224
def _create_directly(klass, store, initial_value, maximum_size=0,
                     key_width=1, search_key_func=None):
    """Create a CHKMap by building its nodes directly (no delta pass).

    A single LeafNode is filled with initial_value; if it then exceeds
    maximum_size it is split once and the pieces are hung under a new
    InternalNode before everything is serialised to the store.

    NOTE(review): the 'klass' first parameter indicates this is meant to be
    a classmethod of the map class -- confirm the decorator in the full file.

    :param store: The store into which serialised nodes are written.
    :param initial_value: A dict of key -> value pairs to populate the map.
    :param maximum_size: Maximum serialised size of a node before it splits.
    :param key_width: The number of components in each key.
    :param search_key_func: Optional function mapping a key to a search key.
    :return: The root chk of the resulting CHKMap.
    """
    node = LeafNode(search_key_func=search_key_func)
    node.set_maximum_size(maximum_size)
    node._key_width = key_width
    node._items = dict(initial_value)
    # .items() (not .iteritems()) for consistency with _create_via_map; a
    # generator expression avoids materialising an intermediate list.
    node._raw_size = sum(node._key_value_len(key, value)
                         for key, value in initial_value.items())
    node._len = len(node._items)
    node._compute_search_prefix()
    node._compute_serialised_prefix()
    # NOTE(review): only the size-check clause of this condition was legible
    # in the damaged source; the leading guards (more than one item, and a
    # nonzero maximum_size) were reconstructed -- verify against history.
    if (node._len > 1
            and maximum_size
            and node._current_size() > maximum_size):
        prefix, node_details = node._split(store)
        if len(node_details) == 1:
            raise AssertionError('Failed to split using node._split')
        node = InternalNode(prefix, search_key_func=search_key_func)
        node.set_maximum_size(maximum_size)
        node._key_width = key_width
        for split, subnode in node_details:
            node.add_node(split, subnode)
    # serialise() yields chk keys bottom-up; the last one is the root.
    keys = list(node.serialise(store))
    return keys[-1]
249
214
def iter_changes(self, basis):
250
215
"""Iterate over the changes between basis and self.
799
764
result[prefix] = node
801
766
node = result[prefix]
802
sub_prefix, node_details = node.map(store, key, value)
803
if len(node_details) > 1:
804
if prefix != sub_prefix:
805
# This node has been split and is now found via a different
808
new_node = InternalNode(sub_prefix,
809
search_key_func=self._search_key_func)
810
new_node.set_maximum_size(self._maximum_size)
811
new_node._key_width = self._key_width
812
for split, node in node_details:
813
new_node.add_node(split, node)
814
result[prefix] = new_node
767
node.map(store, key, value)
815
768
return common_prefix, result.items()
817
770
def map(self, store, key, value):
1002
955
# prefix is the key in self._items to use, key_filter is the key_filter
1003
956
# entries that would match this node
1006
958
if key_filter is None:
1007
# yielding all nodes, yield whatever we have, and queue up a read
1008
# for whatever we are missing
1010
959
for prefix, node in self._items.iteritems():
1011
if node.__class__ is tuple:
960
if type(node) == tuple:
1012
961
keys[node] = (prefix, None)
1014
963
yield node, None
1015
elif len(key_filter) == 1:
1016
# Technically, this path could also be handled by the first check
1017
# in 'self._node_width' in length_filters. However, we can handle
1018
# this case without spending any time building up the
1019
# prefix_to_keys, etc state.
1021
# This is a bit ugly, but TIMEIT showed it to be by far the fastest
1022
# 0.626us list(key_filter)[0]
1023
# is a func() for list(), 2 mallocs, and a getitem
1024
# 0.489us [k for k in key_filter][0]
1025
# still has the mallocs, avoids the func() call
1026
# 0.350us iter(key_filter).next()
1027
# has a func() call, and mallocs an iterator
1028
# 0.125us for key in key_filter: pass
1029
# no func() overhead, might malloc an iterator
1030
# 0.105us for key in key_filter: break
1031
# no func() overhead, might malloc an iterator, probably
1032
# avoids checking an 'else' clause as part of the for
1033
for key in key_filter:
1035
search_prefix = self._search_prefix_filter(key)
1036
if len(search_prefix) == self._node_width:
1037
# This item will match exactly, so just do a dict lookup, and
1038
# see what we can return
1041
node = self._items[search_prefix]
1043
# A given key can only match 1 child node, if it isn't
1044
# there, then we can just return nothing
1046
if node.__class__ is tuple:
1047
keys[node] = (search_prefix, [key])
1049
# This is loaded, and the only thing that can match,
1054
# First, convert all keys into a list of search prefixes
1055
# Aggregate common prefixes, and track the keys they come from
1056
966
prefix_to_keys = {}
1057
967
length_filters = {}
1058
968
for key in key_filter:
1059
search_prefix = self._search_prefix_filter(key)
969
search_key = self._search_prefix_filter(key)
1060
970
length_filter = length_filters.setdefault(
1061
len(search_prefix), set())
1062
length_filter.add(search_prefix)
1063
prefix_to_keys.setdefault(search_prefix, []).append(key)
1065
if (self._node_width in length_filters
1066
and len(length_filters) == 1):
1067
# all of the search prefixes match exactly _node_width. This
1068
# means that everything is an exact match, and we can do a
1069
# lookup into self._items, rather than iterating over the items
1071
search_prefixes = length_filters[self._node_width]
1072
for search_prefix in search_prefixes:
1074
node = self._items[search_prefix]
1076
# We can ignore this one
1078
node_key_filter = prefix_to_keys[search_prefix]
1079
if node.__class__ is tuple:
1080
keys[node] = (search_prefix, node_key_filter)
971
len(search_key), set())
972
length_filter.add(search_key)
973
prefix_to_keys.setdefault(search_key, []).append(key)
974
length_filters = length_filters.items()
975
for prefix, node in self._items.iteritems():
977
for length, length_filter in length_filters:
978
sub_prefix = prefix[:length]
979
if sub_prefix in length_filter:
980
node_key_filter.extend(prefix_to_keys[sub_prefix])
981
if node_key_filter: # this key matched something, yield it
982
if type(node) == tuple:
983
keys[node] = (prefix, node_key_filter)
1082
985
yield node, node_key_filter
1084
# The slow way. We walk every item in self._items, and check to
1085
# see if there are any matches
1086
length_filters = length_filters.items()
1087
for prefix, node in self._items.iteritems():
1088
node_key_filter = []
1089
for length, length_filter in length_filters:
1090
sub_prefix = prefix[:length]
1091
if sub_prefix in length_filter:
1092
node_key_filter.extend(prefix_to_keys[sub_prefix])
1093
if node_key_filter: # this key matched something, yield it
1094
if node.__class__ is tuple:
1095
keys[node] = (prefix, node_key_filter)
1097
yield node, node_key_filter
1099
987
# Look in the page cache for some more bytes
1100
988
found_keys = set()