83
85
class CHKMap(object):
84
86
"""A persistent map from string to string backed by a CHK store."""
88
__slots__ = ('_store', '_root_node', '_search_key_func')
86
90
def __init__(self, store, root_key, search_key_func=None):
87
91
"""Create a CHKMap object.
114
118
# Check preconditions first.
115
new_items = set([key for (old, key, value) in delta if key is not None
119
as_st = StaticTuple.from_sequence
120
new_items = set([as_st(key) for (old, key, value) in delta
121
if key is not None and old is None])
117
122
existing_new = list(self.iteritems(key_filter=new_items))
119
124
raise errors.InconsistentDeltaDelta(delta,
147
152
:param node: A tuple key or node object.
148
153
:return: A node object.
150
if type(node) is tuple:
155
if type(node) is StaticTuple:
151
156
bytes = self._read_bytes(node)
152
157
return _deserialise(bytes, node,
153
158
search_key_func=self._search_key_func)
194
199
for key, value in sorted(node._items.iteritems()):
195
200
# Don't use prefix nor indent here to line up when used in
196
201
# tests in conjunction with assertEqualDiff
197
result.append(' %r %r' % (key, value))
202
result.append(' %r %r' % (tuple(key), value))
218
223
root_key = klass._create_directly(store, initial_value,
219
224
maximum_size=maximum_size, key_width=key_width,
220
225
search_key_func=search_key_func)
226
if type(root_key) is not StaticTuple:
227
raise AssertionError('we got a %s instead of a StaticTuple'
238
246
node = LeafNode(search_key_func=search_key_func)
239
247
node.set_maximum_size(maximum_size)
240
248
node._key_width = key_width
241
node._items = dict(initial_value)
249
as_st = StaticTuple.from_sequence
250
node._items = dict([(as_st(key), val) for key, val
251
in initial_value.iteritems()])
242
252
node._raw_size = sum([node._key_value_len(key, value)
243
for key,value in initial_value.iteritems()])
253
for key,value in node._items.iteritems()])
244
254
node._len = len(node._items)
245
255
node._compute_search_prefix()
246
256
node._compute_serialised_prefix()
482
492
def iteritems(self, key_filter=None):
    """Iterate over the contents of this CHKMap.

    :param key_filter: Optional iterable of keys used to restrict the
        iteration. Every key is converted with StaticTuple.from_sequence
        before being handed to the root node. None is passed through
        unchanged.
    :return: The result of the root node's iteritems() for this map's
        store and the (converted) key filter.
    """
    # The root may still be an unloaded key; make sure it is a node object.
    self._ensure_root()
    if key_filter is None:
        normalised_filter = None
    else:
        # Nodes are keyed by StaticTuple, so coerce each caller-supplied
        # key before doing any lookups.
        normalised_filter = list(map(StaticTuple.from_sequence, key_filter))
    return self._root_node.iteritems(self._store,
                                     key_filter=normalised_filter)
488
501
"""Return the key for this map."""
489
if type(self._root_node) is tuple:
502
if type(self._root_node) is StaticTuple:
490
503
return self._root_node
492
505
return self._root_node._key
501
514
:param key: A key to map.
502
515
:param value: The value to assign to key.
517
key = StaticTuple.from_sequence(key)
504
518
# Need a root object.
505
519
self._ensure_root()
506
520
prefix, node_details = self._root_node.map(self._store, key, value)
517
531
def _node_key(self, node):
518
532
"""Get the key for a node whether it's a tuple or node."""
519
533
if type(node) is tuple:
534
node = StaticTuple.from_sequence(node)
535
if type(node) is StaticTuple:
524
540
def unmap(self, key, check_remap=True):
525
541
"""remove key from the map."""
542
key = StaticTuple.from_sequence(key)
526
543
self._ensure_root()
527
544
if type(self._root_node) is InternalNode:
528
545
unmapped = self._root_node.unmap(self._store, key,
556
573
adding the header bytes, and without prefix compression.
576
__slots__ = ('_key', '_len', '_maximum_size', '_key_width',
577
'_raw_size', '_items', '_search_prefix', '_search_key_func'
559
580
def __init__(self, key_width=1):
560
581
"""Create a node.
698
721
:param bytes: The bytes of the node.
699
722
:param key: The key that the serialised node has.
724
key = static_tuple.expect_static_tuple(key)
701
725
return _deserialise_leaf_node(bytes, key,
702
726
search_key_func=search_key_func)
873
897
lines.append(serialized[prefix_len:])
874
898
lines.extend(value_lines)
875
899
sha1, _, _ = store.add_lines((None,), (), lines)
876
self._key = ("sha1:" + sha1,)
900
self._key = StaticTuple("sha1:" + sha1,).intern()
877
901
bytes = ''.join(lines)
878
902
if len(bytes) != self._current_size():
879
903
raise AssertionError('Invalid _current_size')
947
971
LeafNode or InternalNode.
974
__slots__ = ('_node_width',)
950
976
def __init__(self, prefix='', search_key_func=None):
951
977
Node.__init__(self)
952
978
# The size of an internalnode with default values and no children.
994
1020
:param key: The key that the serialised node has.
995
1021
:return: An InternalNode instance.
1023
key = static_tuple.expect_static_tuple(key)
997
1024
return _deserialise_internal_node(bytes, key,
998
1025
search_key_func=search_key_func)
1024
1051
# for whatever we are missing
1025
1052
shortcut = True
1026
1053
for prefix, node in self._items.iteritems():
1027
if node.__class__ is tuple:
1054
if node.__class__ is StaticTuple:
1028
1055
keys[node] = (prefix, None)
1030
1057
yield node, None
1059
1086
# A given key can only match 1 child node, if it isn't
1060
1087
# there, then we can just return nothing
1062
if node.__class__ is tuple:
1089
if node.__class__ is StaticTuple:
1063
1090
keys[node] = (search_prefix, [key])
1065
1092
# This is loaded, and the only thing that can match,
1092
1119
# We can ignore this one
1094
1121
node_key_filter = prefix_to_keys[search_prefix]
1095
if node.__class__ is tuple:
1122
if node.__class__ is StaticTuple:
1096
1123
keys[node] = (search_prefix, node_key_filter)
1098
1125
yield node, node_key_filter
1107
1134
if sub_prefix in length_filter:
1108
1135
node_key_filter.extend(prefix_to_keys[sub_prefix])
1109
1136
if node_key_filter: # this key matched something, yield it
1110
if node.__class__ is tuple:
1137
if node.__class__ is StaticTuple:
1111
1138
keys[node] = (prefix, node_key_filter)
1113
1140
yield node, node_key_filter
1262
1289
lines.append('%s\n' % (self._search_prefix,))
1263
1290
prefix_len = len(self._search_prefix)
1264
1291
for prefix, node in sorted(self._items.items()):
1265
if type(node) is tuple:
1292
if type(node) is StaticTuple:
1268
1295
key = node._key[0]
1272
1299
% (serialised, self._search_prefix))
1273
1300
lines.append(serialised[prefix_len:])
1274
1301
sha1, _, _ = store.add_lines((None,), (), lines)
1275
self._key = ("sha1:" + sha1,)
1302
self._key = StaticTuple("sha1:" + sha1,).intern()
1276
1303
_page_cache.add(self._key, ''.join(lines))
1277
1304
yield self._key
1428
1455
def __init__(self, store, new_root_keys, old_root_keys,
1429
1456
search_key_func, pb=None):
1457
# TODO: Should we add a StaticTuple barrier here? It would be nice to
1458
# force callers to use StaticTuple, because there will often be
1459
# lots of keys passed in here. And even if we cast it locally,
1460
# that just means that we will have *both* a StaticTuple and a
1461
# tuple() in memory, referring to the same object. (so a net
1462
# increase in memory, not a decrease.)
1430
1463
self._store = store
1431
1464
self._new_root_keys = new_root_keys
1432
1465
self._old_root_keys = old_root_keys
1434
1467
# All uninteresting chks that we have seen. By the time they are added
1435
1468
# here, they should be either fully ignored, or queued up for
1470
# TODO: This might grow to a large size if there are lots of merge
1471
# parents, etc. However, it probably doesn't scale to O(history)
1472
# like _processed_new_refs does.
1437
1473
self._all_old_chks = set(self._old_root_keys)
1438
1474
# All items that we have seen from the old_root_keys
1439
1475
self._all_old_items = set()
1440
1476
# These are interesting items which were either read, or already in the
1441
1477
# interesting queue (so we don't need to walk them again)
1478
# TODO: processed_new_refs becomes O(all_chks), consider switching to
1442
1480
self._processed_new_refs = set()
1443
1481
self._search_key_func = search_key_func
1456
1494
# this code. (We may want to evaluate saving the raw bytes into the
1457
1495
# page cache, which would allow a working tree update after the fetch
1458
1496
# to not have to read the bytes again.)
1497
as_st = StaticTuple.from_sequence
1459
1498
stream = self._store.get_record_stream(keys, 'unordered', True)
1460
1499
for record in stream:
1461
1500
if self._pb is not None:
1468
1507
if type(node) is InternalNode:
1469
1508
# Note we don't have to do node.refs() because we know that
1470
1509
# there are no children that have been pushed into this node
1510
# Note: Using as_st() here seemed to save 1.2MB, which would
1511
# indicate that we keep 100k prefix_refs around while
1512
# processing. They *should* be shorter lived than that...
1513
# It does cost us ~10s of processing time
1514
#prefix_refs = [as_st(item) for item in node._items.iteritems()]
1471
1515
prefix_refs = node._items.items()
1474
1518
prefix_refs = []
1519
# Note: We don't use a StaticTuple here. Profiling showed a
1520
# minor memory improvement (0.8MB out of 335MB peak 0.2%)
1521
# But a significant slowdown (15s / 145s, or 10%)
1475
1522
items = node._items.items()
1476
1523
yield record, node, prefix_refs, items
1485
1532
if p_r[1] not in all_old_chks]
1486
1533
new_refs = [p_r[1] for p_r in prefix_refs]
1487
1534
all_old_chks.update(new_refs)
1535
# TODO: This might be a good time to turn items into StaticTuple
1536
# instances and possibly intern them. However, this does not
1537
# impact 'initial branch' performance, so I'm not worrying
1488
1539
self._all_old_items.update(items)
1489
1540
# Queue up the uninteresting references
1490
1541
# Don't actually put them in the 'to-read' queue until we have
1543
1594
# current design allows for this, as callers will do the work
1544
1595
# to make the results unique. We might profile whether we
1545
1596
# gain anything by ensuring unique return values for items
1597
# TODO: This might be a good time to cast to StaticTuple, as
1598
# self._new_item_queue will hold the contents of multiple
1599
# records for an extended lifetime
1546
1600
new_items = [item for item in items
1547
1601
if item not in self._all_old_items]
1548
1602
self._new_item_queue.extend(new_items)
1574
1628
yield None, new_items
1575
1629
refs = refs.difference(all_old_chks)
1630
processed_new_refs.update(refs)
1632
# TODO: Using a SimpleSet for self._processed_new_refs and
1633
# saved as much as 10MB of peak memory. However, it requires
1634
# implementing a non-pyrex version.
1577
1635
next_refs = set()
1578
1636
next_refs_update = next_refs.update
1579
1637
# Inlining _read_nodes_from_store improves 'bzr branch bzr.dev'
1580
1638
# from 1m54s to 1m51s. Consider it.
1581
1639
for record, _, p_refs, items in self._read_nodes_from_store(refs):
1582
items = [item for item in items
1583
if item not in all_old_items]
1641
# using the 'if' check saves about 145s => 141s, when
1642
# streaming initial branch of Launchpad data.
1643
items = [item for item in items
1644
if item not in all_old_items]
1584
1645
yield record, items
1585
1646
next_refs_update([p_r[1] for p_r in p_refs])
1648
# set1.difference(set/dict) walks all of set1, and checks if it
1649
# exists in 'other'.
1650
# set1.difference(iterable) walks all of iterable, and does a
1651
# 'difference_update' on a clone of set1. Pick wisely based on the
1652
# expected sizes of objects.
1653
# in our case it is expected that 'new_refs' will always be quite
1586
1655
next_refs = next_refs.difference(all_old_chks)
1587
1656
next_refs = next_refs.difference(processed_new_refs)
1588
1657
processed_new_refs.update(next_refs)
1595
1664
self._old_queue = []
1596
1665
all_old_chks = self._all_old_chks
1597
1666
for record, _, prefix_refs, items in self._read_nodes_from_store(refs):
1667
# TODO: Use StaticTuple here?
1598
1668
self._all_old_items.update(items)
1599
1669
refs = [r for _,r in prefix_refs if r not in all_old_chks]
1600
1670
self._old_queue.extend(refs)
1650
1721
search_key_registry.register('hash-16-way', _search_key_16)
1651
1722
search_key_registry.register('hash-255-way', _search_key_255)
1725
def _check_key(key):
1726
"""Helper function to assert that a key is properly formatted.
1728
This generally shouldn't be used in production code, but it can be helpful
1731
if type(key) is not StaticTuple:
1732
raise TypeError('key %r is not StaticTuple but %s' % (key, type(key)))
1734
raise ValueError('key %r should have length 1, not %d' % (key, len(key),))
1735
if type(key[0]) is not str:
1736
raise TypeError('key %r should hold a str, not %r'
1737
% (key, type(key[0])))
1738
if not key[0].startswith('sha1:'):
1739
raise ValueError('key %r should point to a sha1:' % (key,))