_newline_null_re = re.compile('[\n\0]')


def _has_key_from_parent_map(self, key):
    """Check if this index has one key.

    If it's possible to check for multiple keys at once through
    calling get_parent_map that should be faster.
    """
    return (key in self.get_parent_map([key]))


def _missing_keys_from_parent_map(self, keys):
    return set(keys) - set(self.get_parent_map(keys))
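
# Example (illustrative): these helpers are meant to be bound as methods on
# index classes that already provide get_parent_map(); CombinedGraphIndex
# below does this with 'missing_keys = _missing_keys_from_parent_map'. A
# hypothetical index class could reuse them the same way:
#
#   class MyIndex(object):              # hypothetical class, for illustration
#       def get_parent_map(self, keys):
#           ...
#       has_key = _has_key_from_parent_map
#       missing_keys = _missing_keys_from_parent_map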


class GraphIndexBuilder(object):
    """A builder that can build a GraphIndex.

    The resulting graph has the structure:

    _SIGNATURE OPTIONS NODES NEWLINE
    _SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
    OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
            if not element or _whitespace_re.search(element) is not None:
                raise errors.BadIndexKey(element)

    def _external_references(self):
        """Return references that are not present in this index.
        """
        keys = set()
        refs = set()
        # TODO: JAM 2008-11-21 This makes an assumption about how the reference
        #       lists are used. It is currently correct for pack-0.92 through
        #       1.9, which use the node references (3rd column) second
        #       reference list as the compression parent. Perhaps this should
        #       be moved into something higher up the stack, since it
        #       makes assumptions about how the index is used.
        if self.reference_lists > 1:
            for node in self.iter_all_entries():
                keys.add(node[1])
                refs.update(node[3][1])
            return refs - keys
        else:
            # If reference_lists == 0 there can be no external references, and
            # if reference_lists == 1, then there isn't a place to store the
            # compression parent
            return set()

    def _get_nodes_by_key(self):
        if self._nodes_by_key is None:
            nodes_by_key = {}
                (len(result.getvalue()), expected_bytes))

    def set_optimize(self, for_size=None, combine_backing_indices=None):
        """Change how the builder tries to optimize the result.

        :param for_size: Tell the builder to try and make the index as small as
            possible.
        :param combine_backing_indices: If the builder spills to disk to save
            memory, should the on-disk indices be combined. Set to True if you
            are going to be probing the index, but to False if you are not. (If
            you are not querying, then the time spent combining is wasted.)
        """
        # GraphIndexBuilder itself doesn't pay attention to the flag yet, but
        # other builders do.
        if for_size is not None:
            self._optimize_for_size = for_size
        if combine_backing_indices is not None:
            self._combine_backing_indices = combine_backing_indices
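
    # Usage sketch (illustrative): a builder that will only be written out,
    # never queried, can skip the cost of combining spilled indices, while a
    # size-sensitive caller can ask for a smaller result.
    #
    #   builder = GraphIndexBuilder(reference_lists=1)
    #   builder.set_optimize(for_size=True)
    #   builder.set_optimize(combine_backing_indices=False)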

    def find_ancestry(self, keys, ref_list_num):
        """See CombinedGraphIndex.find_ancestry()"""
        pending = set(keys)
        parent_map = {}
        missing_keys = set()
        while pending:
            next_pending = set()
            for _, key, value, ref_lists in self.iter_entries(pending):
                parent_keys = ref_lists[ref_list_num]
                parent_map[key] = parent_keys
                next_pending.update([p for p in parent_keys if p not in
                                     parent_map])
            missing_keys.update(pending.difference(parent_map))
            pending = next_pending
        return parent_map, missing_keys
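
    # Example of the result shape (illustrative, with made-up keys): for a
    # builder subclass that supports iter_entries (such as InMemoryGraphIndex
    # below), holding key ('B',) whose first reference list points at ('A',)
    # and ('A',) with no parents,
    #
    #   parent_map, missing = index.find_ancestry([('B',)], 0)
    #
    # would walk the chosen reference list and produce something like
    # parent_map == {('B',): (('A',),), ('A',): ()} with missing == set().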


class GraphIndex(object):
    """An index for data with embedded graphs.

    The index maps keys to a list of key reference lists, and a value.
    Each node has the same number of key reference lists. Each key reference
    list can be empty or an arbitrary length. The value is an opaque NULL
    terminated string without any newlines. The storage of the index is
    hidden in the interface: keys and key references are always tuples of
    bytestrings, never the internal representation (e.g. dictionary offsets).
                node_value = value
            self._nodes[key] = node_value
            if self._key_length > 1:
                # TODO: We may want to do this lazily, but if we are calling
                #       _buffer_all, we are likely to be doing
                #       iter_entries_prefix
                key_dict = self._nodes_by_key
                if self.node_ref_lists:
                    key_value = key, node_value[0], node_value[1]
                else:
                    key_value = key, node_value
                # For a key of (foo, bar, baz) create
                # _nodes_by_key[foo][bar][baz] = key_value
                for subkey in key[:-1]:
                    key_dict = key_dict.setdefault(subkey, {})
                key_dict[key[-1]] = key_value
        # cache the keys for quick set intersections
        self._keys = set(self._nodes)
        if trailers != 1:
            # there must be one line - the empty trailer line.
            raise errors.BadIndexData(self)

    def external_references(self, ref_list_num):
        """Return references that are not present in this index.
        """
        self._buffer_all()
        if ref_list_num + 1 > self.node_ref_lists:
            raise ValueError('No ref list %d, index has %d ref lists'
                % (ref_list_num, self.node_ref_lists))
        refs = set()
        for key, (value, ref_lists) in self._nodes.iteritems():
            ref_list = ref_lists[ref_list_num]
            refs.update(ref_list)
        return refs - self._keys
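
    # Usage sketch (illustrative): for pack-style indices where the second
    # reference list holds compression parents (see the TODO in
    # GraphIndexBuilder._external_references above), the parents that live
    # outside this index can be collected with
    #
    #   missing_parents = index.external_references(1)
    #
    # ref_list_num is zero-based, so 1 names the second reference list here.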

    def _get_nodes_by_key(self):
        if self._nodes_by_key is None:
            nodes_by_key = {}
            if self.node_ref_lists:
                for key, (value, references) in self._nodes.iteritems():
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value, references
            else:
                for key, value in self._nodes.iteritems():
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value
            self._nodes_by_key = nodes_by_key
        return self._nodes_by_key
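
    # Shape of the cache (illustrative, with made-up keys): for a two-element
    # key index holding ('foo', 'bar') -> 'value', the nested dict built above
    # would look like
    #
    #   {'foo': {'bar': (('foo', 'bar'), 'value')}}
    #
    # with an extra references element in each leaf tuple when node_ref_lists
    # is set.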

    def iter_all_entries(self):
        """Iterate over all keys within the index.
                # the last thing looked up was a terminal element
                yield (self, ) + key_dict

    def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
        """See BTreeIndex._find_ancestors."""
        # The api can be implemented as a trivial overlay on top of
        # iter_entries, it is not an efficient implementation, but it at least
        # gets the job done.
        found_keys = set()
        search_keys = set()
        for index, key, value, refs in self.iter_entries(keys):
            parent_keys = refs[ref_list_num]
            found_keys.add(key)
            parent_map[key] = parent_keys
            search_keys.update(parent_keys)
        # Figure out what, if anything, was missing
        missing_keys.update(set(keys).difference(found_keys))
        search_keys = search_keys.difference(parent_map)
        return search_keys

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For GraphIndex the estimate is exact.
        """
        if self._key_count is None:

    in the index list.
    """

    def __init__(self, indices, reload_func=None):
        """Create a CombinedGraphIndex backed by indices.

        :param indices: An ordered list of indices to query for data.
        :param reload_func: A function to call if we find we are missing an
            index. Should have the form reload_func() => True/False to indicate
            if reloading actually changed anything.
        """
        self._indices = indices
        self._reload_func = reload_func
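
    # Illustrative reload_func (hypothetical): callers typically pass a
    # closure that refreshes the index list from some external source and
    # reports whether anything actually changed, e.g.
    #
    #   def reload_func():
    #       new_indices = pack_collection_reload()   # hypothetical helper
    #       if new_indices == combined._indices:
    #           return False
    #       combined._indices[:] = new_indices
    #       return True
    #
    #   combined = CombinedGraphIndex(indices, reload_func=reload_func)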

    def __repr__(self):
        return "%s(%s)" % (
            self.__class__.__name__,
            ', '.join(map(repr, self._indices)))

    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revision_ids):
        """See graph._StackedParentsProvider.get_parents.

        This implementation thunks the graph.Graph.get_parents api across to
        GraphIndex.

        :param revision_ids: An iterable of graph keys for this graph.
        :return: A list of parent details for each key in revision_ids.
            Each parent details will be one of:
             * None when the key was missing
             * (NULL_REVISION,) when the key has no parents.
             * (parent_key, parent_key...) otherwise.
        """
        parent_map = self.get_parent_map(revision_ids)
        return [parent_map.get(r, None) for r in revision_ids]
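
    # Relationship between the two APIs (illustrative): get_parents is a thin
    # positional wrapper over get_parent_map, so for a hypothetical pair of
    # keys where only the first is present,
    #
    #   parent_map = index.get_parent_map([key_a, key_b])
    #   index.get_parents([key_a, key_b]) == [parent_map[key_a], None]
    #
    # i.e. missing keys keep their position and come back as None.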

    def get_parent_map(self, keys):
        """See graph.StackedParentsProvider.get_parent_map"""
        search_keys = set(keys)
        if NULL_REVISION in search_keys:
            search_keys.discard(NULL_REVISION)
        seen_keys = set()
        while True:
            try:
                for index in self._indices:
                    for node in index.iter_entries_prefix(keys):
                        if node[1] in seen_keys:
                            continue
                        seen_keys.add(node[1])
                        yield node
                return
            except errors.NoSuchFile:
                self._reload_or_raise()

    def find_ancestry(self, keys, ref_list_num):
        """Find the complete ancestry for the given set of keys.

        Note that this is a whole-ancestry request, so it should be used
        sparingly.

        :param keys: An iterable of keys to look for
        :param ref_list_num: The reference list which references the parents
            we care about.
        :return: (parent_map, missing_keys)
        """
        missing_keys = set()
        parent_map = {}
        keys_to_lookup = set(keys)
        generation = 0
        while keys_to_lookup:
            # keys that *all* indexes claim are missing, stop searching them
            generation += 1
            all_index_missing = None
            # print 'gen\tidx\tsub\tn_keys\tn_pmap\tn_miss'
            # print '%4d\t\t\t%4d\t%5d\t%5d' % (generation, len(keys_to_lookup),
            #                                   len(parent_map),
            #                                   len(missing_keys))
            for index_idx, index in enumerate(self._indices):
                # TODO: we should probably be doing something with
                #       'missing_keys' since we've already determined that
                #       those revisions have not been found anywhere
                index_missing_keys = set()
                # Find all of the ancestry we can from this index
                # keep looking until the search_keys set is empty, which means
                # things we didn't find should be in index_missing_keys
                search_keys = keys_to_lookup
                sub_generation = 0
                # print '    \t%2d\t\t%4d\t%5d\t%5d' % (
                #     index_idx, len(search_keys),
                #     len(parent_map), len(index_missing_keys))
                while search_keys:
                    sub_generation += 1
                    # TODO: ref_list_num should really be a parameter, since
                    #       CombinedGraphIndex does not know what the ref lists
                    #       mean.
                    search_keys = index._find_ancestors(search_keys,
                        ref_list_num, parent_map, index_missing_keys)
                    # print '    \t    \t%2d\t%4d\t%5d\t%5d' % (
                    #     sub_generation, len(search_keys),
                    #     len(parent_map), len(index_missing_keys))
                # Now set whatever was missing to be searched in the next index
                keys_to_lookup = index_missing_keys
                if all_index_missing is None:
                    all_index_missing = set(index_missing_keys)
                else:
                    all_index_missing.intersection_update(index_missing_keys)
                if not keys_to_lookup:
                    break
            if all_index_missing is None:
                # There were no indexes, so all search keys are 'missing'
                missing_keys.update(keys_to_lookup)
                keys_to_lookup = None
            else:
                missing_keys.update(all_index_missing)
                keys_to_lookup.difference_update(all_index_missing)
        return parent_map, missing_keys

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For CombinedGraphIndex this is approximated by the sum of the keys of
        the child indices. As child indices may have duplicate keys this can
        have a maximum error of the number of child indices * largest number of
        keys in any index.
        """
        while True:
            try:
                return sum((index.key_count() for index in self._indices), 0)
            except errors.NoSuchFile:
                self._reload_or_raise()

    missing_keys = _missing_keys_from_parent_map

    def _reload_or_raise(self):
        """We just got a NoSuchFile exception.

        Try to reload the indices, if it fails, just raise the current
        exception.
        """
        if self._reload_func is None:
            raise
        exc_type, exc_value, exc_traceback = sys.exc_info()
        trace.mutter('Trying to reload after getting exception: %s',
                     exc_value)
        if not self._reload_func():
            # We tried to reload, but nothing changed, so we fail anyway
            trace.mutter('_reload_func indicated nothing has changed.'
                         ' Raising original exception.')
            raise exc_type, exc_value, exc_traceback

    def validate(self):
        """Validate that everything in the index can be accessed."""
        while True:
            try:
                for index in self._indices:
                    index.validate()
                return
            except errors.NoSuchFile:
                self._reload_or_raise()


class InMemoryGraphIndex(GraphIndexBuilder):