            # There may be duplicates, but I don't think it is worth worrying
            # about
            self._nodes[reference] = ('a', (), '')
        self._absent_keys.update(absent_references)
        self._absent_keys.discard(key)
        self._nodes[key] = ('', node_refs, value)
        if self._nodes_by_key is not None and self._key_length > 1:
            self._update_nodes_by_key(key, value, node_refs)

    def clear_cache(self):
        """See GraphIndex.clear_cache()

        This is a no-op, but we need the api to conform to a generic 'Index'
        abstraction.
        """

    def finish(self):
        lines = [_SIGNATURE]
        lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
        lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
        key_count = len(self._nodes) - len(self._absent_keys)
        lines.append(_OPTION_LEN + str(key_count) + '\n')
        prefix_length = sum(len(x) for x in lines)
        # references are byte offsets. To avoid having to do nasty
        # polynomial work to resolve offsets (references to later in the
                (len(result.getvalue()), expected_bytes))

    def set_optimize(self, for_size=None, combine_backing_indices=None):
        """Change how the builder tries to optimize the result.

        :param for_size: Tell the builder to try and make the index as small as
            possible.
        :param combine_backing_indices: If the builder spills to disk to save
            memory, should the on-disk indices be combined. Set to True if you
            are going to be probing the index, but to False if you are not. (If
            you are not querying, then the time spent combining is wasted.)
        """
        # GraphIndexBuilder itself doesn't pay attention to the flag yet, but
        # other builders do.
        if for_size is not None:
            self._optimize_for_size = for_size
        if combine_backing_indices is not None:
            self._combine_backing_indices = combine_backing_indices
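
# Illustrative usage (a sketch, not part of bzrlib itself).  It shows how a
# caller might flip the two optimization flags independently; passing None for
# either argument leaves the corresponding setting untouched, exactly as the
# method above implements.
from bzrlib.index import GraphIndexBuilder

builder = GraphIndexBuilder(reference_lists=1)
# Ask for the smallest possible output, but skip combining spilled indices
# because we will not be probing the builder while it accumulates nodes.
builder.set_optimize(for_size=True, combine_backing_indices=False)
# Later, only turn combining back on; for_size is left as-is (None is ignored).
builder.set_optimize(combine_backing_indices=True)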

    def find_ancestry(self, keys, ref_list_num):
        """See CombinedGraphIndex.find_ancestry()"""
        pending = set(keys)
        parent_map = {}
        missing_keys = set()
        while pending:
            next_pending = set()
            for _, key, value, ref_lists in self.iter_entries(pending):
                parent_keys = ref_lists[ref_list_num]
                parent_map[key] = parent_keys
                next_pending.update([p for p in parent_keys if p not in
                                     parent_map])
            missing_keys.update(pending.difference(parent_map))
            pending = next_pending
        return parent_map, missing_keys
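
# Illustrative usage (a sketch, not part of bzrlib).  It assumes
# InMemoryGraphIndex, the in-memory GraphIndexBuilder subclass defined
# elsewhere in this module, which supplies the iter_entries() used above.
# find_ancestry() walks parent references breadth-first and reports keys that
# were referenced but never added as missing.
from bzrlib.index import InMemoryGraphIndex

builder = InMemoryGraphIndex(reference_lists=1)
builder.add_node(('rev-a',), 'value-a', ([],))
builder.add_node(('rev-b',), 'value-b', ([('rev-a',)],))
builder.add_node(('rev-c',), 'value-c', ([('rev-b',), ('rev-ghost',)],))
parent_map, missing_keys = builder.find_ancestry([('rev-c',)], 0)
# parent_map maps each reachable key to its parent key tuple, e.g.
#   {('rev-c',): (('rev-b',), ('rev-ghost',)),
#    ('rev-b',): (('rev-a',),),
#    ('rev-a',): ()}
# missing_keys contains ('rev-ghost',): it was referenced but never added.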


class GraphIndex(object):
    """An index for data with embedded graphs.

    The index maps keys to a list of key reference lists, and a value.
    Each node has the same number of key reference lists. Each key reference
    list can be empty or an arbitrary length. The value is an opaque NULL
    terminated string without any newlines. The storage of the index is
    hidden in the interface: keys and key references are always tuples of
    bytestrings, never the internal representation (e.g. dictionary offsets).
                node_value = value
            self._nodes[key] = node_value
        # cache the keys for quick set intersections
        self._keys = set(self._nodes)
        if trailers != 1:
            # there must be one line - the empty trailer line.
            raise errors.BadIndexData(self)

    def clear_cache(self):
        """Clear out any cached/memoized values.

        This can be called at any time, but generally it is used when we have
        extracted some information, but don't expect to be requesting any more
        from this index.
        """

    def external_references(self, ref_list_num):
        """Return references that are not present in this index.
        """
        self._buffer_all()
        if ref_list_num + 1 > self.node_ref_lists:
            raise ValueError('No ref list %d, index has %d ref lists'
                % (ref_list_num, self.node_ref_lists))
        refs = set()
        nodes = self._nodes
        for key, (value, ref_lists) in nodes.iteritems():
            ref_list = ref_lists[ref_list_num]
            refs.update([ref for ref in ref_list if ref not in nodes])
        return refs
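
# Illustrative sketch (not part of bzrlib): the same "which references point
# outside this index?" computation over a plain dict shaped like self._nodes,
# i.e. key -> (value, ref_lists), where keys are tuples of bytestrings.
nodes = {
    ('rev-b',): ('value-b', ((('rev-a',),),)),   # rev-a is not in this index
    ('rev-c',): ('value-c', ((('rev-b',),),)),
}
refs = set()
for key, (value, ref_lists) in nodes.items():
    refs.update(ref for ref in ref_lists[0] if ref not in nodes)
# refs == set([('rev-a',)]) -- the only reference whose key is absent here.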

    def _get_nodes_by_key(self):
        if self._nodes_by_key is None:
            nodes_by_key = {}
                # the last thing looked up was a terminal element
                yield (self, ) + key_dict

    def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
        """See BTreeIndex._find_ancestors."""
        # The api can be implemented as a trivial overlay on top of
        # iter_entries, it is not an efficient implementation, but it at least
        # gets the job done.
        found_keys = set()
        search_keys = set()
        for index, key, value, refs in self.iter_entries(keys):
            parent_keys = refs[ref_list_num]
            found_keys.add(key)
            parent_map[key] = parent_keys
            search_keys.update(parent_keys)
        # Figure out what, if anything, was missing
        missing_keys.update(set(keys).difference(found_keys))
        search_keys = search_keys.difference(parent_map)
        return search_keys
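
# Illustrative sketch (not part of bzrlib): how a caller is expected to drive
# _find_ancestors().  Each call adds to parent_map/missing_keys in place and
# returns the parent keys that still have to be looked up, so the caller loops
# until nothing is left.  'index' is assumed to be a GraphIndex whose reference
# list 0 holds parent pointers.
def walk_ancestry(index, start_keys):
    parent_map = {}
    missing_keys = set()
    search_keys = set(start_keys)
    while search_keys:
        search_keys = index._find_ancestors(search_keys, 0, parent_map,
                                            missing_keys)
    return parent_map, missing_keys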

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For GraphIndex the estimate is exact.
        """
        if self._key_count is None:
        self._indices = indices
        self._reload_func = reload_func
        # Sibling indices are other CombinedGraphIndex that we should call
        # _move_to_front_by_name on when we auto-reorder ourself.
        self._sibling_indices = []
        # A list of names that corresponds to the instances in self._indices,
        # so _index_names[0] is always the name for _indices[0], etc. Sibling
        # indices must all use the same set of names as each other.
        self._index_names = [None] * len(self._indices)

    def __repr__(self):
        return "%s(%s)" % (
            self.__class__.__name__,
            ', '.join(map(repr, self._indices)))
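
# Illustrative sketch (not part of bzrlib): stacking two on-disk indices behind
# one CombinedGraphIndex.  The transport path, index file names and sizes are
# assumptions for illustration only.
from bzrlib import transport
from bzrlib.index import CombinedGraphIndex, GraphIndex

t = transport.get_transport('/path/to/indices')
idx1 = GraphIndex(t, 'pack-1.rix', t.stat('pack-1.rix').st_size)
idx2 = GraphIndex(t, 'pack-2.rix', t.stat('pack-2.rix').st_size)
combined = CombinedGraphIndex([idx1, idx2])
# Query APIs (iter_entries, get_parent_map, find_ancestry, ...) fan out across
# self._indices in order, which is why the move-to-front reordering further
# down pays off for repeated lookups.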

    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revision_ids):
        """See graph._StackedParentsProvider.get_parents.

        This implementation thunks the graph.Graph.get_parents api across to
        GraphIndex.

        :param revision_ids: An iterable of graph keys for this graph.
        :return: A list of parent details for each key in revision_ids.
            Each parent details will be one of:
             * None when the key was missing
             * (NULL_REVISION,) when the key has no parents.
             * (parent_key, parent_key...) otherwise.
        """
        parent_map = self.get_parent_map(revision_ids)
        return [parent_map.get(r, None) for r in revision_ids]
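
# Illustrative sketch (not part of bzrlib): the three shapes of "parent
# details" described above, derived from a get_parent_map()-style dict the
# same way the deprecated thunk does it.  The NULL_REVISION value is an
# assumption here.
NULL_REVISION = 'null:'   # assumed marker for "no parents"
parent_map = {
    'rev-root': (NULL_REVISION,),   # present, no parents
    'rev-child': ('rev-root',),     # present, one parent
}
revision_ids = ['rev-child', 'rev-root', 'rev-ghost']   # 'rev-ghost' is absent
details = [parent_map.get(r, None) for r in revision_ids]
# details == [('rev-root',), ('null:',), None]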

    def clear_cache(self):
        """See GraphIndex.clear_cache()"""
        for index in self._indices:
            index.clear_cache()

    def get_parent_map(self, keys):
        """See graph.StackedParentsProvider.get_parent_map"""
        search_keys = set(keys)
        if NULL_REVISION in search_keys:
            search_keys.discard(NULL_REVISION)
        seen_keys = set()
        hit_indices = []
        while True:
            try:
                for index in self._indices:
                    index_hit = False
                    for node in index.iter_entries_prefix(keys):
                        if node[1] in seen_keys:
                            continue
                        seen_keys.add(node[1])
                        yield node
                        index_hit = True
                    if index_hit:
                        hit_indices.append(index)
                break
            except errors.NoSuchFile:
                self._reload_or_raise()
        self._move_to_front(hit_indices)

    def _move_to_front(self, hit_indices):
        """Rearrange self._indices so that hit_indices are first.

        Order is maintained as much as possible, e.g. the first unhit index
        will be the first index in _indices after the hit_indices, and the
        hit_indices will be present in exactly the order they are passed to
        _move_to_front.

        _move_to_front propagates to all objects in self._sibling_indices by
        calling _move_to_front_by_name.
        """
        if self._indices[:len(hit_indices)] == hit_indices:
            # The 'hit_indices' are already at the front (and in the same
            # order), no need to re-order
            return
        hit_names = self._move_to_front_by_index(hit_indices)
        for sibling_idx in self._sibling_indices:
            sibling_idx._move_to_front_by_name(hit_names)

    def _move_to_front_by_index(self, hit_indices):
        """Core logic for _move_to_front.

        Returns a list of names corresponding to the hit_indices param.
        """
        indices_info = zip(self._index_names, self._indices)
        if 'index' in debug.debug_flags:
            mutter('CombinedGraphIndex reordering: currently %r, promoting %r',
                   indices_info, hit_indices)
        hit_names = []
        unhit_names = []
        new_hit_indices = []
        unhit_indices = []
        for offset, (name, idx) in enumerate(indices_info):
            if idx in hit_indices:
                hit_names.append(name)
                new_hit_indices.append(idx)
                if len(new_hit_indices) == len(hit_indices):
                    # We've found all of the hit entries, everything else is
                    # unhit
                    unhit_names.extend(self._index_names[offset+1:])
                    unhit_indices.extend(self._indices[offset+1:])
                    break
            else:
                unhit_names.append(name)
                unhit_indices.append(idx)
        self._indices = new_hit_indices + unhit_indices
        self._index_names = hit_names + unhit_names
        if 'index' in debug.debug_flags:
            mutter('CombinedGraphIndex reordered: %r', self._indices)
        return hit_names
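
# Illustrative sketch (not part of bzrlib): the reordering above as a
# standalone function over plain lists, to make the invariant easy to see --
# hit entries move to the front keeping their relative order, and everything
# else keeps its relative order behind them.
def move_hits_to_front(names, indices, hit_indices):
    hit_names, hit_objs, unhit_names, unhit_objs = [], [], [], []
    for name, idx in zip(names, indices):
        if idx in hit_indices:
            hit_names.append(name)
            hit_objs.append(idx)
        else:
            unhit_names.append(name)
            unhit_objs.append(idx)
    return hit_names + unhit_names, hit_objs + unhit_objs

# move_hits_to_front(['a', 'b', 'c'], [i1, i2, i3], [i3])
# -> (['c', 'a', 'b'], [i3, i1, i2])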

    def _move_to_front_by_name(self, hit_names):
        """Moves indices named by 'hit_names' to front of the search order, as
        described in _move_to_front.
        """
        # Translate names to index instances, and then call
        # _move_to_front_by_index.
        indices_info = zip(self._index_names, self._indices)
        hit_indices = []
        for name, idx in indices_info:
            if name in hit_names:
                hit_indices.append(idx)
        self._move_to_front_by_index(hit_indices)

    def find_ancestry(self, keys, ref_list_num):
        """Find the complete ancestry for the given set of keys.

        Note that this is a whole-ancestry request, so it should be used
        sparingly.

        :param keys: An iterable of keys to look for
        :param ref_list_num: The reference list which references the parents
            we care about.
        :return: (parent_map, missing_keys)
        """
        # XXX: make this call _move_to_front?
        missing_keys = set()
        parent_map = {}
        keys_to_lookup = set(keys)
        generation = 0
        while keys_to_lookup:
            # keys that *all* indexes claim are missing, stop searching them
            generation += 1
            all_index_missing = None
            # print 'gen\tidx\tsub\tn_keys\tn_pmap\tn_miss'
            # print '%4d\t\t\t%4d\t%5d\t%5d' % (generation, len(keys_to_lookup),
            #         len(parent_map),
            #         len(missing_keys))
            for index_idx, index in enumerate(self._indices):
                # TODO: we should probably be doing something with
                #       'missing_keys' since we've already determined that
                #       those revisions have not been found anywhere
                index_missing_keys = set()
                # Find all of the ancestry we can from this index
                # keep looking until the search_keys set is empty, which means
                # things we didn't find should be in index_missing_keys
                search_keys = keys_to_lookup
                sub_generation = 0
                # print ' \t%2d\t\t%4d\t%5d\t%5d' % (
                #     index_idx, len(search_keys),
                #     len(parent_map), len(index_missing_keys))
                while search_keys:
                    sub_generation += 1
                    # TODO: ref_list_num should really be a parameter, since
                    #       CombinedGraphIndex does not know what the ref lists
                    #       mean.
                    search_keys = index._find_ancestors(search_keys,
                        ref_list_num, parent_map, index_missing_keys)
                    # print ' \t \t%2d\t%4d\t%5d\t%5d' % (
                    #     sub_generation, len(search_keys),
                    #     len(parent_map), len(index_missing_keys))
                # Now set whatever was missing to be searched in the next index
                keys_to_lookup = index_missing_keys
                if all_index_missing is None:
                    all_index_missing = set(index_missing_keys)
                else:
                    all_index_missing.intersection_update(index_missing_keys)
                if not keys_to_lookup:
                    break
            if all_index_missing is None:
                # There were no indexes, so all search keys are 'missing'
                missing_keys.update(keys_to_lookup)
                keys_to_lookup = None
            else:
                missing_keys.update(all_index_missing)
                keys_to_lookup.difference_update(all_index_missing)
        return parent_map, missing_keys
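
# Illustrative sketch (not part of bzrlib): the core bookkeeping rule used
# above, shown with plain sets.  A key is only declared missing when *every*
# index in the stack reports it missing; anything found by at least one index
# simply stops being searched.
per_index_missing = [
    set([('rev-x',), ('rev-ghost',)]),   # index 0 could not find these
    set([('rev-ghost',)]),               # index 1 found rev-x, not rev-ghost
]
all_index_missing = None
for index_missing in per_index_missing:
    if all_index_missing is None:
        all_index_missing = set(index_missing)
    else:
        all_index_missing.intersection_update(index_missing)
# all_index_missing == set([('rev-ghost',)]) -- only truly missing keys remain.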

    def key_count(self):
        """Return an estimate of the number of keys in this index.