229
224
# There may be duplicates, but I don't think it is worth worrying
231
226
self._nodes[reference] = ('a', (), '')
232
self._absent_keys.update(absent_references)
233
self._absent_keys.discard(key)
234
227
self._nodes[key] = ('', node_refs, value)
235
229
if self._nodes_by_key is not None and self._key_length > 1:
236
230
self._update_nodes_by_key(key, value, node_refs)
238
def clear_cache(self):
239
"""See GraphIndex.clear_cache()
241
This is a no-op, but we need the api to conform to a generic 'Index'
245
232
def finish(self):
246
233
lines = [_SIGNATURE]
247
234
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
248
235
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
249
key_count = len(self._nodes) - len(self._absent_keys)
250
lines.append(_OPTION_LEN + str(key_count) + '\n')
236
lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
251
237
prefix_length = sum(len(x) for x in lines)
252
238
# references are byte offsets. To avoid having to do nasty
253
239
# polynomial work to resolve offsets (references to later in the
329
315
(len(result.getvalue()), expected_bytes))
332
def set_optimize(self, for_size=None, combine_backing_indices=None):
318
def set_optimize(self, for_size=True):
333
319
"""Change how the builder tries to optimize the result.
335
321
:param for_size: Tell the builder to try and make the index as small as
337
:param combine_backing_indices: If the builder spills to disk to save
338
memory, should the on-disk indices be combined. Set to True if you
339
are going to be probing the index, but to False if you are not. (If
340
you are not querying, then the time spent combining is wasted.)
343
325
# GraphIndexBuilder itself doesn't pay attention to the flag yet, but
344
326
# other builders do.
345
if for_size is not None:
346
self._optimize_for_size = for_size
347
if combine_backing_indices is not None:
348
self._combine_backing_indices = combine_backing_indices
350
def find_ancestry(self, keys, ref_list_num):
351
"""See CombinedGraphIndex.find_ancestry()"""
357
for _, key, value, ref_lists in self.iter_entries(pending):
358
parent_keys = ref_lists[ref_list_num]
359
parent_map[key] = parent_keys
360
next_pending.update([p for p in parent_keys if p not in
362
missing_keys.update(pending.difference(parent_map))
363
pending = next_pending
364
return parent_map, missing_keys
327
self._optimize_for_size = for_size
367
330
class GraphIndex(object):
368
331
"""An index for data with embedded graphs.
370
333
The index maps keys to a list of key reference lists, and a value.
371
334
Each node has the same number of key reference lists. Each key reference
372
335
list can be empty or an arbitrary length. The value is an opaque NULL
373
terminated string without any newlines. The storage of the index is
336
terminated string without any newlines. The storage of the index is
374
337
hidden in the interface: keys and key references are always tuples of
375
338
bytestrings, never the internal representation (e.g. dictionary offsets).
476
430
node_value = value
477
431
self._nodes[key] = node_value
478
432
# cache the keys for quick set intersections
433
self._keys = set(self._nodes)
479
434
if trailers != 1:
480
435
# there must be one line - the empty trailer line.
481
436
raise errors.BadIndexData(self)
483
def clear_cache(self):
484
"""Clear out any cached/memoized values.
486
This can be called at any time, but generally it is used when we have
487
extracted some information, but don't expect to be requesting any more
491
def external_references(self, ref_list_num):
492
"""Return references that are not present in this index.
495
if ref_list_num + 1 > self.node_ref_lists:
496
raise ValueError('No ref list %d, index has %d ref lists'
497
% (ref_list_num, self.node_ref_lists))
500
for key, (value, ref_lists) in nodes.iteritems():
501
ref_list = ref_lists[ref_list_num]
502
refs.update([ref for ref in ref_list if ref not in nodes])
505
438
def _get_nodes_by_key(self):
506
439
if self._nodes_by_key is None:
507
440
nodes_by_key = {}
752
682
# the last thing looked up was a terminal element
753
683
yield (self, ) + key_dict
755
def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
756
"""See BTreeIndex._find_ancestors."""
757
# The api can be implemented as a trivial overlay on top of
758
# iter_entries, it is not an efficient implementation, but it at least
762
for index, key, value, refs in self.iter_entries(keys):
763
parent_keys = refs[ref_list_num]
765
parent_map[key] = parent_keys
766
search_keys.update(parent_keys)
767
# Figure out what, if anything, was missing
768
missing_keys.update(set(keys).difference(found_keys))
769
search_keys = search_keys.difference(parent_map)
772
685
def key_count(self):
773
686
"""Return an estimate of the number of keys in this index.
775
688
For GraphIndex the estimate is exact.
777
690
if self._key_count is None:
1268
1165
self._indices = indices
1269
1166
self._reload_func = reload_func
1270
# Sibling indices are other CombinedGraphIndex that we should call
1271
# _move_to_front_by_name on when we auto-reorder ourself.
1272
self._sibling_indices = []
1273
# A list of names that corresponds to the instances in self._indices,
1274
# so _index_names[0] is always the name for _indices[0], etc. Sibling
1275
# indices must all use the same set of names as each other.
1276
self._index_names = [None] * len(self._indices)
1278
1168
def __repr__(self):
1279
1169
return "%s(%s)" % (
1280
1170
self.__class__.__name__,
1281
1171
', '.join(map(repr, self._indices)))
1283
def clear_cache(self):
1284
"""See GraphIndex.clear_cache()"""
1285
for index in self._indices:
1173
@symbol_versioning.deprecated_method(symbol_versioning.one_one)
1174
def get_parents(self, revision_ids):
1175
"""See graph._StackedParentsProvider.get_parents.
1177
This implementation thunks the graph.Graph.get_parents api across to
1180
:param revision_ids: An iterable of graph keys for this graph.
1181
:return: A list of parent details for each key in revision_ids.
1182
Each parent details will be one of:
1183
* None when the key was missing
1184
* (NULL_REVISION,) when the key has no parents.
1185
* (parent_key, parent_key...) otherwise.
1187
parent_map = self.get_parent_map(revision_ids)
1188
return [parent_map.get(r, None) for r in revision_ids]
1288
1190
def get_parent_map(self, keys):
1289
"""See graph.StackedParentsProvider.get_parent_map"""
1191
"""See graph._StackedParentsProvider.get_parent_map"""
1290
1192
search_keys = set(keys)
1291
1193
if NULL_REVISION in search_keys:
1292
1194
search_keys.discard(NULL_REVISION)
1392
1284
seen_keys = set()
1396
1287
for index in self._indices:
1398
1288
for node in index.iter_entries_prefix(keys):
1399
1289
if node[1] in seen_keys:
1401
1291
seen_keys.add(node[1])
1405
hit_indices.append(index)
1407
1294
except errors.NoSuchFile:
1408
1295
self._reload_or_raise()
1409
self._move_to_front(hit_indices)
1411
def _move_to_front(self, hit_indices):
1412
"""Rearrange self._indices so that hit_indices are first.
1414
Order is maintained as much as possible, e.g. the first unhit index
1415
will be the first index in _indices after the hit_indices, and the
1416
hit_indices will be present in exactly the order they are passed to
1419
_move_to_front propagates to all objects in self._sibling_indices by
1420
calling _move_to_front_by_name.
1422
if self._indices[:len(hit_indices)] == hit_indices:
1423
# The 'hit_indices' are already at the front (and in the same
1424
# order), no need to re-order
1426
hit_names = self._move_to_front_by_index(hit_indices)
1427
for sibling_idx in self._sibling_indices:
1428
sibling_idx._move_to_front_by_name(hit_names)
1430
def _move_to_front_by_index(self, hit_indices):
1431
"""Core logic for _move_to_front.
1433
Returns a list of names corresponding to the hit_indices param.
1435
indices_info = zip(self._index_names, self._indices)
1436
if 'index' in debug.debug_flags:
1437
mutter('CombinedGraphIndex reordering: currently %r, promoting %r',
1438
indices_info, hit_indices)
1441
new_hit_indices = []
1444
for offset, (name, idx) in enumerate(indices_info):
1445
if idx in hit_indices:
1446
hit_names.append(name)
1447
new_hit_indices.append(idx)
1448
if len(new_hit_indices) == len(hit_indices):
1449
# We've found all of the hit entries, everything else is
1451
unhit_names.extend(self._index_names[offset+1:])
1452
unhit_indices.extend(self._indices[offset+1:])
1455
unhit_names.append(name)
1456
unhit_indices.append(idx)
1458
self._indices = new_hit_indices + unhit_indices
1459
self._index_names = hit_names + unhit_names
1460
if 'index' in debug.debug_flags:
1461
mutter('CombinedGraphIndex reordered: %r', self._indices)
1464
def _move_to_front_by_name(self, hit_names):
1465
"""Moves indices named by 'hit_names' to front of the search order, as
1466
described in _move_to_front.
1468
# Translate names to index instances, and then call
1469
# _move_to_front_by_index.
1470
indices_info = zip(self._index_names, self._indices)
1472
for name, idx in indices_info:
1473
if name in hit_names:
1474
hit_indices.append(idx)
1475
self._move_to_front_by_index(hit_indices)
1477
def find_ancestry(self, keys, ref_list_num):
1478
"""Find the complete ancestry for the given set of keys.
1480
Note that this is a whole-ancestry request, so it should be used
1483
:param keys: An iterable of keys to look for
1484
:param ref_list_num: The reference list which references the parents
1486
:return: (parent_map, missing_keys)
1488
# XXX: make this call _move_to_front?
1489
missing_keys = set()
1491
keys_to_lookup = set(keys)
1493
while keys_to_lookup:
1494
# keys that *all* indexes claim are missing, stop searching them
1496
all_index_missing = None
1497
# print 'gen\tidx\tsub\tn_keys\tn_pmap\tn_miss'
1498
# print '%4d\t\t\t%4d\t%5d\t%5d' % (generation, len(keys_to_lookup),
1500
# len(missing_keys))
1501
for index_idx, index in enumerate(self._indices):
1502
# TODO: we should probably be doing something with
1503
# 'missing_keys' since we've already determined that
1504
# those revisions have not been found anywhere
1505
index_missing_keys = set()
1506
# Find all of the ancestry we can from this index
1507
# keep looking until the search_keys set is empty, which means
1508
# things we didn't find should be in index_missing_keys
1509
search_keys = keys_to_lookup
1511
# print ' \t%2d\t\t%4d\t%5d\t%5d' % (
1512
# index_idx, len(search_keys),
1513
# len(parent_map), len(index_missing_keys))
1516
# TODO: ref_list_num should really be a parameter, since
1517
# CombinedGraphIndex does not know what the ref lists
1519
search_keys = index._find_ancestors(search_keys,
1520
ref_list_num, parent_map, index_missing_keys)
1521
# print ' \t \t%2d\t%4d\t%5d\t%5d' % (
1522
# sub_generation, len(search_keys),
1523
# len(parent_map), len(index_missing_keys))
1524
# Now set whatever was missing to be searched in the next index
1525
keys_to_lookup = index_missing_keys
1526
if all_index_missing is None:
1527
all_index_missing = set(index_missing_keys)
1529
all_index_missing.intersection_update(index_missing_keys)
1530
if not keys_to_lookup:
1532
if all_index_missing is None:
1533
# There were no indexes, so all search keys are 'missing'
1534
missing_keys.update(keys_to_lookup)
1535
keys_to_lookup = None
1537
missing_keys.update(all_index_missing)
1538
keys_to_lookup.difference_update(all_index_missing)
1539
return parent_map, missing_keys
1541
1297
def key_count(self):
1542
1298
"""Return an estimate of the number of keys in this index.