70
69
class GraphIndexBuilder(object):
71
70
"""A builder that can build a GraphIndex.
73
The resulting graph has the structure::
72
The resulting graph has the structure:
75
_SIGNATURE OPTIONS NODES NEWLINE
76
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
77
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
79
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
80
KEY := Not-whitespace-utf8
82
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
83
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
84
REFERENCE := DIGITS ; digits is the byte offset in the index of the
86
VALUE := no-newline-no-null-bytes
74
_SIGNATURE OPTIONS NODES NEWLINE
75
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
76
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
78
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
79
KEY := Not-whitespace-utf8
81
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
82
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
83
REFERENCE := DIGITS ; digits is the byte offset in the index of the
85
VALUE := no-newline-no-null-bytes
89
88
def __init__(self, reference_lists=0, key_elements=1):
231
225
# There may be duplicates, but I don't think it is worth worrying
233
227
self._nodes[reference] = ('a', (), '')
234
self._absent_keys.update(absent_references)
235
self._absent_keys.discard(key)
236
228
self._nodes[key] = ('', node_refs, value)
237
230
if self._nodes_by_key is not None and self._key_length > 1:
238
231
self._update_nodes_by_key(key, value, node_refs)
240
def clear_cache(self):
241
"""See GraphIndex.clear_cache()
243
This is a no-op, but we need the api to conform to a generic 'Index'
247
233
def finish(self):
248
234
lines = [_SIGNATURE]
249
235
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
250
236
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
251
key_count = len(self._nodes) - len(self._absent_keys)
252
lines.append(_OPTION_LEN + str(key_count) + '\n')
237
lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
253
238
prefix_length = sum(len(x) for x in lines)
254
239
# references are byte offsets. To avoid having to do nasty
255
240
# polynomial work to resolve offsets (references to later in the
755
703
# the last thing looked up was a terminal element
756
704
yield (self, ) + key_dict
758
def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
759
"""See BTreeIndex._find_ancestors."""
760
# The api can be implemented as a trivial overlay on top of
761
# iter_entries, it is not an efficient implementation, but it at least
765
for index, key, value, refs in self.iter_entries(keys):
766
parent_keys = refs[ref_list_num]
768
parent_map[key] = parent_keys
769
search_keys.update(parent_keys)
770
# Figure out what, if anything, was missing
771
missing_keys.update(set(keys).difference(found_keys))
772
search_keys = search_keys.difference(parent_map)
775
706
def key_count(self):
776
707
"""Return an estimate of the number of keys in this index.
1271
1186
self._indices = indices
1272
1187
self._reload_func = reload_func
1273
# Sibling indices are other CombinedGraphIndex that we should call
1274
# _move_to_front_by_name on when we auto-reorder ourself.
1275
self._sibling_indices = []
1276
# A list of names that corresponds to the instances in self._indices,
1277
# so _index_names[0] is always the name for _indices[0], etc. Sibling
1278
# indices must all use the same set of names as each other.
1279
self._index_names = [None] * len(self._indices)
1281
1189
def __repr__(self):
1282
1190
return "%s(%s)" % (
1283
1191
self.__class__.__name__,
1284
1192
', '.join(map(repr, self._indices)))
1286
def clear_cache(self):
1287
"""See GraphIndex.clear_cache()"""
1288
for index in self._indices:
1291
1194
def get_parent_map(self, keys):
1292
"""See graph.StackedParentsProvider.get_parent_map"""
1195
"""See graph._StackedParentsProvider.get_parent_map"""
1293
1196
search_keys = set(keys)
1294
if _mod_revision.NULL_REVISION in search_keys:
1295
search_keys.discard(_mod_revision.NULL_REVISION)
1296
found_parents = {_mod_revision.NULL_REVISION:[]}
1197
if NULL_REVISION in search_keys:
1198
search_keys.discard(NULL_REVISION)
1199
found_parents = {NULL_REVISION:[]}
1298
1201
found_parents = {}
1299
1202
for index, key, value, refs in self.iter_entries(search_keys):
1300
1203
parents = refs[0]
1301
1204
if not parents:
1302
parents = (_mod_revision.NULL_REVISION,)
1205
parents = (NULL_REVISION,)
1303
1206
found_parents[key] = parents
1304
1207
return found_parents
1306
1209
has_key = _has_key_from_parent_map
1308
def insert_index(self, pos, index, name=None):
1211
def insert_index(self, pos, index):
1309
1212
"""Insert a new index in the list of indices to query.
1311
1214
:param pos: The position to insert the index.
1312
1215
:param index: The index to insert.
1313
:param name: a name for this index, e.g. a pack name. These names can
1314
be used to reflect index reorderings to related CombinedGraphIndex
1315
instances that use the same names. (see set_sibling_indices)
1317
1217
self._indices.insert(pos, index)
1318
self._index_names.insert(pos, name)
1320
1219
def iter_all_entries(self):
1321
1220
"""Iterate over all keys within the index
1395
1288
seen_keys = set()
1399
1291
for index in self._indices:
1401
1292
for node in index.iter_entries_prefix(keys):
1402
1293
if node[1] in seen_keys:
1404
1295
seen_keys.add(node[1])
1408
hit_indices.append(index)
1410
1298
except errors.NoSuchFile:
1411
1299
self._reload_or_raise()
1412
self._move_to_front(hit_indices)
1414
def _move_to_front(self, hit_indices):
1415
"""Rearrange self._indices so that hit_indices are first.
1417
Order is maintained as much as possible, e.g. the first unhit index
1418
will be the first index in _indices after the hit_indices, and the
1419
hit_indices will be present in exactly the order they are passed to
1422
_move_to_front propagates to all objects in self._sibling_indices by
1423
calling _move_to_front_by_name.
1425
if self._indices[:len(hit_indices)] == hit_indices:
1426
# The 'hit_indices' are already at the front (and in the same
1427
# order), no need to re-order
1429
hit_names = self._move_to_front_by_index(hit_indices)
1430
for sibling_idx in self._sibling_indices:
1431
sibling_idx._move_to_front_by_name(hit_names)
1433
def _move_to_front_by_index(self, hit_indices):
1434
"""Core logic for _move_to_front.
1436
Returns a list of names corresponding to the hit_indices param.
1438
indices_info = zip(self._index_names, self._indices)
1439
if 'index' in debug.debug_flags:
1440
trace.mutter('CombinedGraphIndex reordering: currently %r, '
1441
'promoting %r', indices_info, hit_indices)
1444
new_hit_indices = []
1447
for offset, (name, idx) in enumerate(indices_info):
1448
if idx in hit_indices:
1449
hit_names.append(name)
1450
new_hit_indices.append(idx)
1451
if len(new_hit_indices) == len(hit_indices):
1452
# We've found all of the hit entries, everything else is
1454
unhit_names.extend(self._index_names[offset+1:])
1455
unhit_indices.extend(self._indices[offset+1:])
1458
unhit_names.append(name)
1459
unhit_indices.append(idx)
1461
self._indices = new_hit_indices + unhit_indices
1462
self._index_names = hit_names + unhit_names
1463
if 'index' in debug.debug_flags:
1464
trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
1467
def _move_to_front_by_name(self, hit_names):
1468
"""Moves indices named by 'hit_names' to front of the search order, as
1469
described in _move_to_front.
1471
# Translate names to index instances, and then call
1472
# _move_to_front_by_index.
1473
indices_info = zip(self._index_names, self._indices)
1475
for name, idx in indices_info:
1476
if name in hit_names:
1477
hit_indices.append(idx)
1478
self._move_to_front_by_index(hit_indices)
1480
def find_ancestry(self, keys, ref_list_num):
1481
"""Find the complete ancestry for the given set of keys.
1483
Note that this is a whole-ancestry request, so it should be used
1486
:param keys: An iterable of keys to look for
1487
:param ref_list_num: The reference list which references the parents
1489
:return: (parent_map, missing_keys)
1491
# XXX: make this call _move_to_front?
1492
missing_keys = set()
1494
keys_to_lookup = set(keys)
1496
while keys_to_lookup:
1497
# keys that *all* indexes claim are missing, stop searching them
1499
all_index_missing = None
1500
# print 'gen\tidx\tsub\tn_keys\tn_pmap\tn_miss'
1501
# print '%4d\t\t\t%4d\t%5d\t%5d' % (generation, len(keys_to_lookup),
1503
# len(missing_keys))
1504
for index_idx, index in enumerate(self._indices):
1505
# TODO: we should probably be doing something with
1506
# 'missing_keys' since we've already determined that
1507
# those revisions have not been found anywhere
1508
index_missing_keys = set()
1509
# Find all of the ancestry we can from this index
1510
# keep looking until the search_keys set is empty, which means
1511
# things we didn't find should be in index_missing_keys
1512
search_keys = keys_to_lookup
1514
# print ' \t%2d\t\t%4d\t%5d\t%5d' % (
1515
# index_idx, len(search_keys),
1516
# len(parent_map), len(index_missing_keys))
1519
# TODO: ref_list_num should really be a parameter, since
1520
# CombinedGraphIndex does not know what the ref lists
1522
search_keys = index._find_ancestors(search_keys,
1523
ref_list_num, parent_map, index_missing_keys)
1524
# print ' \t \t%2d\t%4d\t%5d\t%5d' % (
1525
# sub_generation, len(search_keys),
1526
# len(parent_map), len(index_missing_keys))
1527
# Now set whatever was missing to be searched in the next index
1528
keys_to_lookup = index_missing_keys
1529
if all_index_missing is None:
1530
all_index_missing = set(index_missing_keys)
1532
all_index_missing.intersection_update(index_missing_keys)
1533
if not keys_to_lookup:
1535
if all_index_missing is None:
1536
# There were no indexes, so all search keys are 'missing'
1537
missing_keys.update(keys_to_lookup)
1538
keys_to_lookup = None
1540
missing_keys.update(all_index_missing)
1541
keys_to_lookup.difference_update(all_index_missing)
1542
return parent_map, missing_keys
1544
1301
def key_count(self):
1545
1302
"""Return an estimate of the number of keys in this index.