70
68
class GraphIndexBuilder(object):
71
69
"""A builder that can build a GraphIndex.
73
The resulting graph has the structure::
71
The resulting graph has the structure:
75
_SIGNATURE OPTIONS NODES NEWLINE
76
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
77
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
79
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
80
KEY := Not-whitespace-utf8
82
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
83
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
84
REFERENCE := DIGITS ; digits is the byte offset in the index of the
86
VALUE := no-newline-no-null-bytes
73
_SIGNATURE OPTIONS NODES NEWLINE
74
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
75
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
77
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
78
KEY := Not-whitespace-utf8
80
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
81
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
82
REFERENCE := DIGITS ; digits is the byte offset in the index of the
84
VALUE := no-newline-no-null-bytes
89
87
def __init__(self, reference_lists=0, key_elements=1):
231
224
# There may be duplicates, but I don't think it is worth worrying
233
226
self._nodes[reference] = ('a', (), '')
234
self._absent_keys.update(absent_references)
235
self._absent_keys.discard(key)
236
227
self._nodes[key] = ('', node_refs, value)
237
229
if self._nodes_by_key is not None and self._key_length > 1:
238
230
self._update_nodes_by_key(key, value, node_refs)
240
def clear_cache(self):
241
"""See GraphIndex.clear_cache()
243
This is a no-op, but we need the api to conform to a generic 'Index'
247
232
def finish(self):
250
:returns: cStringIO holding the full context of the index as it
251
should be written to disk.
253
233
lines = [_SIGNATURE]
254
234
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
255
235
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
256
key_count = len(self._nodes) - len(self._absent_keys)
257
lines.append(_OPTION_LEN + str(key_count) + '\n')
236
lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
258
237
prefix_length = sum(len(x) for x in lines)
259
238
# references are byte offsets. To avoid having to do nasty
260
239
# polynomial work to resolve offsets (references to later in the
760
702
# the last thing looked up was a terminal element
761
703
yield (self, ) + key_dict
763
def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
764
"""See BTreeIndex._find_ancestors."""
765
# The api can be implemented as a trivial overlay on top of
766
# iter_entries, it is not an efficient implementation, but it at least
770
for index, key, value, refs in self.iter_entries(keys):
771
parent_keys = refs[ref_list_num]
773
parent_map[key] = parent_keys
774
search_keys.update(parent_keys)
775
# Figure out what, if anything, was missing
776
missing_keys.update(set(keys).difference(found_keys))
777
search_keys = search_keys.difference(parent_map)
780
705
def key_count(self):
781
706
"""Return an estimate of the number of keys in this index.
1276
1185
self._indices = indices
1277
1186
self._reload_func = reload_func
1278
# Sibling indices are other CombinedGraphIndex that we should call
1279
# _move_to_front_by_name on when we auto-reorder ourself.
1280
self._sibling_indices = []
1281
# A list of names that corresponds to the instances in self._indices,
1282
# so _index_names[0] is always the name for _indices[0], etc. Sibling
1283
# indices must all use the same set of names as each other.
1284
self._index_names = [None] * len(self._indices)
1286
1188
def __repr__(self):
1287
1189
return "%s(%s)" % (
1288
1190
self.__class__.__name__,
1289
1191
', '.join(map(repr, self._indices)))
1291
def clear_cache(self):
1292
"""See GraphIndex.clear_cache()"""
1293
for index in self._indices:
1296
1193
def get_parent_map(self, keys):
1297
1194
"""See graph.StackedParentsProvider.get_parent_map"""
1298
1195
search_keys = set(keys)
1299
if _mod_revision.NULL_REVISION in search_keys:
1300
search_keys.discard(_mod_revision.NULL_REVISION)
1301
found_parents = {_mod_revision.NULL_REVISION:[]}
1196
if NULL_REVISION in search_keys:
1197
search_keys.discard(NULL_REVISION)
1198
found_parents = {NULL_REVISION:[]}
1303
1200
found_parents = {}
1304
1201
for index, key, value, refs in self.iter_entries(search_keys):
1305
1202
parents = refs[0]
1306
1203
if not parents:
1307
parents = (_mod_revision.NULL_REVISION,)
1204
parents = (NULL_REVISION,)
1308
1205
found_parents[key] = parents
1309
1206
return found_parents
1311
1208
has_key = _has_key_from_parent_map
1313
def insert_index(self, pos, index, name=None):
1210
def insert_index(self, pos, index):
1314
1211
"""Insert a new index in the list of indices to query.
1316
1213
:param pos: The position to insert the index.
1317
1214
:param index: The index to insert.
1318
:param name: a name for this index, e.g. a pack name. These names can
1319
be used to reflect index reorderings to related CombinedGraphIndex
1320
instances that use the same names. (see set_sibling_indices)
1322
1216
self._indices.insert(pos, index)
1323
self._index_names.insert(pos, name)
1325
1218
def iter_all_entries(self):
1326
1219
"""Iterate over all keys within the index
1400
1287
seen_keys = set()
1404
1290
for index in self._indices:
1406
1291
for node in index.iter_entries_prefix(keys):
1407
1292
if node[1] in seen_keys:
1409
1294
seen_keys.add(node[1])
1413
hit_indices.append(index)
1415
1297
except errors.NoSuchFile:
1416
1298
self._reload_or_raise()
1417
self._move_to_front(hit_indices)
1419
def _move_to_front(self, hit_indices):
1420
"""Rearrange self._indices so that hit_indices are first.
1422
Order is maintained as much as possible, e.g. the first unhit index
1423
will be the first index in _indices after the hit_indices, and the
1424
hit_indices will be present in exactly the order they are passed to
1427
_move_to_front propagates to all objects in self._sibling_indices by
1428
calling _move_to_front_by_name.
1430
if self._indices[:len(hit_indices)] == hit_indices:
1431
# The 'hit_indices' are already at the front (and in the same
1432
# order), no need to re-order
1434
hit_names = self._move_to_front_by_index(hit_indices)
1435
for sibling_idx in self._sibling_indices:
1436
sibling_idx._move_to_front_by_name(hit_names)
1438
def _move_to_front_by_index(self, hit_indices):
1439
"""Core logic for _move_to_front.
1441
Returns a list of names corresponding to the hit_indices param.
1443
indices_info = zip(self._index_names, self._indices)
1444
if 'index' in debug.debug_flags:
1445
trace.mutter('CombinedGraphIndex reordering: currently %r, '
1446
'promoting %r', indices_info, hit_indices)
1449
new_hit_indices = []
1452
for offset, (name, idx) in enumerate(indices_info):
1453
if idx in hit_indices:
1454
hit_names.append(name)
1455
new_hit_indices.append(idx)
1456
if len(new_hit_indices) == len(hit_indices):
1457
# We've found all of the hit entries, everything else is
1459
unhit_names.extend(self._index_names[offset+1:])
1460
unhit_indices.extend(self._indices[offset+1:])
1463
unhit_names.append(name)
1464
unhit_indices.append(idx)
1466
self._indices = new_hit_indices + unhit_indices
1467
self._index_names = hit_names + unhit_names
1468
if 'index' in debug.debug_flags:
1469
trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
1472
def _move_to_front_by_name(self, hit_names):
1473
"""Moves indices named by 'hit_names' to front of the search order, as
1474
described in _move_to_front.
1476
# Translate names to index instances, and then call
1477
# _move_to_front_by_index.
1478
indices_info = zip(self._index_names, self._indices)
1480
for name, idx in indices_info:
1481
if name in hit_names:
1482
hit_indices.append(idx)
1483
self._move_to_front_by_index(hit_indices)
1485
def find_ancestry(self, keys, ref_list_num):
1486
"""Find the complete ancestry for the given set of keys.
1488
Note that this is a whole-ancestry request, so it should be used
1491
:param keys: An iterable of keys to look for
1492
:param ref_list_num: The reference list which references the parents
1494
:return: (parent_map, missing_keys)
1496
# XXX: make this call _move_to_front?
1497
missing_keys = set()
1499
keys_to_lookup = set(keys)
1501
while keys_to_lookup:
1502
# keys that *all* indexes claim are missing, stop searching them
1504
all_index_missing = None
1505
# print 'gen\tidx\tsub\tn_keys\tn_pmap\tn_miss'
1506
# print '%4d\t\t\t%4d\t%5d\t%5d' % (generation, len(keys_to_lookup),
1508
# len(missing_keys))
1509
for index_idx, index in enumerate(self._indices):
1510
# TODO: we should probably be doing something with
1511
# 'missing_keys' since we've already determined that
1512
# those revisions have not been found anywhere
1513
index_missing_keys = set()
1514
# Find all of the ancestry we can from this index
1515
# keep looking until the search_keys set is empty, which means
1516
# things we didn't find should be in index_missing_keys
1517
search_keys = keys_to_lookup
1519
# print ' \t%2d\t\t%4d\t%5d\t%5d' % (
1520
# index_idx, len(search_keys),
1521
# len(parent_map), len(index_missing_keys))
1524
# TODO: ref_list_num should really be a parameter, since
1525
# CombinedGraphIndex does not know what the ref lists
1527
search_keys = index._find_ancestors(search_keys,
1528
ref_list_num, parent_map, index_missing_keys)
1529
# print ' \t \t%2d\t%4d\t%5d\t%5d' % (
1530
# sub_generation, len(search_keys),
1531
# len(parent_map), len(index_missing_keys))
1532
# Now set whatever was missing to be searched in the next index
1533
keys_to_lookup = index_missing_keys
1534
if all_index_missing is None:
1535
all_index_missing = set(index_missing_keys)
1537
all_index_missing.intersection_update(index_missing_keys)
1538
if not keys_to_lookup:
1540
if all_index_missing is None:
1541
# There were no indexes, so all search keys are 'missing'
1542
missing_keys.update(keys_to_lookup)
1543
keys_to_lookup = None
1545
missing_keys.update(all_index_missing)
1546
keys_to_lookup.difference_update(all_index_missing)
1547
return parent_map, missing_keys
1549
1300
def key_count(self):
1550
1301
"""Return an estimate of the number of keys in this index.