382
382
suitable for production use. :XXX
385
def __init__(self, transport, name, size, unlimited_cache=False):
385
def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
386
386
"""Open an index called name on transport.
388
388
:param transport: A bzrlib.transport.Transport.
444
447
mutter('Reading entire index %s', self._transport.abspath(self._name))
445
448
if stream is None:
446
449
stream = self._transport.get(self._name)
450
if self._base_offset != 0:
451
# This is wasteful, but it is better than dealing with
452
# adjusting all the offsets, etc.
453
stream = StringIO(stream.read()[self._base_offset:])
447
454
self._read_prefix(stream)
448
455
self._expected_elements = 3 + self._key_length
1190
1197
self._buffer_all()
1200
base_offset = self._base_offset
1201
if base_offset != 0:
1202
# Rewrite the ranges for the offset
1203
readv_ranges = [(start+base_offset, size)
1204
for start, size in readv_ranges]
1193
1205
readv_data = self._transport.readv(self._name, readv_ranges, True,
1206
self._size + self._base_offset)
1196
1208
for offset, data in readv_data:
1209
offset -= base_offset
1197
1210
self._bytes_read += len(data)
1212
# transport.readv() expanded to extra data which isn't part of
1214
data = data[-offset:]
1198
1216
if offset == 0 and len(data) == self._size:
1199
1217
# We read the whole range, most likely because the
1200
1218
# Transport upcast our readv ranges into one long request
1229
1247
Queries against the combined index will be made against the first index,
1230
and then the second and so on. The order of index's can thus influence
1248
and then the second and so on. The order of indices can thus influence
1231
1249
performance significantly. For example, if one index is on local disk and a
1232
1250
second on a remote server, the local disk index should be before the other
1233
1251
in the index list.
1253
Also, queries tend to need results from the same indices as previous
1254
queries. So the indices will be reordered after every query to put the
1255
indices that had the result(s) of that query first (while otherwise
1256
preserving the relative ordering).
1236
1259
def __init__(self, indices, reload_func=None):
1244
1267
self._indices = indices
1245
1268
self._reload_func = reload_func
1269
# Sibling indices are other CombinedGraphIndex that we should call
1270
# _move_to_front_by_name on when we auto-reorder ourself.
1271
self._sibling_indices = []
1272
# A list of names that corresponds to the instances in self._indices,
1273
# so _index_names[0] is always the name for _indices[0], etc. Sibling
1274
# indices must all use the same set of names as each other.
1275
self._index_names = [None] * len(self._indices)
1247
1277
def __repr__(self):
1248
1278
return "%s(%s)" % (
1272
1302
has_key = _has_key_from_parent_map
1274
def insert_index(self, pos, index):
1304
def insert_index(self, pos, index, name=None):
1275
1305
"""Insert a new index in the list of indices to query.
1277
1307
:param pos: The position to insert the index.
1278
1308
:param index: The index to insert.
1309
:param name: a name for this index, e.g. a pack name. These names can
1310
be used to reflect index reorderings to related CombinedGraphIndex
1311
instances that use the same names. (see set_sibling_indices)
1280
1313
self._indices.insert(pos, index)
1314
self._index_names.insert(pos, name)
1282
1316
def iter_all_entries(self):
1283
1317
"""Iterate over all keys within the index
1308
1342
value and are only reported once.
1310
1344
:param keys: An iterable providing the keys to be retrieved.
1311
:return: An iterable of (index, key, reference_lists, value). There is no
1312
defined order for the result iteration - it will be in the most
1345
:return: An iterable of (index, key, reference_lists, value). There is
1346
no defined order for the result iteration - it will be in the most
1313
1347
efficient order for the index.
1315
1349
keys = set(keys)
1318
1353
for index in self._indices:
1321
1357
for node in index.iter_entries(keys):
1322
1358
keys.remove(node[1])
1362
hit_indices.append(index)
1325
1364
except errors.NoSuchFile:
1326
1365
self._reload_or_raise()
1366
self._move_to_front(hit_indices)
1328
1368
def iter_entries_prefix(self, keys):
1329
1369
"""Iterate over keys within the index using prefix matching.
1351
1391
seen_keys = set()
1354
1395
for index in self._indices:
1355
1397
for node in index.iter_entries_prefix(keys):
1356
1398
if node[1] in seen_keys:
1358
1400
seen_keys.add(node[1])
1404
hit_indices.append(index)
1361
1406
except errors.NoSuchFile:
1362
1407
self._reload_or_raise()
1408
self._move_to_front(hit_indices)
1410
def _move_to_front(self, hit_indices):
1411
"""Rearrange self._indices so that hit_indices are first.
1413
Order is maintained as much as possible, e.g. the first unhit index
1414
will be the first index in _indices after the hit_indices, and the
1415
hit_indices will be present in exactly the order they are passed to
1418
_move_to_front propagates to all objects in self._sibling_indices by
1419
calling _move_to_front_by_name.
1421
if self._indices[:len(hit_indices)] == hit_indices:
1422
# The 'hit_indices' are already at the front (and in the same
1423
# order), no need to re-order
1425
hit_names = self._move_to_front_by_index(hit_indices)
1426
for sibling_idx in self._sibling_indices:
1427
sibling_idx._move_to_front_by_name(hit_names)
1429
def _move_to_front_by_index(self, hit_indices):
1430
"""Core logic for _move_to_front.
1432
Returns a list of names corresponding to the hit_indices param.
1434
indices_info = zip(self._index_names, self._indices)
1435
if 'index' in debug.debug_flags:
1436
mutter('CombinedGraphIndex reordering: currently %r, promoting %r',
1437
indices_info, hit_indices)
1438
hit_indices_info = []
1440
unhit_indices_info = []
1441
for name, idx in indices_info:
1442
if idx in hit_indices:
1443
info = hit_indices_info
1444
hit_names.append(name)
1446
info = unhit_indices_info
1447
info.append((name, idx))
1448
final_info = hit_indices_info + unhit_indices_info
1449
self._indices = [idx for (name, idx) in final_info]
1450
self._index_names = [name for (name, idx) in final_info]
1451
if 'index' in debug.debug_flags:
1452
mutter('CombinedGraphIndex reordered: %r', self._indices)
1455
def _move_to_front_by_name(self, hit_names):
1456
"""Moves indices named by 'hit_names' to front of the search order, as
1457
described in _move_to_front.
1459
# Translate names to index instances, and then call
1460
# _move_to_front_by_index.
1461
indices_info = zip(self._index_names, self._indices)
1463
for name, idx in indices_info:
1464
if name in hit_names:
1465
hit_indices.append(idx)
1466
self._move_to_front_by_index(hit_indices)
1364
1468
def find_ancestry(self, keys, ref_list_num):
1365
1469
"""Find the complete ancestry for the given set of keys.
1457
1562
' Raising original exception.')
1458
1563
raise exc_type, exc_value, exc_traceback
1565
def set_sibling_indices(self, sibling_combined_graph_indices):
1566
"""Set the CombinedGraphIndex objects to reorder after reordering self.
1568
self._sibling_indices = sibling_combined_graph_indices
1460
1570
def validate(self):
1461
1571
"""Validate that everything in the index can be accessed."""