13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
17
"""Indexing facilities."""
52
52
_newline_null_re = re.compile('[\n\0]')
55
def _has_key_from_parent_map(self, key):
56
"""Check if this index has one key.
58
If it's possible to check for multiple keys at once through
59
calling get_parent_map that should be faster.
61
return (key in self.get_parent_map([key]))
64
def _missing_keys_from_parent_map(self, keys):
65
return set(keys) - set(self.get_parent_map(keys))
68
55
class GraphIndexBuilder(object):
69
56
"""A builder that can build a GraphIndex.
71
58
The resulting graph has the structure:
73
60
_SIGNATURE OPTIONS NODES NEWLINE
74
61
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
75
62
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
98
85
self._nodes_by_key = None
99
86
self._key_length = key_elements
100
self._optimize_for_size = False
101
self._combine_backing_indices = True
103
88
def _check_key(self, key):
104
89
"""Raise BadIndexKey if key is not a valid key for this index."""
110
95
if not element or _whitespace_re.search(element) is not None:
111
96
raise errors.BadIndexKey(element)
113
def _external_references(self):
114
"""Return references that are not present in this index.
118
# TODO: JAM 2008-11-21 This makes an assumption about how the reference
119
# lists are used. It is currently correct for pack-0.92 through
120
# 1.9, which use the node references (3rd column) second
121
# reference list as the compression parent. Perhaps this should
122
# be moved into something higher up the stack, since it
123
# makes assumptions about how the index is used.
124
if self.reference_lists > 1:
125
for node in self.iter_all_entries():
127
refs.update(node[3][1])
130
# If reference_lists == 0 there can be no external references, and
131
# if reference_lists == 1, then there isn't a place to store the
135
98
def _get_nodes_by_key(self):
136
99
if self._nodes_by_key is None:
137
100
nodes_by_key = {}
315
278
(len(result.getvalue()), expected_bytes))
318
def set_optimize(self, for_size=None, combine_backing_indices=None):
319
"""Change how the builder tries to optimize the result.
321
:param for_size: Tell the builder to try and make the index as small as
323
:param combine_backing_indices: If the builder spills to disk to save
324
memory, should the on-disk indices be combined. Set to True if you
325
are going to be probing the index, but to False if you are not. (If
326
you are not querying, then the time spent combining is wasted.)
329
# GraphIndexBuilder itself doesn't pay attention to the flag yet, but
331
if for_size is not None:
332
self._optimize_for_size = for_size
333
if combine_backing_indices is not None:
334
self._combine_backing_indices = combine_backing_indices
337
282
class GraphIndex(object):
338
283
"""An index for data with embedded graphs.
340
285
The index maps keys to a list of key reference lists, and a value.
341
286
Each node has the same number of key reference lists. Each key reference
342
287
list can be empty or an arbitrary length. The value is an opaque NULL
343
terminated string without any newlines. The storage of the index is
288
terminated string without any newlines. The storage of the index is
344
289
hidden in the interface: keys and key references are always tuples of
345
290
bytestrings, never the internal representation (e.g. dictionary offsets).
437
382
node_value = value
438
383
self._nodes[key] = node_value
384
if self._key_length > 1:
385
# TODO: We may want to do this lazily, but if we are calling
386
# _buffer_all, we are likely to be doing
387
# iter_entries_prefix
388
key_dict = self._nodes_by_key
389
if self.node_ref_lists:
390
key_value = key, node_value[0], node_value[1]
392
key_value = key, node_value
393
# For a key of (foo, bar, baz) create
394
# _nodes_by_key[foo][bar][baz] = key_value
395
for subkey in key[:-1]:
396
key_dict = key_dict.setdefault(subkey, {})
397
key_dict[key[-1]] = key_value
439
398
# cache the keys for quick set intersections
440
399
self._keys = set(self._nodes)
441
400
if trailers != 1:
442
401
# there must be one line - the empty trailer line.
443
402
raise errors.BadIndexData(self)
445
def external_references(self, ref_list_num):
446
"""Return references that are not present in this index.
449
if ref_list_num + 1 > self.node_ref_lists:
450
raise ValueError('No ref list %d, index has %d ref lists'
451
% (ref_list_num, self.node_ref_lists))
453
for key, (value, ref_lists) in self._nodes.iteritems():
454
ref_list = ref_lists[ref_list_num]
455
refs.update(ref_list)
456
return refs - self._keys
458
def _get_nodes_by_key(self):
459
if self._nodes_by_key is None:
461
if self.node_ref_lists:
462
for key, (value, references) in self._nodes.iteritems():
463
key_dict = nodes_by_key
464
for subkey in key[:-1]:
465
key_dict = key_dict.setdefault(subkey, {})
466
key_dict[key[-1]] = key, value, references
468
for key, value in self._nodes.iteritems():
469
key_dict = nodes_by_key
470
for subkey in key[:-1]:
471
key_dict = key_dict.setdefault(subkey, {})
472
key_dict[key[-1]] = key, value
473
self._nodes_by_key = nodes_by_key
474
return self._nodes_by_key
476
404
def iter_all_entries(self):
477
405
"""Iterate over all keys within the index.
523
451
def _resolve_references(self, references):
524
452
"""Return the resolved key references for references.
526
454
References are resolved by looking up the location of the key in the
527
455
_keys_by_offset map and substituting the key name, preserving ordering.
529
:param references: An iterable of iterables of key locations. e.g.
457
:param references: An iterable of iterables of key locations. e.g.
530
458
[[123, 456], [123]]
531
459
:return: A tuple of tuples of keys.
741
668
# We have the key parsed.
743
670
index = self._parsed_key_index(key)
744
if (len(self._parsed_key_map) and
671
if (len(self._parsed_key_map) and
745
672
self._parsed_key_map[index][0] <= key and
746
673
(self._parsed_key_map[index][1] >= key or
747
674
# end of the file has been parsed
752
679
# - if we have examined this part of the file already - yes
753
680
index = self._parsed_byte_index(location)
754
if (len(self._parsed_byte_map) and
681
if (len(self._parsed_byte_map) and
755
682
self._parsed_byte_map[index][0] <= location and
756
683
self._parsed_byte_map[index][1] > location):
757
684
# the byte region has been parsed, so no read is needed.
1012
939
# adjust offset and data to the parseable data.
1013
940
trimmed_data = data[trim_start:trim_end]
1014
941
if not (trimmed_data):
1015
raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
942
raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
1016
943
% (trim_start, trim_end, offset, offset + len(data)))
1018
945
offset += trim_start
1046
973
raise errors.BadIndexData(self)
1047
974
# keys are tuples. Each element is a string that may occur many
1048
975
# times, so we intern them to save space. AB, RC, 200807
1049
key = tuple([intern(element) for element in elements[:self._key_length]])
976
key = tuple(intern(element) for element in elements[:self._key_length])
1050
977
if first_key is None:
1052
979
absent, references, value = elements[-3:]
1174
1101
in the index list.
1177
def __init__(self, indices, reload_func=None):
1104
def __init__(self, indices):
1178
1105
"""Create a CombinedGraphIndex backed by indices.
1180
1107
:param indices: An ordered list of indices to query for data.
1181
:param reload_func: A function to call if we find we are missing an
1182
index. Should have the form reload_func() => True/False to indicate
1183
if reloading actually changed anything.
1185
1109
self._indices = indices
1186
self._reload_func = reload_func
1188
1111
def __repr__(self):
1189
1112
return "%s(%s)" % (
1190
1113
self.__class__.__name__,
1191
1114
', '.join(map(repr, self._indices)))
1116
@symbol_versioning.deprecated_method(symbol_versioning.one_one)
1117
def get_parents(self, revision_ids):
1118
"""See graph._StackedParentsProvider.get_parents.
1120
This implementation thunks the graph.Graph.get_parents api across to
1123
:param revision_ids: An iterable of graph keys for this graph.
1124
:return: A list of parent details for each key in revision_ids.
1125
Each parent details will be one of:
1126
* None when the key was missing
1127
* (NULL_REVISION,) when the key has no parents.
1128
* (parent_key, parent_key...) otherwise.
1130
parent_map = self.get_parent_map(revision_ids)
1131
return [parent_map.get(r, None) for r in revision_ids]
1193
1133
def get_parent_map(self, keys):
1194
"""See graph.StackedParentsProvider.get_parent_map"""
1134
"""See graph._StackedParentsProvider.get_parent_map"""
1195
1135
search_keys = set(keys)
1196
1136
if NULL_REVISION in search_keys:
1197
1137
search_keys.discard(NULL_REVISION)
1226
1164
the most efficient order for the index.
1228
1166
seen_keys = set()
1231
for index in self._indices:
1232
for node in index.iter_all_entries():
1233
if node[1] not in seen_keys:
1235
seen_keys.add(node[1])
1237
except errors.NoSuchFile:
1238
self._reload_or_raise()
1167
for index in self._indices:
1168
for node in index.iter_all_entries():
1169
if node[1] not in seen_keys:
1171
seen_keys.add(node[1])
1240
1173
def iter_entries(self, keys):
1241
1174
"""Iterate over keys within the index.
1249
1182
efficient order for the index.
1251
1184
keys = set(keys)
1254
for index in self._indices:
1257
for node in index.iter_entries(keys):
1258
keys.remove(node[1])
1185
for index in self._indices:
1261
except errors.NoSuchFile:
1262
self._reload_or_raise()
1188
for node in index.iter_entries(keys):
1189
keys.remove(node[1])
1264
1192
def iter_entries_prefix(self, keys):
1265
1193
"""Iterate over keys within the index using prefix matching.
1287
1215
seen_keys = set()
1290
for index in self._indices:
1291
for node in index.iter_entries_prefix(keys):
1292
if node[1] in seen_keys:
1294
seen_keys.add(node[1])
1297
except errors.NoSuchFile:
1298
self._reload_or_raise()
1216
for index in self._indices:
1217
for node in index.iter_entries_prefix(keys):
1218
if node[1] in seen_keys:
1220
seen_keys.add(node[1])
1300
1223
def key_count(self):
1301
1224
"""Return an estimate of the number of keys in this index.
1303
1226
For CombinedGraphIndex this is approximated by the sum of the keys of
1304
1227
the child indices. As child indices may have duplicate keys this can
1305
1228
have a maximum error of the number of child indices * largest number of
1306
1229
keys in any index.
1310
return sum((index.key_count() for index in self._indices), 0)
1311
except errors.NoSuchFile:
1312
self._reload_or_raise()
1314
missing_keys = _missing_keys_from_parent_map
1316
def _reload_or_raise(self):
1317
"""We just got a NoSuchFile exception.
1319
Try to reload the indices, if it fails, just raise the current
1322
if self._reload_func is None:
1324
exc_type, exc_value, exc_traceback = sys.exc_info()
1325
trace.mutter('Trying to reload after getting exception: %s',
1327
if not self._reload_func():
1328
# We tried to reload, but nothing changed, so we fail anyway
1329
trace.mutter('_reload_func indicated nothing has changed.'
1330
' Raising original exception.')
1331
raise exc_type, exc_value, exc_traceback
1231
return sum((index.key_count() for index in self._indices), 0)
1333
1233
def validate(self):
1334
1234
"""Validate that everything in the index can be accessed."""
1337
for index in self._indices:
1340
except errors.NoSuchFile:
1341
self._reload_or_raise()
1235
for index in self._indices:
1344
1239
class InMemoryGraphIndex(GraphIndexBuilder):
1488
1383
Queries against this will emit queries against the adapted Graph with the
1489
1384
prefix added, queries for all items use iter_entries_prefix. The returned
1490
nodes will have their keys and node references adjusted to remove the
1385
nodes will have their keys and node references adjusted to remove the
1491
1386
prefix. Finally, an add_nodes_callback can be supplied - when called the
1492
1387
nodes and references being added will have prefix prepended.
1520
1415
adjusted_references))
1521
1416
except ValueError:
1522
1417
# XXX: TODO add an explicit interface for getting the reference list
1523
# status, to handle this bit of user-friendliness in the API more
1418
# status, to handle this bit of user-friendliness in the API more
1525
1420
for (key, value) in nodes:
1526
1421
translated_nodes.append((self.prefix + key, value))