1
# Copyright (C) 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Indexing facilities."""
23
'GraphIndexPrefixAdapter',
27
from cStringIO import StringIO
30
from bzrlib.lazy_import import lazy_import
31
lazy_import(globals(), """
32
from bzrlib import trace
33
from bzrlib.trace import mutter
35
from bzrlib import debug, errors
37
_OPTION_KEY_ELEMENTS = "key_elements="
39
_OPTION_NODE_REFS = "node_ref_lists="
40
_SIGNATURE = "Bazaar Graph Index 1\n"
43
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
44
_newline_null_re = re.compile('[\n\0]')
47
class GraphIndexBuilder(object):
48
"""A builder that can build a GraphIndex.
50
The resulting graph has the structure:
52
_SIGNATURE OPTIONS NODES NEWLINE
53
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
54
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
56
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
57
KEY := Not-whitespace-utf8
59
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
60
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
61
REFERENCE := DIGITS ; digits is the byte offset in the index of the
63
VALUE := no-newline-no-null-bytes
66
def __init__(self, reference_lists=0, key_elements=1):
67
"""Create a GraphIndex builder.
69
:param reference_lists: The number of node references lists for each
71
:param key_elements: The number of bytestrings in each key.
73
self.reference_lists = reference_lists
76
self._nodes_by_key = {}
77
self._key_length = key_elements
79
def _check_key(self, key):
80
"""Raise BadIndexKey if key is not a valid key for this index."""
81
if type(key) != tuple:
82
raise errors.BadIndexKey(key)
83
if self._key_length != len(key):
84
raise errors.BadIndexKey(key)
86
if not element or _whitespace_re.search(element) is not None:
87
raise errors.BadIndexKey(element)
89
def add_node(self, key, value, references=()):
90
"""Add a node to the index.
92
:param key: The key. keys are non-empty tuples containing
93
as many whitespace-free utf8 bytestrings as the key length
94
defined for this index.
95
:param references: An iterable of iterables of keys. Each is a
96
reference to another key.
97
:param value: The value to associate with the key. It may be any
98
bytes as long as it does not contain \0 or \n.
101
if _newline_null_re.search(value) is not None:
102
raise errors.BadIndexValue(value)
103
if len(references) != self.reference_lists:
104
raise errors.BadIndexValue(references)
106
for reference_list in references:
107
for reference in reference_list:
108
self._check_key(reference)
109
if reference not in self._nodes:
110
self._nodes[reference] = ('a', (), '')
111
node_refs.append(tuple(reference_list))
112
if key in self._nodes and self._nodes[key][0] == '':
113
raise errors.BadIndexDuplicateKey(key, self)
114
self._nodes[key] = ('', tuple(node_refs), value)
116
if self._key_length > 1:
117
key_dict = self._nodes_by_key
118
if self.reference_lists:
119
key_value = key, value, tuple(node_refs)
121
key_value = key, value
122
# possibly should do this on-demand, but it seems likely it is
124
# For a key of (foo, bar, baz) create
125
# _nodes_by_key[foo][bar][baz] = key_value
126
for subkey in key[:-1]:
127
key_dict = key_dict.setdefault(subkey, {})
128
key_dict[key[-1]] = key_value
132
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
133
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
134
lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
135
prefix_length = sum(len(x) for x in lines)
136
# references are byte offsets. To avoid having to do nasty
137
# polynomial work to resolve offsets (references to later in the
138
# file cannot be determined until all the inbetween references have
139
# been calculated too) we pad the offsets with 0's to make them be
140
# of consistent length. Using binary offsets would break the trivial
142
# to calculate the width of zero's needed we do three passes:
143
# one to gather all the non-reference data and the number of references.
144
# one to pad all the data with reference-length and determine entry
148
# forward sorted by key. In future we may consider topological sorting,
149
# at the cost of table scans for direct lookup, or a second index for
151
nodes = sorted(self._nodes.items())
152
# if we do not prepass, we don't know how long it will be up front.
153
expected_bytes = None
154
# we only need to pre-pass if we have reference lists at all.
155
if self.reference_lists:
157
non_ref_bytes = prefix_length
159
# TODO use simple multiplication for the constants in this loop.
160
for key, (absent, references, value) in nodes:
161
# record the offset known *so far* for this key:
162
# the non reference bytes to date, and the total references to
163
# date - saves reaccumulating on the second pass
164
key_offset_info.append((key, non_ref_bytes, total_references))
165
# key is literal, value is literal, there are 3 null's, 1 NL
166
# key is variable length tuple, \x00 between elements
167
non_ref_bytes += sum(len(element) for element in key)
168
if self._key_length > 1:
169
non_ref_bytes += self._key_length - 1
170
# value is literal bytes, there are 3 null's, 1 NL.
171
non_ref_bytes += len(value) + 3 + 1
172
# one byte for absent if set.
175
elif self.reference_lists:
176
# (ref_lists -1) tabs
177
non_ref_bytes += self.reference_lists - 1
178
# (ref-1 cr's per ref_list)
179
for ref_list in references:
180
# how many references across the whole file?
181
total_references += len(ref_list)
182
# accrue reference separators
184
non_ref_bytes += len(ref_list) - 1
185
# how many digits are needed to represent the total byte count?
187
possible_total_bytes = non_ref_bytes + total_references*digits
188
while 10 ** digits < possible_total_bytes:
190
possible_total_bytes = non_ref_bytes + total_references*digits
191
expected_bytes = possible_total_bytes + 1 # terminating newline
192
# resolve key addresses.
194
for key, non_ref_bytes, total_references in key_offset_info:
195
key_addresses[key] = non_ref_bytes + total_references*digits
197
format_string = '%%0%sd' % digits
198
for key, (absent, references, value) in nodes:
199
flattened_references = []
200
for ref_list in references:
202
for reference in ref_list:
203
ref_addresses.append(format_string % key_addresses[reference])
204
flattened_references.append('\r'.join(ref_addresses))
205
string_key = '\x00'.join(key)
206
lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
207
'\t'.join(flattened_references), value))
209
result = StringIO(''.join(lines))
210
if expected_bytes and len(result.getvalue()) != expected_bytes:
211
raise errors.BzrError('Failed index creation. Internal error:'
212
' mismatched output length and expected length: %d %d' %
213
(len(result.getvalue()), expected_bytes))
214
return StringIO(''.join(lines))
217
class GraphIndex(object):
218
"""An index for data with embedded graphs.
220
The index maps keys to a list of key reference lists, and a value.
221
Each node has the same number of key reference lists. Each key reference
222
list can be empty or an arbitrary length. The value is an opaque NULL
223
terminated string without any newlines. The storage of the index is
224
hidden in the interface: keys and key references are always tuples of
225
bytestrings, never the internal representation (e.g. dictionary offsets).
227
It is presumed that the index will not be mutated - it is static data.
229
Successive iter_all_entries calls will read the entire index each time.
230
Additionally, iter_entries calls will read the index linearly until the
231
desired keys are found. XXX: This must be fixed before the index is
232
suitable for production use. :XXX
235
def __init__(self, transport, name):
236
"""Open an index called name on transport.
238
:param transport: A bzrlib.transport.Transport.
239
:param name: A path to provide to transport API calls.
241
self._transport = transport
244
self._key_count = None
245
self._keys_by_offset = None
246
self._nodes_by_key = None
248
def _buffer_all(self):
249
"""Buffer all the index data.
251
Mutates self._nodes and self.keys_by_offset.
253
if 'index' in debug.debug_flags:
254
mutter('Reading entire index %s', self._transport.abspath(self._name))
255
stream = self._transport.get(self._name)
256
self._read_prefix(stream)
257
expected_elements = 3 + self._key_length
259
# raw data keyed by offset
260
self._keys_by_offset = {}
261
# ready-to-return key:value or key:value, node_ref_lists
263
self._nodes_by_key = {}
266
for line in stream.readlines():
270
elements = line.split('\0')
271
if len(elements) != expected_elements:
272
raise errors.BadIndexData(self)
274
key = tuple(elements[:self._key_length])
275
absent, references, value = elements[-3:]
276
value = value[:-1] # remove the newline
278
for ref_string in references.split('\t'):
279
ref_lists.append(tuple([
280
int(ref) for ref in ref_string.split('\r') if ref
282
ref_lists = tuple(ref_lists)
283
self._keys_by_offset[pos] = (key, absent, ref_lists, value)
285
for key, absent, references, value in self._keys_by_offset.itervalues():
288
# resolve references:
289
if self.node_ref_lists:
291
for ref_list in references:
292
node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
293
node_value = (value, tuple(node_refs))
296
self._nodes[key] = node_value
297
if self._key_length > 1:
298
subkey = list(reversed(key[:-1]))
299
key_dict = self._nodes_by_key
300
if self.node_ref_lists:
301
key_value = key, node_value[0], node_value[1]
303
key_value = key, node_value
304
# possibly should do this on-demand, but it seems likely it is
306
# For a key of (foo, bar, baz) create
307
# _nodes_by_key[foo][bar][baz] = key_value
308
for subkey in key[:-1]:
309
key_dict = key_dict.setdefault(subkey, {})
310
key_dict[key[-1]] = key_value
311
# cache the keys for quick set intersections
312
self._keys = set(self._nodes)
314
# there must be one line - the empty trailer line.
315
raise errors.BadIndexData(self)
317
def iter_all_entries(self):
318
"""Iterate over all keys within the index.
320
:return: An iterable of (index, key, value) or (index, key, value, reference_lists).
321
The former tuple is used when there are no reference lists in the
322
index, making the API compatible with simple key:value index types.
323
There is no defined order for the result iteration - it will be in
324
the most efficient order for the index.
326
if 'evil' in debug.debug_flags:
327
trace.mutter_callsite(3,
328
"iter_all_entries scales with size of history.")
329
if self._nodes is None:
331
if self.node_ref_lists:
332
for key, (value, node_ref_lists) in self._nodes.iteritems():
333
yield self, key, value, node_ref_lists
335
for key, value in self._nodes.iteritems():
336
yield self, key, value
338
def _read_prefix(self, stream):
339
signature = stream.read(len(self._signature()))
340
if not signature == self._signature():
341
raise errors.BadIndexFormatSignature(self._name, GraphIndex)
342
options_line = stream.readline()
343
if not options_line.startswith(_OPTION_NODE_REFS):
344
raise errors.BadIndexOptions(self)
346
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
348
raise errors.BadIndexOptions(self)
349
options_line = stream.readline()
350
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
351
raise errors.BadIndexOptions(self)
353
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
355
raise errors.BadIndexOptions(self)
356
options_line = stream.readline()
357
if not options_line.startswith(_OPTION_LEN):
358
raise errors.BadIndexOptions(self)
360
self._key_count = int(options_line[len(_OPTION_LEN):-1])
362
raise errors.BadIndexOptions(self)
364
def iter_entries(self, keys):
365
"""Iterate over keys within the index.
367
:param keys: An iterable providing the keys to be retrieved.
368
:return: An iterable as per iter_all_entries, but restricted to the
369
keys supplied. No additional keys will be returned, and every
370
key supplied that is in the index will be returned.
375
if self._nodes is None:
377
keys = keys.intersection(self._keys)
378
if self.node_ref_lists:
380
value, node_refs = self._nodes[key]
381
yield self, key, value, node_refs
384
yield self, key, self._nodes[key]
386
def iter_entries_prefix(self, keys):
387
"""Iterate over keys within the index using prefix matching.
389
Prefix matching is applied within the tuple of a key, not to within
390
the bytestring of each key element. e.g. if you have the keys ('foo',
391
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
392
only the former key is returned.
394
:param keys: An iterable providing the key prefixes to be retrieved.
395
Each key prefix takes the form of a tuple the length of a key, but
396
with the last N elements 'None' rather than a regular bytestring.
397
The first element cannot be 'None'.
398
:return: An iterable as per iter_all_entries, but restricted to the
399
keys with a matching prefix to those supplied. No additional keys
400
will be returned, and every match that is in the index will be
406
# load data - also finds key lengths
407
if self._nodes is None:
409
if self._key_length == 1:
413
raise errors.BadIndexKey(key)
414
if len(key) != self._key_length:
415
raise errors.BadIndexKey(key)
416
if self.node_ref_lists:
417
value, node_refs = self._nodes[key]
418
yield self, key, value, node_refs
420
yield self, key, self._nodes[key]
425
raise errors.BadIndexKey(key)
426
if len(key) != self._key_length:
427
raise errors.BadIndexKey(key)
428
# find what it refers to:
429
key_dict = self._nodes_by_key
431
# find the subdict whose contents should be returned.
433
while len(elements) and elements[0] is not None:
434
key_dict = key_dict[elements[0]]
437
# a non-existant lookup.
442
key_dict = dicts.pop(-1)
443
# can't be empty or would not exist
444
item, value = key_dict.iteritems().next()
445
if type(value) == dict:
447
dicts.extend(key_dict.itervalues())
450
for value in key_dict.itervalues():
451
# each value is the key:value:node refs tuple
453
yield (self, ) + value
455
# the last thing looked up was a terminal element
456
yield (self, ) + key_dict
459
"""Return an estimate of the number of keys in this index.
461
For GraphIndex the estimate is exact.
463
if self._key_count is None:
464
# really this should just read the prefix
466
return self._key_count
468
def _signature(self):
469
"""The file signature for this index type."""
473
"""Validate that everything in the index can be accessed."""
474
# iter_all validates completely at the moment, so just do that.
475
for node in self.iter_all_entries():
479
class CombinedGraphIndex(object):
480
"""A GraphIndex made up from smaller GraphIndices.
482
The backing indices must implement GraphIndex, and are presumed to be
485
Queries against the combined index will be made against the first index,
486
and then the second and so on. The order of index's can thus influence
487
performance significantly. For example, if one index is on local disk and a
488
second on a remote server, the local disk index should be before the other
492
def __init__(self, indices):
493
"""Create a CombinedGraphIndex backed by indices.
495
:param indices: An ordered list of indices to query for data.
497
self._indices = indices
499
def insert_index(self, pos, index):
500
"""Insert a new index in the list of indices to query.
502
:param pos: The position to insert the index.
503
:param index: The index to insert.
505
self._indices.insert(pos, index)
507
def iter_all_entries(self):
508
"""Iterate over all keys within the index
510
Duplicate keys across child indices are presumed to have the same
511
value and are only reported once.
513
:return: An iterable of (index, key, reference_lists, value).
514
There is no defined order for the result iteration - it will be in
515
the most efficient order for the index.
518
for index in self._indices:
519
for node in index.iter_all_entries():
520
if node[1] not in seen_keys:
522
seen_keys.add(node[1])
524
def iter_entries(self, keys):
525
"""Iterate over keys within the index.
527
Duplicate keys across child indices are presumed to have the same
528
value and are only reported once.
530
:param keys: An iterable providing the keys to be retrieved.
531
:return: An iterable of (index, key, reference_lists, value). There is no
532
defined order for the result iteration - it will be in the most
533
efficient order for the index.
536
for index in self._indices:
539
for node in index.iter_entries(keys):
543
def iter_entries_prefix(self, keys):
544
"""Iterate over keys within the index using prefix matching.
546
Duplicate keys across child indices are presumed to have the same
547
value and are only reported once.
549
Prefix matching is applied within the tuple of a key, not to within
550
the bytestring of each key element. e.g. if you have the keys ('foo',
551
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
552
only the former key is returned.
554
:param keys: An iterable providing the key prefixes to be retrieved.
555
Each key prefix takes the form of a tuple the length of a key, but
556
with the last N elements 'None' rather than a regular bytestring.
557
The first element cannot be 'None'.
558
:return: An iterable as per iter_all_entries, but restricted to the
559
keys with a matching prefix to those supplied. No additional keys
560
will be returned, and every match that is in the index will be
567
for index in self._indices:
568
for node in index.iter_entries_prefix(keys):
569
if node[1] in seen_keys:
571
seen_keys.add(node[1])
575
"""Return an estimate of the number of keys in this index.
577
For CombinedGraphIndex this is approximated by the sum of the keys of
578
the child indices. As child indices may have duplicate keys this can
579
have a maximum error of the number of child indices * largest number of
582
return sum((index.key_count() for index in self._indices), 0)
585
"""Validate that everything in the index can be accessed."""
586
for index in self._indices:
590
class InMemoryGraphIndex(GraphIndexBuilder):
591
"""A GraphIndex which operates entirely out of memory and is mutable.
593
This is designed to allow the accumulation of GraphIndex entries during a
594
single write operation, where the accumulated entries need to be immediately
595
available - for example via a CombinedGraphIndex.
598
def add_nodes(self, nodes):
599
"""Add nodes to the index.
601
:param nodes: An iterable of (key, node_refs, value) entries to add.
603
if self.reference_lists:
604
for (key, value, node_refs) in nodes:
605
self.add_node(key, value, node_refs)
607
for (key, value) in nodes:
608
self.add_node(key, value)
610
def iter_all_entries(self):
611
"""Iterate over all keys within the index
613
:return: An iterable of (index, key, reference_lists, value). There is no
614
defined order for the result iteration - it will be in the most
615
efficient order for the index (in this case dictionary hash order).
617
if 'evil' in debug.debug_flags:
618
trace.mutter_callsite(3,
619
"iter_all_entries scales with size of history.")
620
if self.reference_lists:
621
for key, (absent, references, value) in self._nodes.iteritems():
623
yield self, key, value, references
625
for key, (absent, references, value) in self._nodes.iteritems():
627
yield self, key, value
629
def iter_entries(self, keys):
630
"""Iterate over keys within the index.
632
:param keys: An iterable providing the keys to be retrieved.
633
:return: An iterable of (index, key, reference_lists, value). There is no
634
defined order for the result iteration - it will be in the most
635
efficient order for the index (keys iteration order in this case).
638
if self.reference_lists:
639
for key in keys.intersection(self._keys):
640
node = self._nodes[key]
642
yield self, key, node[2], node[1]
644
for key in keys.intersection(self._keys):
645
node = self._nodes[key]
647
yield self, key, node[2]
649
def iter_entries_prefix(self, keys):
650
"""Iterate over keys within the index using prefix matching.
652
Prefix matching is applied within the tuple of a key, not to within
653
the bytestring of each key element. e.g. if you have the keys ('foo',
654
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
655
only the former key is returned.
657
:param keys: An iterable providing the key prefixes to be retrieved.
658
Each key prefix takes the form of a tuple the length of a key, but
659
with the last N elements 'None' rather than a regular bytestring.
660
The first element cannot be 'None'.
661
:return: An iterable as per iter_all_entries, but restricted to the
662
keys with a matching prefix to those supplied. No additional keys
663
will be returned, and every match that is in the index will be
666
# XXX: To much duplication with the GraphIndex class; consider finding
667
# a good place to pull out the actual common logic.
671
if self._key_length == 1:
675
raise errors.BadIndexKey(key)
676
if len(key) != self._key_length:
677
raise errors.BadIndexKey(key)
678
node = self._nodes[key]
681
if self.reference_lists:
682
yield self, key, node[2], node[1]
684
yield self, key, node[2]
689
raise errors.BadIndexKey(key)
690
if len(key) != self._key_length:
691
raise errors.BadIndexKey(key)
692
# find what it refers to:
693
key_dict = self._nodes_by_key
695
# find the subdict to return
697
while len(elements) and elements[0] is not None:
698
key_dict = key_dict[elements[0]]
701
# a non-existant lookup.
706
key_dict = dicts.pop(-1)
707
# can't be empty or would not exist
708
item, value = key_dict.iteritems().next()
709
if type(value) == dict:
711
dicts.extend(key_dict.itervalues())
714
for value in key_dict.itervalues():
715
yield (self, ) + value
717
yield (self, ) + key_dict
720
"""Return an estimate of the number of keys in this index.
722
For InMemoryGraphIndex the estimate is exact.
724
return len(self._keys)
727
"""In memory index's have no known corruption at the moment."""
730
class GraphIndexPrefixAdapter(object):
731
"""An adapter between GraphIndex with different key lengths.
733
Queries against this will emit queries against the adapted Graph with the
734
prefix added, queries for all items use iter_entries_prefix. The returned
735
nodes will have their keys and node references adjusted to remove the
736
prefix. Finally, an add_nodes_callback can be supplied - when called the
737
nodes and references being added will have prefix prepended.
740
def __init__(self, adapted, prefix, missing_key_length,
741
add_nodes_callback=None):
742
"""Construct an adapter against adapted with prefix."""
743
self.adapted = adapted
744
self.prefix_key = prefix + (None,)*missing_key_length
746
self.prefix_len = len(prefix)
747
self.add_nodes_callback = add_nodes_callback
749
def add_nodes(self, nodes):
750
"""Add nodes to the index.
752
:param nodes: An iterable of (key, node_refs, value) entries to add.
754
# save nodes in case its an iterator
756
translated_nodes = []
758
# Add prefix_key to each reference node_refs is a tuple of tuples,
759
# so split it apart, and add prefix_key to the internal reference
760
for (key, value, node_refs) in nodes:
761
adjusted_references = (
762
tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
763
for ref_list in node_refs))
764
translated_nodes.append((self.prefix + key, value,
765
adjusted_references))
767
# XXX: TODO add an explicit interface for getting the reference list
768
# status, to handle this bit of user-friendliness in the API more
770
for (key, value) in nodes:
771
translated_nodes.append((self.prefix + key, value))
772
self.add_nodes_callback(translated_nodes)
774
def add_node(self, key, value, references=()):
775
"""Add a node to the index.
777
:param key: The key. keys are non-empty tuples containing
778
as many whitespace-free utf8 bytestrings as the key length
779
defined for this index.
780
:param references: An iterable of iterables of keys. Each is a
781
reference to another key.
782
:param value: The value to associate with the key. It may be any
783
bytes as long as it does not contain \0 or \n.
785
self.add_nodes(((key, value, references), ))
787
def _strip_prefix(self, an_iter):
788
"""Strip prefix data from nodes and return it."""
791
if node[1][:self.prefix_len] != self.prefix:
792
raise errors.BadIndexData(self)
793
for ref_list in node[3]:
794
for ref_node in ref_list:
795
if ref_node[:self.prefix_len] != self.prefix:
796
raise errors.BadIndexData(self)
797
yield node[0], node[1][self.prefix_len:], node[2], (
798
tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
799
for ref_list in node[3]))
801
def iter_all_entries(self):
802
"""Iterate over all keys within the index
804
iter_all_entries is implemented against the adapted index using
807
:return: An iterable of (index, key, reference_lists, value). There is no
808
defined order for the result iteration - it will be in the most
809
efficient order for the index (in this case dictionary hash order).
811
return self._strip_prefix(self.adapted.iter_entries_prefix([self.prefix_key]))
813
def iter_entries(self, keys):
814
"""Iterate over keys within the index.
816
:param keys: An iterable providing the keys to be retrieved.
817
:return: An iterable of (key, reference_lists, value). There is no
818
defined order for the result iteration - it will be in the most
819
efficient order for the index (keys iteration order in this case).
821
return self._strip_prefix(self.adapted.iter_entries(
822
self.prefix + key for key in keys))
824
def iter_entries_prefix(self, keys):
825
"""Iterate over keys within the index using prefix matching.
827
Prefix matching is applied within the tuple of a key, not to within
828
the bytestring of each key element. e.g. if you have the keys ('foo',
829
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
830
only the former key is returned.
832
:param keys: An iterable providing the key prefixes to be retrieved.
833
Each key prefix takes the form of a tuple the length of a key, but
834
with the last N elements 'None' rather than a regular bytestring.
835
The first element cannot be 'None'.
836
:return: An iterable as per iter_all_entries, but restricted to the
837
keys with a matching prefix to those supplied. No additional keys
838
will be returned, and every match that is in the index will be
841
return self._strip_prefix(self.adapted.iter_entries_prefix(
842
self.prefix + key for key in keys))
845
"""Return an estimate of the number of keys in this index.
847
For GraphIndexPrefixAdapter this is relatively expensive - key
848
iteration with the prefix is done.
850
return len(list(self.iter_all_entries()))
853
"""Call the adapted's validate."""
854
self.adapted.validate()