2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
1 |
# Copyright (C) 2007 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""Indexing facilities."""
|
|
18 |
||
2592.1.38
by Robert Collins
Create an InMemoryGraphIndex for temporary indexing. |
19 |
# Public names exported by ``from bzrlib.index import *``.
__all__ = [
    'CombinedGraphIndex',
    'GraphIndex',
    'GraphIndexBuilder',
    'InMemoryGraphIndex',
    ]
|
|
2592.1.32
by Robert Collins
Add __all__ to index. |
25 |
|
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
26 |
from cStringIO import StringIO |
2592.1.12
by Robert Collins
Handle basic node adds. |
27 |
import re |
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
28 |
|
2592.1.5
by Robert Collins
Trivial index reading. |
29 |
from bzrlib import errors |
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
30 |
|
2592.1.6
by Robert Collins
Record the number of node reference lists a particular index has. |
31 |
# Prefix of the options line that follows the signature; the rest of that
# line is the decimal number of reference lists stored for every node.
_OPTION_NODE_REFS = "node_ref_lists="
# First line of a serialised index, used to recognise the file format.
_SIGNATURE = "Bazaar Graph Index 1\n"


# Matches any character that is not permitted in a key or key reference.
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
# Matches the two bytes that are not permitted in a value, because they
# delimit fields (NULL) and nodes (newline) in the serialised form.
_newline_null_re = re.compile('[\n\0]')
37 |
||
38 |
||
2592.1.4
by Robert Collins
Create a GraphIndexBuilder. |
39 |
class GraphIndexBuilder(object):
    """A builder that can build a GraphIndex.

    Nodes are accumulated in memory by add_node() and serialised in a single
    pass by finish().

    The resulting graph has the structure:

    _SIGNATURE OPTIONS NODES NEWLINE
    _SIGNATURE     := 'Bazaar Graph Index 1' NEWLINE
    OPTIONS        := 'node_ref_lists=' DIGITS NEWLINE
    NODES          := NODE*
    NODE           := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
    KEY            := Not-whitespace-utf8
    ABSENT         := 'a'
    REFERENCES     := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
    REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
    REFERENCE      := DIGITS  ; digits is the byte offset in the index of the
                              ; referenced key.
    VALUE          := no-newline-no-null-bytes
    """

    def __init__(self, reference_lists=0):
        """Create a GraphIndex builder.

        :param reference_lists: The number of node references lists for each
            entry.
        """
        self.reference_lists = reference_lists
        # key -> (absent, reference lists, value). absent is 'a' for a key
        # that has only been referenced so far and '' for a real node.
        self._nodes = {}

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        All arguments are validated before the builder is changed, so a
        rejected call leaves the builder exactly as it was.

        :param key: The key. keys must be whitespace-free utf8.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key. Referenced keys that have not been
            added yet are recorded as absent nodes.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        :raises BadIndexKey: If key, or any referenced key, is empty or
            contains whitespace.
        :raises BadIndexValue: If value contains \\0 or \\n, or if the number
            of reference lists differs from self.reference_lists.
        :raises BadIndexDuplicateKey: If key has already been added.
        """
        if not key or _whitespace_re.search(key) is not None:
            raise errors.BadIndexKey(key)
        if _newline_null_re.search(value) is not None:
            raise errors.BadIndexValue(value)
        if len(references) != self.reference_lists:
            raise errors.BadIndexValue(references)
        node_refs = []
        for reference_list in references:
            # Materialise first: a one-shot iterator would otherwise be
            # exhausted by validation and stored as an empty list.
            reference_list = tuple(reference_list)
            for reference in reference_list:
                # A reference names a key, so the key rules apply to it too.
                if (not reference
                    or _whitespace_re.search(reference) is not None):
                    raise errors.BadIndexKey(reference)
            node_refs.append(reference_list)
        existing = self._nodes.get(key)
        # An absent placeholder may be replaced by the real node; a real
        # node may not be added twice.
        if existing is not None and existing[0] == '':
            raise errors.BadIndexDuplicateKey(key, self)
        # Nothing can fail from here on, so it is safe to mutate.
        for reference_list in node_refs:
            for reference in reference_list:
                if reference not in self._nodes:
                    self._nodes[reference] = ('a', (), '')
        self._nodes[key] = ('', tuple(node_refs), value)

    def finish(self):
        """Serialise the accumulated nodes.

        :return: A StringIO holding the complete index content.
        :raises BzrError: If the serialised length differs from the length
            predicted while calculating reference offsets.
        """
        lines = [_SIGNATURE]
        lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
        prefix_length = len(lines[0]) + len(lines[1])
        # references are byte offsets. To avoid having to do nasty
        # polynomial work to resolve offsets (references to later in the
        # file cannot be determined until all the inbetween references have
        # been calculated too) we pad the offsets with 0's to make them be
        # of consistent length. Using binary offsets would break the trivial
        # file parsing.
        # to calculate the width of zero's needed we do three passes:
        # one to gather all the non-reference data and the number of
        # references.
        # one to pad all the data with reference-length and determine entry
        # addresses.
        # One to serialise.

        # forward sorted by key. In future we may consider topological
        # sorting, at the cost of table scans for direct lookup, or a second
        # index for direct lookup
        nodes = sorted(self._nodes.items())
        # if we do not prepass, we don't know how long it will be up front.
        expected_bytes = None
        # we only need to pre-pass if we have reference lists at all.
        if self.reference_lists:
            key_offset_info = []
            non_ref_bytes = prefix_length
            total_references = 0
            # TODO use simple multiplication for the constants in this loop.
            for key, (absent, references, value) in nodes:
                # record the offset known *so far* for this key:
                # the non reference bytes to date, and the total references
                # to date - saves reaccumulating on the second pass
                key_offset_info.append((key, non_ref_bytes, total_references))
                # key is literal, value is literal, there are 3 null's, 1 NL
                non_ref_bytes += len(key) + len(value) + 3 + 1
                if absent:
                    # one byte for the absent marker; absent nodes carry no
                    # reference lists and therefore no tab separators.
                    non_ref_bytes += 1
                else:
                    # (ref_lists -1) tabs
                    non_ref_bytes += self.reference_lists - 1
                    for ref_list in references:
                        # how many references across the whole file?
                        total_references += len(ref_list)
                        # accrue reference separators: (refs - 1) cr's
                        if ref_list:
                            non_ref_bytes += len(ref_list) - 1
            # how many digits are needed to represent the total byte count?
            digits = 1
            possible_total_bytes = non_ref_bytes + total_references*digits
            while 10 ** digits < possible_total_bytes:
                digits += 1
                possible_total_bytes = non_ref_bytes + total_references*digits
            expected_bytes = possible_total_bytes + 1 # terminating newline
            # resolve key addresses.
            key_addresses = {}
            for key, non_ref_bytes, total_references in key_offset_info:
                key_addresses[key] = non_ref_bytes + total_references*digits
            # zero padded so every reference is exactly `digits` wide.
            format_string = '%%0%sd' % digits
        # serialise. Without reference lists every node has no references,
        # so format_string and key_addresses are never consulted.
        for key, (absent, references, value) in nodes:
            flattened_references = []
            for ref_list in references:
                ref_addresses = []
                for reference in ref_list:
                    ref_addresses.append(
                        format_string % key_addresses[reference])
                flattened_references.append('\r'.join(ref_addresses))
            lines.append("%s\0%s\0%s\0%s\n" % (key, absent,
                '\t'.join(flattened_references), value))
        lines.append('\n')
        serialised = ''.join(lines)
        if expected_bytes and len(serialised) != expected_bytes:
            raise errors.BzrError('Failed index creation. Internal error:'
                ' mismatched output length and expected length: %d %d' %
                (len(serialised), expected_bytes))
        return StringIO(serialised)
2592.1.5
by Robert Collins
Trivial index reading. |
171 |
|
172 |
||
173 |
class GraphIndex(object):
    """An index for data with embedded graphs.

    The index maps keys to a list of key reference lists, and a value.
    Each node has the same number of key reference lists. Each key reference
    list can be empty or an arbitrary length. The value is an opaque NULL
    terminated string without any newlines. The storage of the index is
    hidden in the interface: keys and key references are always bytestrings,
    never the internal representation (e.g. dictionary offsets).

    It is presumed that the index will not be mutated - it is static data.

    The whole index is read and parsed into memory the first time entries
    are requested, and that parsed copy is reused by later calls.
    XXX: This must be fixed before the index is suitable for production use.
    :XXX
    """

    def __init__(self, transport, name):
        """Open an index called name on transport.

        Nothing is read until entries are first requested.

        :param transport: A bzrlib.transport.Transport.
        :param name: A path to provide to transport API calls.
        """
        self._transport = transport
        self._name = name
        # Both stay None until _buffer_all has parsed the index successfully.
        self._nodes = None
        self._keys_by_offset = None

    def _buffer_all(self):
        """Buffer all the index data.

        Mutates self._nodes and self._keys_by_offset. Both are only assigned
        once the whole index has been parsed, so a failed load leaves them as
        None and the next access reads the index again instead of serving
        partial data.

        :raises BadIndexData: If the index does not contain exactly one
            empty trailer line.
        """
        stream = self._transport.get(self._name)
        self._read_prefix(stream)
        # raw data keyed by offset
        keys_by_offset = {}
        trailers = 0
        pos = stream.tell()
        for line in stream.readlines():
            if line == '\n':
                trailers += 1
                continue
            key, absent, references, value = line.split('\0')
            value = value[:-1] # remove the newline
            ref_lists = []
            for ref_string in references.split('\t'):
                ref_lists.append(tuple([
                    int(ref) for ref in ref_string.split('\r') if ref
                    ]))
            keys_by_offset[pos] = (key, absent, tuple(ref_lists), value)
            pos += len(line)
        # ready-to-return key:value or key:value, node_ref_lists
        nodes = {}
        for offset in keys_by_offset:
            key, absent, references, value = keys_by_offset[offset]
            if absent:
                # absent nodes only exist to be referenced; never returned.
                continue
            if self.node_ref_lists:
                # resolve references: byte offsets back to the keys there.
                node_refs = []
                for ref_list in references:
                    node_refs.append(tuple(
                        [keys_by_offset[ref][0] for ref in ref_list]))
                nodes[key] = (value, tuple(node_refs))
            else:
                nodes[key] = value
        if trailers != 1:
            # there must be one line - the empty trailer line.
            raise errors.BadIndexData(self)
        self._keys_by_offset = keys_by_offset
        self._nodes = nodes

    def iter_all_entries(self):
        """Iterate over all keys within the index.

        :return: An iterable of (key, value) or (key, value, reference_lists).
            The former tuple is used when there are no reference lists in the
            index, making the API compatible with simple key:value index
            types. There is no defined order for the result iteration - it
            will be in the most efficient order for the index.
        """
        if self._nodes is None:
            self._buffer_all()
        nodes = self._nodes
        if self.node_ref_lists:
            for key in nodes:
                value, node_ref_lists = nodes[key]
                yield key, value, node_ref_lists
        else:
            for key in nodes:
                yield key, nodes[key]

    def _read_prefix(self, stream):
        """Read the signature and options lines from the start of stream.

        Sets self.node_ref_lists to the number of reference lists per node.

        :param stream: A file-like object positioned at the start of the
            index.
        :raises BadIndexFormatSignature: If the signature does not match.
        :raises BadIndexOptions: If the options line is not a parseable
            node_ref_lists option.
        """
        expected_signature = self._signature()
        signature = stream.read(len(expected_signature))
        if signature != expected_signature:
            raise errors.BadIndexFormatSignature(self._name, GraphIndex)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_NODE_REFS):
            raise errors.BadIndexOptions(self)
        try:
            # strip the option name and the trailing newline.
            self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
        except ValueError:
            raise errors.BadIndexOptions(self)

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys supplied. No additional keys will be returned, and every
            key supplied that is in the index will be returned.
        """
        keys = set(keys)
        if not keys:
            # nothing requested: do not even load the index.
            return
        if self._nodes is None:
            self._buffer_all()
        keys = keys.intersection(self._nodes)
        if self.node_ref_lists:
            for key in keys:
                value, node_refs = self._nodes[key]
                yield key, value, node_refs
        else:
            for key in keys:
                yield key, self._nodes[key]

    def _signature(self):
        """The file signature for this index type."""
        return _SIGNATURE

    def validate(self):
        """Validate that everything in the index can be accessed."""
        # iter_all validates completely at the moment, so just do that.
        for node in self.iter_all_entries():
            pass
|
|
2592.1.31
by Robert Collins
Build a combined graph index to use multiple indices at once. |
306 |
|
307 |
||
308 |
class CombinedGraphIndex(object):
    """A GraphIndex made up from smaller GraphIndices.

    The backing indices must implement GraphIndex, and are presumed to be
    static data.

    Queries against the combined index will be made against the first index,
    and then the second and so on. The order of index's can thus influence
    performance significantly. For example, if one index is on local disk and
    a second on a remote server, the local disk index should be before the
    other in the index list.
    """

    def __init__(self, indices):
        """Create a CombinedGraphIndex backed by indices.

        :param indices: An ordered list of indices to query for data. The
            list itself is kept, not copied, and insert_index modifies it.
        """
        self._indices = indices

    def insert_index(self, pos, index):
        """Insert a new index in the list of indices to query.

        :param pos: The position to insert the index.
        :param index: The index to insert.
        """
        self._indices.insert(pos, index)

    def iter_all_entries(self):
        """Iterate over all keys within the index

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        :return: An iterable of the nodes yielded by the child indices, each
            a tuple whose first element is the key. There is no defined
            order for the result iteration - it will be in the most
            efficient order for the index.
        """
        seen_keys = set()
        for index in self._indices:
            for node in index.iter_all_entries():
                if node[0] not in seen_keys:
                    yield node
                    seen_keys.add(node[0])

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of the nodes yielded by the child indices, each
            a tuple whose first element is the key. There is no defined
            order for the result iteration - it will be in the most
            efficient order for the index.
        """
        keys = set(keys)
        for index in self._indices:
            if not keys:
                # everything requested has been found; skip later indices.
                return
            for node in index.iter_entries(keys):
                # a found key is not asked for again in later indices.
                keys.remove(node[0])
                yield node

    def validate(self):
        """Validate that everything in the index can be accessed."""
        for index in self._indices:
            index.validate()
|
2592.1.38
by Robert Collins
Create an InMemoryGraphIndex for temporary indexing. |
376 |
|
377 |
||
378 |
class InMemoryGraphIndex(GraphIndexBuilder):
    """A GraphIndex which operates entirely out of memory and is mutable.

    This is designed to allow the accumulation of GraphIndex entries during a
    single write operation, where the accumulated entries need to be
    immediately available - for example via a CombinedGraphIndex.
    """

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add
            when the index has reference lists, otherwise an iterable of
            (key, value) entries.
        """
        if self.reference_lists:
            for (key, value, node_refs) in nodes:
                self.add_node(key, value, node_refs)
        else:
            for (key, value) in nodes:
                self.add_node(key, value)

    def iter_all_entries(self):
        """Iterate over all keys within the index

        Keys that have only been referenced (absent nodes) are not returned.

        :return: An iterable of (key, value, reference_lists), or of
            (key, value) when the index has no reference lists. There is no
            defined order for the result iteration - it will be in the most
            efficient order for the index (in this case dictionary hash
            order).
        """
        nodes = self._nodes
        if self.reference_lists:
            for key in nodes:
                absent, references, value = nodes[key]
                if not absent:
                    yield key, value, references
        else:
            for key in nodes:
                absent, references, value = nodes[key]
                if not absent:
                    yield key, value

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        Keys that have only been referenced (absent nodes) are not returned.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (key, value, reference_lists), or of
            (key, value) when the index has no reference lists. There is no
            defined order for the result iteration - it will be in the most
            efficient order for the index (keys iteration order in this
            case).
        """
        keys = set(keys)
        if self.reference_lists:
            for key in keys.intersection(self._nodes):
                node = self._nodes[key]
                if not node[0]:
                    yield key, node[2], node[1]
        else:
            for key in keys.intersection(self._nodes):
                node = self._nodes[key]
                if not node[0]:
                    yield key, node[2]

    def validate(self):
        """In memory index's have no known corruption at the moment."""
|