94
95
if not element or _whitespace_re.search(element) is not None:
95
96
raise errors.BadIndexKey(element)
97
def add_node(self, key, value, references=()):
98
"""Add a node to the index.
100
:param key: The key. keys are non-empty tuples containing
101
as many whitespace-free utf8 bytestrings as the key length
102
defined for this index.
103
:param references: An iterable of iterables of keys. Each is a
104
reference to another key.
105
:param value: The value to associate with the key. It may be any
106
bytes as long as it does not contain \0 or \n.
98
def _get_nodes_by_key(self):
99
if self._nodes_by_key is None:
101
if self.reference_lists:
102
for key, (absent, references, value) in self._nodes.iteritems():
105
key_dict = nodes_by_key
106
for subkey in key[:-1]:
107
key_dict = key_dict.setdefault(subkey, {})
108
key_dict[key[-1]] = key, value, references
110
for key, (absent, references, value) in self._nodes.iteritems():
113
key_dict = nodes_by_key
114
for subkey in key[:-1]:
115
key_dict = key_dict.setdefault(subkey, {})
116
key_dict[key[-1]] = key, value
117
self._nodes_by_key = nodes_by_key
118
return self._nodes_by_key
120
def _update_nodes_by_key(self, key, value, node_refs):
121
"""Update the _nodes_by_key dict with a new key.
123
For a key of (foo, bar, baz) create
124
_nodes_by_key[foo][bar][baz] = key_value
126
if self._nodes_by_key is None:
128
key_dict = self._nodes_by_key
129
if self.reference_lists:
130
key_value = key, value, node_refs
132
key_value = key, value
133
for subkey in key[:-1]:
134
key_dict = key_dict.setdefault(subkey, {})
135
key_dict[key[-1]] = key_value
137
def _check_key_ref_value(self, key, references, value):
138
"""Check that 'key' and 'references' are all valid.
140
:param key: A key tuple. Must conform to the key interface (be a tuple,
141
be of the right length, not have any whitespace or nulls in any key
143
:param references: An iterable of reference lists. Something like
144
[[(ref, key)], [(ref, key), (other, key)]]
145
:param value: The value associate with this key. Must not contain
146
newlines or null characters.
147
:return: (node_refs, absent_references)
148
node_refs basically a packed form of 'references' where all
150
absent_references reference keys that are not in self._nodes.
151
This may contain duplicates if the same key is
152
referenced in multiple lists.
108
154
self._check_key(key)
109
155
if _newline_null_re.search(value) is not None:
111
157
if len(references) != self.reference_lists:
112
158
raise errors.BadIndexValue(references)
160
absent_references = []
114
161
for reference_list in references:
115
162
for reference in reference_list:
116
self._check_key(reference)
163
# If reference *is* in self._nodes, then we know it has already
117
165
if reference not in self._nodes:
118
self._nodes[reference] = ('a', (), '')
166
self._check_key(reference)
167
absent_references.append(reference)
119
168
node_refs.append(tuple(reference_list))
120
if key in self._nodes and self._nodes[key][0] == '':
169
return tuple(node_refs), absent_references
171
def add_node(self, key, value, references=()):
172
"""Add a node to the index.
174
:param key: The key. keys are non-empty tuples containing
175
as many whitespace-free utf8 bytestrings as the key length
176
defined for this index.
177
:param references: An iterable of iterables of keys. Each is a
178
reference to another key.
179
:param value: The value to associate with the key. It may be any
180
bytes as long as it does not contain \0 or \n.
183
absent_references) = self._check_key_ref_value(key, references, value)
184
if key in self._nodes and self._nodes[key][0] != 'a':
121
185
raise errors.BadIndexDuplicateKey(key, self)
122
self._nodes[key] = ('', tuple(node_refs), value)
186
for reference in absent_references:
187
# There may be duplicates, but I don't think it is worth worrying
189
self._nodes[reference] = ('a', (), '')
190
self._nodes[key] = ('', node_refs, value)
123
191
self._keys.add(key)
124
if self._key_length > 1:
125
key_dict = self._nodes_by_key
126
if self.reference_lists:
127
key_value = key, value, tuple(node_refs)
129
key_value = key, value
130
# possibly should do this on-demand, but it seems likely it is
132
# For a key of (foo, bar, baz) create
133
# _nodes_by_key[foo][bar][baz] = key_value
134
for subkey in key[:-1]:
135
key_dict = key_dict.setdefault(subkey, {})
136
key_dict[key[-1]] = key_value
192
if self._nodes_by_key is not None and self._key_length > 1:
193
self._update_nodes_by_key(key, value, node_refs)
138
195
def finish(self):
139
196
lines = [_SIGNATURE]
142
199
lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
143
200
prefix_length = sum(len(x) for x in lines)
144
201
# references are byte offsets. To avoid having to do nasty
145
# polynomial work to resolve offsets (references to later in the
202
# polynomial work to resolve offsets (references to later in the
146
203
# file cannot be determined until all the inbetween references have
147
204
# been calculated too) we pad the offsets with 0's to make them be
148
205
# of consistent length. Using binary offsets would break the trivial
288
347
return "%s(%r)" % (self.__class__.__name__,
289
348
self._transport.abspath(self._name))
291
def _buffer_all(self):
350
def _buffer_all(self, stream=None):
292
351
"""Buffer all the index data.
294
353
Mutates self._nodes and self.keys_by_offset.
355
if self._nodes is not None:
356
# We already did this
296
358
if 'index' in debug.debug_flags:
297
359
mutter('Reading entire index %s', self._transport.abspath(self._name))
298
stream = self._transport.get(self._name)
361
stream = self._transport.get(self._name)
299
362
self._read_prefix(stream)
300
363
self._expected_elements = 3 + self._key_length
319
382
node_value = value
320
383
self._nodes[key] = node_value
321
384
if self._key_length > 1:
322
subkey = list(reversed(key[:-1]))
385
# TODO: We may want to do this lazily, but if we are calling
386
# _buffer_all, we are likely to be doing
387
# iter_entries_prefix
323
388
key_dict = self._nodes_by_key
324
389
if self.node_ref_lists:
325
390
key_value = key, node_value[0], node_value[1]
327
392
key_value = key, node_value
328
# possibly should do this on-demand, but it seems likely it is
330
393
# For a key of (foo, bar, baz) create
331
394
# _nodes_by_key[foo][bar][baz] = key_value
332
395
for subkey in key[:-1]:
629
693
if self._bisect_nodes is None:
630
694
readv_ranges.append(_HEADER_READV)
631
695
self._read_and_parse(readv_ranges)
697
if self._nodes is not None:
698
# _read_and_parse triggered a _buffer_all because we requested the
700
for location, key in location_keys:
701
if key not in self._nodes: # not present
702
result.append(((location, key), False))
703
elif self.node_ref_lists:
704
value, refs = self._nodes[key]
705
result.append(((location, key),
706
(self, key, value, refs)))
708
result.append(((location, key),
709
(self, key, self._nodes[key])))
632
711
# generate results:
633
712
# - figure out <, >, missing, present
634
713
# - result present references so we can return them.
636
714
# keys that we cannot answer until we resolve references
637
715
pending_references = []
638
716
pending_locations = set()
689
767
readv_ranges.append((location, length))
690
768
self._read_and_parse(readv_ranges)
769
if self._nodes is not None:
770
# The _read_and_parse triggered a _buffer_all, grab the data and
772
for location, key in pending_references:
773
value, refs = self._nodes[key]
774
result.append(((location, key), (self, key, value, refs)))
691
776
for location, key in pending_references:
692
777
# answer key references we had to look-up-late.
693
index = self._parsed_key_index(key)
694
778
value, refs = self._bisect_nodes[key]
695
779
result.append(((location, key), (self, key,
696
780
value, self._resolve_references(refs))))
967
1051
:param readv_ranges: A prepared readv range list.
970
readv_data = self._transport.readv(self._name, readv_ranges, True,
973
for offset, data in readv_data:
974
if self._bisect_nodes is None:
975
# this must be the start
976
if not (offset == 0):
977
raise AssertionError()
978
offset, data = self._parse_header_from_bytes(data)
979
# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))
980
self._parse_region(offset, data)
1053
if not readv_ranges:
1055
if self._nodes is None and self._bytes_read * 2 >= self._size:
1056
# We've already read more than 50% of the file and we are about to
1057
# request more data, just _buffer_all() and be done
1061
readv_data = self._transport.readv(self._name, readv_ranges, True,
1064
for offset, data in readv_data:
1065
self._bytes_read += len(data)
1066
if offset == 0 and len(data) == self._size:
1067
# We read the whole range, most likely because the
1068
# Transport upcast our readv ranges into one long request
1069
# for enough total data to grab the whole index.
1070
self._buffer_all(StringIO(data))
1072
if self._bisect_nodes is None:
1073
# this must be the start
1074
if not (offset == 0):
1075
raise AssertionError()
1076
offset, data = self._parse_header_from_bytes(data)
1077
# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))
1078
self._parse_region(offset, data)
982
1080
def _signature(self):
983
1081
"""The file signature for this index type."""