95
94
if not element or _whitespace_re.search(element) is not None:
96
95
raise errors.BadIndexKey(element)
98
def _get_nodes_by_key(self):
99
if self._nodes_by_key is None:
101
if self.reference_lists:
102
for key, (absent, references, value) in self._nodes.iteritems():
105
key_dict = nodes_by_key
106
for subkey in key[:-1]:
107
key_dict = key_dict.setdefault(subkey, {})
108
key_dict[key[-1]] = key, value, references
110
for key, (absent, references, value) in self._nodes.iteritems():
113
key_dict = nodes_by_key
114
for subkey in key[:-1]:
115
key_dict = key_dict.setdefault(subkey, {})
116
key_dict[key[-1]] = key, value
117
self._nodes_by_key = nodes_by_key
118
return self._nodes_by_key
120
def _update_nodes_by_key(self, key, value, node_refs):
121
"""Update the _nodes_by_key dict with a new key.
123
For a key of (foo, bar, baz) create
124
_nodes_by_key[foo][bar][baz] = key_value
126
if self._nodes_by_key is None:
128
key_dict = self._nodes_by_key
129
if self.reference_lists:
130
key_value = key, value, node_refs
132
key_value = key, value
133
for subkey in key[:-1]:
134
key_dict = key_dict.setdefault(subkey, {})
135
key_dict[key[-1]] = key_value
137
def _check_key_ref_value(self, key, references, value):
138
"""Check that 'key' and 'references' are all valid.
140
:param key: A key tuple. Must conform to the key interface (be a tuple,
141
be of the right length, not have any whitespace or nulls in any key
143
:param references: An iterable of reference lists. Something like
144
[[(ref, key)], [(ref, key), (other, key)]]
145
:param value: The value associate with this key. Must not contain
146
newlines or null characters.
147
:return: (node_refs, absent_references)
148
node_refs basically a packed form of 'references' where all
150
absent_references reference keys that are not in self._nodes.
151
This may contain duplicates if the same key is
152
referenced in multiple lists.
97
def add_node(self, key, value, references=()):
98
"""Add a node to the index.
100
:param key: The key. keys are non-empty tuples containing
101
as many whitespace-free utf8 bytestrings as the key length
102
defined for this index.
103
:param references: An iterable of iterables of keys. Each is a
104
reference to another key.
105
:param value: The value to associate with the key. It may be any
106
bytes as long as it does not contain \0 or \n.
154
108
self._check_key(key)
155
109
if _newline_null_re.search(value) is not None:
157
111
if len(references) != self.reference_lists:
158
112
raise errors.BadIndexValue(references)
160
absent_references = []
161
114
for reference_list in references:
162
115
for reference in reference_list:
163
# If reference *is* in self._nodes, then we know it has already
116
self._check_key(reference)
165
117
if reference not in self._nodes:
166
self._check_key(reference)
167
absent_references.append(reference)
118
self._nodes[reference] = ('a', (), '')
168
119
node_refs.append(tuple(reference_list))
169
return tuple(node_refs), absent_references
171
def add_node(self, key, value, references=()):
172
"""Add a node to the index.
174
:param key: The key. keys are non-empty tuples containing
175
as many whitespace-free utf8 bytestrings as the key length
176
defined for this index.
177
:param references: An iterable of iterables of keys. Each is a
178
reference to another key.
179
:param value: The value to associate with the key. It may be any
180
bytes as long as it does not contain \0 or \n.
183
absent_references) = self._check_key_ref_value(key, references, value)
184
if key in self._nodes and self._nodes[key][0] != 'a':
120
if key in self._nodes and self._nodes[key][0] == '':
185
121
raise errors.BadIndexDuplicateKey(key, self)
186
for reference in absent_references:
187
# There may be duplicates, but I don't think it is worth worrying
189
self._nodes[reference] = ('a', (), '')
190
self._nodes[key] = ('', node_refs, value)
122
self._nodes[key] = ('', tuple(node_refs), value)
191
123
self._keys.add(key)
192
if self._nodes_by_key is not None and self._key_length > 1:
193
self._update_nodes_by_key(key, value, node_refs)
124
if self._key_length > 1:
125
key_dict = self._nodes_by_key
126
if self.reference_lists:
127
key_value = key, value, tuple(node_refs)
129
key_value = key, value
130
# possibly should do this on-demand, but it seems likely it is
132
# For a key of (foo, bar, baz) create
133
# _nodes_by_key[foo][bar][baz] = key_value
134
for subkey in key[:-1]:
135
key_dict = key_dict.setdefault(subkey, {})
136
key_dict[key[-1]] = key_value
195
138
def finish(self):
196
139
lines = [_SIGNATURE]
199
142
lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
200
143
prefix_length = sum(len(x) for x in lines)
201
144
# references are byte offsets. To avoid having to do nasty
202
# polynomial work to resolve offsets (references to later in the
145
# polynomial work to resolve offsets (references to later in the
203
146
# file cannot be determined until all the inbetween references have
204
147
# been calculated too) we pad the offsets with 0's to make them be
205
148
# of consistent length. Using binary offsets would break the trivial
347
288
return "%s(%r)" % (self.__class__.__name__,
348
289
self._transport.abspath(self._name))
350
def _buffer_all(self, stream=None):
291
def _buffer_all(self):
351
292
"""Buffer all the index data.
353
294
Mutates self._nodes and self.keys_by_offset.
355
if self._nodes is not None:
356
# We already did this
358
296
if 'index' in debug.debug_flags:
359
297
mutter('Reading entire index %s', self._transport.abspath(self._name))
361
stream = self._transport.get(self._name)
298
stream = self._transport.get(self._name)
362
299
self._read_prefix(stream)
363
300
self._expected_elements = 3 + self._key_length
382
319
node_value = value
383
320
self._nodes[key] = node_value
384
321
if self._key_length > 1:
385
# TODO: We may want to do this lazily, but if we are calling
386
# _buffer_all, we are likely to be doing
387
# iter_entries_prefix
322
subkey = list(reversed(key[:-1]))
388
323
key_dict = self._nodes_by_key
389
324
if self.node_ref_lists:
390
325
key_value = key, node_value[0], node_value[1]
392
327
key_value = key, node_value
328
# possibly should do this on-demand, but it seems likely it is
393
330
# For a key of (foo, bar, baz) create
394
331
# _nodes_by_key[foo][bar][baz] = key_value
395
332
for subkey in key[:-1]:
693
629
if self._bisect_nodes is None:
694
630
readv_ranges.append(_HEADER_READV)
695
631
self._read_and_parse(readv_ranges)
697
if self._nodes is not None:
698
# _read_and_parse triggered a _buffer_all because we requested the
700
for location, key in location_keys:
701
if key not in self._nodes: # not present
702
result.append(((location, key), False))
703
elif self.node_ref_lists:
704
value, refs = self._nodes[key]
705
result.append(((location, key),
706
(self, key, value, refs)))
708
result.append(((location, key),
709
(self, key, self._nodes[key])))
711
632
# generate results:
712
633
# - figure out <, >, missing, present
713
634
# - result present references so we can return them.
714
636
# keys that we cannot answer until we resolve references
715
637
pending_references = []
716
638
pending_locations = set()
767
689
readv_ranges.append((location, length))
768
690
self._read_and_parse(readv_ranges)
769
if self._nodes is not None:
770
# The _read_and_parse triggered a _buffer_all, grab the data and
772
for location, key in pending_references:
773
value, refs = self._nodes[key]
774
result.append(((location, key), (self, key, value, refs)))
776
691
for location, key in pending_references:
777
692
# answer key references we had to look-up-late.
693
index = self._parsed_key_index(key)
778
694
value, refs = self._bisect_nodes[key]
779
695
result.append(((location, key), (self, key,
780
696
value, self._resolve_references(refs))))
1051
967
:param readv_ranges: A prepared readv range list.
1053
if not readv_ranges:
1055
if self._nodes is None and self._bytes_read * 2 >= self._size:
1056
# We've already read more than 50% of the file and we are about to
1057
# request more data, just _buffer_all() and be done
1061
readv_data = self._transport.readv(self._name, readv_ranges, True,
1064
for offset, data in readv_data:
1065
self._bytes_read += len(data)
1066
if offset == 0 and len(data) == self._size:
1067
# We read the whole range, most likely because the
1068
# Transport upcast our readv ranges into one long request
1069
# for enough total data to grab the whole index.
1070
self._buffer_all(StringIO(data))
1072
if self._bisect_nodes is None:
1073
# this must be the start
1074
if not (offset == 0):
1075
raise AssertionError()
1076
offset, data = self._parse_header_from_bytes(data)
1077
# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))
1078
self._parse_region(offset, data)
970
readv_data = self._transport.readv(self._name, readv_ranges, True,
973
for offset, data in readv_data:
974
if self._bisect_nodes is None:
975
# this must be the start
976
if not (offset == 0):
977
raise AssertionError()
978
offset, data = self._parse_header_from_bytes(data)
979
# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))
980
self._parse_region(offset, data)
1080
982
def _signature(self):
1081
983
"""The file signature for this index type."""