@@ -39,3 +39,2 @@
 from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN
-from bzrlib.osutils import basename, dirname
 from bzrlib.transport import get_transport
@@ -79,4 +78,6 @@
             skipped_bytes = padding
         self.spool.writelines(byte_lines)
-        if (self.spool.tell() + skipped_bytes) % _PAGE_SIZE != 0:
-            raise AssertionError("incorrect node length")
+        remainder = (self.spool.tell() + skipped_bytes) % _PAGE_SIZE
+        if remainder != 0:
+            raise AssertionError("incorrect node length: %d, %d"
+                                 % (self.spool.tell(), remainder))
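The old check only said a node was the wrong length; the new one reports both the spool position and how far past a page boundary it landed. A minimal sketch of the invariant being enforced, assuming the module's 4096-byte _PAGE_SIZE and a hypothetical check_alignment helper:

    _PAGE_SIZE = 4096  # same value the module uses

    def check_alignment(stream_pos, skipped_bytes):
        # Every flushed node must end exactly on a page boundary; reporting
        # the remainder (not just pass/fail) tells you how far off it was.
        remainder = (stream_pos + skipped_bytes) % _PAGE_SIZE
        if remainder != 0:
            raise AssertionError("incorrect node length: %d, %d"
                                 % (stream_pos, remainder))

    check_alignment(8192, 0)    # two full pages: fine
    check_alignment(8000, 192)  # unwritten padding still counted: fine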
@@ -135,6 +136,10 @@
                                          key_elements=key_elements)
         self._spill_at = spill_at
         self._backing_indices = []
+        # A map of {key: (node_refs, value)}
+        self._nodes = {}
+        # Indicate it hasn't been built yet
+        self._nodes_by_key = None
 
     def add_node(self, key, value, references=()):
         """Add a node to the index.
@@ -150,10 +155,32 @@
         :param value: The value to associate with the key. It may be any
             bytes as long as it does not contain \0 or \n.
         """
-        index.GraphIndexBuilder.add_node(self, key, value, references=references)
+        # we don't care about absent_references
+        node_refs, _ = self._check_key_ref_value(key, references, value)
+        if key in self._nodes:
+            raise errors.BadIndexDuplicateKey(key, self)
+        self._nodes[key] = (node_refs, value)
+        self._keys.add(key)
+        if self._nodes_by_key is not None and self._key_length > 1:
+            self._update_nodes_by_key(key, value, node_refs)
         if len(self._keys) < self._spill_at:
             return
-        iterators_to_combine = [iter(sorted(self._iter_mem_nodes()))]
+        self._spill_mem_keys_to_disk()
+
+    def _spill_mem_keys_to_disk(self):
+        """Write the in memory keys down to disk to cap memory consumption.
+
+        If we already have some keys written to disk, we will combine them so
+        as to preserve the sorted order. The algorithm for combining uses
+        powers of two. So on the first spill, write all mem nodes into a
+        single index. On the second spill, combine the mem nodes with the nodes
+        on disk to create a 2x sized disk index and get rid of the first index.
+        On the third spill, create a single new disk index, which will contain
+        the mem nodes, and preserve the existing 2x sized index. On the fourth,
+        combine mem with the first and second indexes, creating a new one of
+        size 4x. On the fifth create a single new one, etc.
+        """
+        iterators_to_combine = [self._iter_mem_nodes()]
         pos = -1
         for pos, backing in enumerate(self._backing_indices):
             if backing is None:
@@ -163,9 +190,11 @@
         backing_pos = pos + 1
         new_backing_file, size = \
             self._write_nodes(self._iter_smallest(iterators_to_combine))
-        new_backing = BTreeGraphIndex(
-            get_transport(dirname(new_backing_file.name)),
-            basename(new_backing_file.name), size)
+        dir_path, base_name = osutils.split(new_backing_file.name)
+        # Note: The transport here isn't strictly needed, because we will use
+        #       direct access to the new_backing._file object
+        new_backing = BTreeGraphIndex(get_transport(dir_path),
+                                      base_name, size)
         # GC will clean up the file
         new_backing._file = new_backing_file
         if len(self._backing_indices) == backing_pos:
@@ -175,7 +204,7 @@
             self._backing_indices[pos] = None
         self._keys = set()
         self._nodes = {}
-        self._nodes_by_key = {}
+        self._nodes_by_key = None
 
     def add_nodes(self, nodes):
         """Add nodes to the index.
@@ -192,13 +221,14 @@
     def _iter_mem_nodes(self):
         """Iterate over the nodes held in memory."""
+        nodes = self._nodes
         if self.reference_lists:
-            for key, (absent, references, value) in self._nodes.iteritems():
-                if not absent:
-                    yield self, key, value, references
+            for key in sorted(nodes):
+                references, value = nodes[key]
+                yield self, key, value, references
         else:
-            for key, (absent, references, value) in self._nodes.iteritems():
-                if not absent:
-                    yield self, key, value
+            for key in sorted(nodes):
+                references, value = nodes[key]
+                yield self, key, value
 
     def _iter_smallest(self, iterators_to_combine):
         if len(iterators_to_combine) == 1:
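_iter_mem_nodes now yields in sorted key order directly (one sorted() over the dict keys), so callers such as _spill_mem_keys_to_disk no longer wrap it in iter(sorted(...)); _iter_smallest then only has to merge already-sorted streams. The merge is equivalent in spirit to heapq.merge; a sketch under that assumption (not the bzrlib implementation, which also raises BadIndexDuplicateKey when two streams produce the same key):

    import heapq

    def merge_sorted_streams(iterators):
        # Each input is already sorted; heapq.merge interleaves them into one
        # globally sorted stream while buffering a single item per input.
        for item in heapq.merge(*iterators):
            yield item

    mem = iter(sorted({('b',): '1', ('a',): '2'}.items()))
    disk = iter(sorted({('c',): '3'}.items()))
    print(list(merge_sorted_streams([mem, disk])))
    # [(('a',), '2'), (('b',), '1'), (('c',), '3')]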
@@ -358,6 +388,9 @@
             copied_len = osutils.pumpfile(row.spool, result)
             if copied_len != (row.nodes - 1) * _PAGE_SIZE:
                 if type(row) != _LeafBuilderRow:
-                    raise AssertionError("Not enough data copied")
+                    raise AssertionError("Incorrect amount of data copied"
+                        " expected: %d, got: %d"
+                        % ((row.nodes - 1) * _PAGE_SIZE,
+                           copied_len))
         result.flush()
         size = result.tell()
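The tightened assertion depends on osutils.pumpfile returning the number of bytes it actually moved, so the message can show expected versus got. A hypothetical stand-in with that contract (pump is illustrative only, not the bzrlib function):

    import io

    def pump(from_file, to_file, bufsize=32768):
        # Copy in fixed-size chunks, returning how many bytes moved so the
        # caller can assert on the exact amount, as _write_nodes does.
        copied = 0
        while True:
            chunk = from_file.read(bufsize)
            if not chunk:
                return copied
            to_file.write(chunk)
            copied += len(chunk)

    src, dst = io.BytesIO(b'x' * 8192), io.BytesIO()
    assert pump(src, dst) == 8192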
@@ -384,7 +417,7 @@
                 "iter_all_entries scales with size of history.")
         # Doing serial rather than ordered would be faster; but this shouldn't
         # be getting called routinely anyway.
-        iterators = [iter(sorted(self._iter_mem_nodes()))]
+        iterators = [self._iter_mem_nodes()]
         for backing in self._backing_indices:
             if backing is not None:
                 iterators.append(backing.iter_all_entries())
@@ -404,13 +437,11 @@
         if self.reference_lists:
             for key in keys.intersection(self._keys):
                 node = self._nodes[key]
-                if not node[0]:
-                    yield self, key, node[2], node[1]
+                yield self, key, node[1], node[0]
         else:
             for key in keys.intersection(self._keys):
                 node = self._nodes[key]
-                if not node[0]:
-                    yield self, key, node[2]
+                yield self, key, node[1]
         keys.difference_update(self._keys)
         for backing in self._backing_indices:
             if backing is None:
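The index shifts here (node[2], node[1] becoming node[1], node[0], with the absent guards gone) all follow from dropping the absent flag from the stored tuples. A tiny before/after check with hypothetical values:

    refs = ((('other-key',),),)          # one reference list, one referenced key
    value = 'value'
    old_node = ('', refs, value)         # (absent, node_refs, value)
    new_node = (refs, value)             # (node_refs, value)
    assert old_node[2] == new_node[1] == value
    assert old_node[1] == new_node[0] == refs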
@@ -459,9 +490,7 @@
                     node = self._nodes[key]
                 except KeyError:
                     continue
-                if node[0]:
-                    continue
                 if self.reference_lists:
-                    yield self, key, node[2], node[1]
+                    yield self, key, node[1], node[0]
                 else:
-                    yield self, key, node[2]
+                    yield self, key, node[1]
@@ -473,6 +502,6 @@
             if len(key) != self._key_length:
                 raise errors.BadIndexKey(key)
             # find what it refers to:
-            key_dict = self._nodes_by_key
+            key_dict = self._get_nodes_by_key()
             elements = list(key)
             # find the subdict to return
@@ -500,4 +529,22 @@
                 yield (self, ) + key_dict
+
+    def _get_nodes_by_key(self):
+        if self._nodes_by_key is None:
+            nodes_by_key = {}
+            if self.reference_lists:
+                for key, (references, value) in self._nodes.iteritems():
+                    key_dict = nodes_by_key
+                    for subkey in key[:-1]:
+                        key_dict = key_dict.setdefault(subkey, {})
+                    key_dict[key[-1]] = key, value, references
+            else:
+                for key, (references, value) in self._nodes.iteritems():
+                    key_dict = nodes_by_key
+                    for subkey in key[:-1]:
+                        key_dict = key_dict.setdefault(subkey, {})
+                    key_dict[key[-1]] = key, value
+            self._nodes_by_key = nodes_by_key
+        return self._nodes_by_key
 
     def key_count(self):
         """Return an estimate of the number of keys in this index.