13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
18
"""B+Tree indices"""
181
180
combine mem with the first and second indexes, creating a new one of
182
181
size 4x. On the fifth create a single new one, etc.
184
if self._combine_backing_indices:
185
(new_backing_file, size,
186
backing_pos) = self._spill_mem_keys_and_combine()
188
new_backing_file, size = self._spill_mem_keys_without_combining()
183
iterators_to_combine = [self._iter_mem_nodes()]
185
for pos, backing in enumerate(self._backing_indices):
189
iterators_to_combine.append(backing.iter_all_entries())
190
backing_pos = pos + 1
191
new_backing_file, size = \
192
self._write_nodes(self._iter_smallest(iterators_to_combine))
189
193
dir_path, base_name = osutils.split(new_backing_file.name)
190
194
# Note: The transport here isn't strictly needed, because we will use
191
195
# direct access to the new_backing._file object
194
198
# GC will clean up the file
195
199
new_backing._file = new_backing_file
196
if self._combine_backing_indices:
197
if len(self._backing_indices) == backing_pos:
198
self._backing_indices.append(None)
199
self._backing_indices[backing_pos] = new_backing
200
for backing_pos in range(backing_pos):
201
self._backing_indices[backing_pos] = None
203
self._backing_indices.append(new_backing)
200
if len(self._backing_indices) == backing_pos:
201
self._backing_indices.append(None)
202
self._backing_indices[backing_pos] = new_backing
203
for pos in range(backing_pos):
204
self._backing_indices[pos] = None
204
205
self._keys = set()
206
207
self._nodes_by_key = None
208
def _spill_mem_keys_without_combining(self):
    """Spill the in-memory nodes to a fresh backing file, unmerged.

    Only the nodes currently held in memory are written out; the
    existing backing indices are left untouched.  ``allow_optimize`` is
    disabled because the output is a transient spill file rather than
    the final index.

    :return: The (file, size) pair produced by ``self._write_nodes``.
    """
    mem_nodes = self._iter_mem_nodes()
    return self._write_nodes(mem_nodes, allow_optimize=False)
211
def _spill_mem_keys_and_combine(self):
212
iterators_to_combine = [self._iter_mem_nodes()]
214
for pos, backing in enumerate(self._backing_indices):
218
iterators_to_combine.append(backing.iter_all_entries())
219
backing_pos = pos + 1
220
new_backing_file, size = \
221
self._write_nodes(self._iter_smallest(iterators_to_combine),
222
allow_optimize=False)
223
return new_backing_file, size, backing_pos
225
209
def add_nodes(self, nodes):
226
210
"""Add nodes to the index.
278
262
except StopIteration:
279
263
current_values[pos] = None
281
def _add_key(self, string_key, line, rows, allow_optimize=True):
265
def _add_key(self, string_key, line, rows):
282
266
"""Add a key to the current chunk.
284
268
:param string_key: The key to add.
285
269
:param line: The fully serialised key and value.
286
:param allow_optimize: If set to False, prevent setting the optimize
287
flag when writing out. This is used by the _spill_mem_keys_to_disk
290
271
if rows[-1].writer is None:
291
272
# opening a new leaf chunk;
296
277
length = _PAGE_SIZE
297
278
if internal_row.nodes == 0:
298
279
length -= _RESERVED_HEADER_BYTES # padded
300
optimize_for_size = self._optimize_for_size
302
optimize_for_size = False
303
280
internal_row.writer = chunk_writer.ChunkWriter(length, 0,
304
optimize_for_size=optimize_for_size)
281
optimize_for_size=self._optimize_for_size)
305
282
internal_row.writer.write(_INTERNAL_FLAG)
306
283
internal_row.writer.write(_INTERNAL_OFFSET +
307
284
str(rows[pos + 1].nodes) + "\n")
320
297
for row in reversed(rows[:-1]):
321
298
# Mark the start of the next node in the node above. If it
322
# doesn't fit then propagate upwards until we find one that
299
# doesn't fit then propagate upwards until we find one that
323
300
# it does fit into.
324
301
if row.writer.write(key_line):
325
302
row.finish_node()
345
322
new_row.writer.write(_INTERNAL_OFFSET +
346
323
str(rows[1].nodes - 1) + "\n")
347
324
new_row.writer.write(key_line)
348
self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
325
self._add_key(string_key, line, rows)
350
def _write_nodes(self, node_iterator, allow_optimize=True):
327
def _write_nodes(self, node_iterator):
351
328
"""Write node_iterator out as a B+Tree.
353
330
:param node_iterator: An iterator of sorted nodes. Each node should
354
331
match the output given by iter_all_entries.
355
:param allow_optimize: If set to False, prevent setting the optimize
356
flag when writing out. This is used by the _spill_mem_keys_to_disk
358
332
:return: A file handle for a temporary file containing a B+Tree for
370
344
self.row_lengths = []
371
345
# Loop over all nodes adding them to the bottom row
372
346
# (rows[-1]). When we finish a chunk in a row,
373
# propagate the key that didn't fit (comes after the chunk) to the
347
# propagate the key that didn't fit (comes after the chunk) to the
374
348
# row above, transitively.
375
349
for node in node_iterator:
376
350
if key_count == 0:
380
354
string_key, line = _btree_serializer._flatten_node(node,
381
355
self.reference_lists)
382
self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
356
self._add_key(string_key, line, rows)
383
357
for row in reversed(rows):
384
358
pad = (type(row) != _LeafBuilderRow)
385
359
row.finish_node(pad=pad)
386
result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
360
result = tempfile.NamedTemporaryFile()
387
361
lines = [_BTSIGNATURE]
388
362
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
389
363
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
642
616
# Default max size is 100,000 leave values
643
617
self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
644
618
self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
645
# We could limit this, but even a 300k record btree has only 3k leaf
646
# nodes, and only 20 internal nodes. So the default of 100 nodes in an
647
# LRU would mean we always cache everything anyway, no need to pay the
649
self._internal_node_cache = fifo_cache.FIFOCache(100)
619
self._internal_node_cache = lru_cache.LRUCache()
650
620
self._key_count = None
651
621
self._row_lengths = None
652
622
self._row_offsets = None # Start of each row, [-1] is the end