@@ -13,3 +13,3 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -18 +18 @@
 """B+Tree indices"""
@@ -180,16 +181,11 @@
         combine mem with the first and second indexes, creating a new one of
         size 4x. On the fifth create a single new one, etc.
         """
-        iterators_to_combine = [self._iter_mem_nodes()]
-        pos = -1
-        for pos, backing in enumerate(self._backing_indices):
-            if backing is None:
-                pos -= 1
-                break
-            iterators_to_combine.append(backing.iter_all_entries())
-        backing_pos = pos + 1
-        new_backing_file, size = \
-            self._write_nodes(self._iter_smallest(iterators_to_combine))
+        if self._combine_backing_indices:
+            (new_backing_file, size,
+             backing_pos) = self._spill_mem_keys_and_combine()
+        else:
+            new_backing_file, size = self._spill_mem_keys_without_combining()
         dir_path, base_name = osutils.split(new_backing_file.name)
         # Note: The transport here isn't strictly needed, because we will use
         #       direct access to the new_backing._file object
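For context: `self._combine_backing_indices` is the builder-level switch this branch consults, and callers toggle it through `set_optimize`. A minimal usage sketch follows; the `combine_backing_indices` keyword on `set_optimize` is assumed from the API this patch extends, so treat it as illustrative rather than definitive:

```python
from bzrlib import btree_index

builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
# Spill each in-memory batch to its own file instead of merging the
# existing backing indices; faster while building, at the cost of more
# files to search before the final combine.
builder.set_optimize(combine_backing_indices=False)
```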
@@ -198,13 +194,33 @@
         # GC will clean up the file
         new_backing._file = new_backing_file
-        if len(self._backing_indices) == backing_pos:
-            self._backing_indices.append(None)
-        self._backing_indices[backing_pos] = new_backing
-        for pos in range(backing_pos):
-            self._backing_indices[pos] = None
+        if self._combine_backing_indices:
+            if len(self._backing_indices) == backing_pos:
+                self._backing_indices.append(None)
+            self._backing_indices[backing_pos] = new_backing
+            for backing_pos in range(backing_pos):
+                self._backing_indices[backing_pos] = None
+        else:
+            self._backing_indices.append(new_backing)
         self._keys = set()
         self._nodes = {}
         self._nodes_by_key = None
 
+    def _spill_mem_keys_without_combining(self):
+        return self._write_nodes(self._iter_mem_nodes(), allow_optimize=False)
+
+    def _spill_mem_keys_and_combine(self):
+        iterators_to_combine = [self._iter_mem_nodes()]
+        pos = -1
+        for pos, backing in enumerate(self._backing_indices):
+            if backing is None:
+                pos -= 1
+                break
+            iterators_to_combine.append(backing.iter_all_entries())
+        backing_pos = pos + 1
+        new_backing_file, size = \
+            self._write_nodes(self._iter_smallest(iterators_to_combine),
+                              allow_optimize=False)
+        return new_backing_file, size, backing_pos
+
     def add_nodes(self, nodes):
         """Add nodes to the index.
@@ -262,11 +278,14 @@
             except StopIteration:
                 current_values[pos] = None
 
-    def _add_key(self, string_key, line, rows):
+    def _add_key(self, string_key, line, rows, allow_optimize=True):
         """Add a key to the current chunk.
 
         :param string_key: The key to add.
         :param line: The fully serialised key and value.
+        :param allow_optimize: If set to False, prevent setting the optimize
+            flag when writing out. This is used by the _spill_mem_keys_to_disk
+            functionality.
         """
         if rows[-1].writer is None:
             # opening a new leaf chunk;
@@ -277,8 +296,12 @@
                     length = _PAGE_SIZE
                     if internal_row.nodes == 0:
                         length -= _RESERVED_HEADER_BYTES # padded
+                    if allow_optimize:
+                        optimize_for_size = self._optimize_for_size
+                    else:
+                        optimize_for_size = False
                     internal_row.writer = chunk_writer.ChunkWriter(length, 0,
-                        optimize_for_size=self._optimize_for_size)
+                        optimize_for_size=optimize_for_size)
                     internal_row.writer.write(_INTERNAL_FLAG)
                     internal_row.writer.write(_INTERNAL_OFFSET +
                         str(rows[pos + 1].nodes) + "\n")
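Why gate on `allow_optimize` here: spill files are intermediate output (they are re-read and merged on a later spill or at `finish()`), so the rationale appears to be that paying the ChunkWriter's extra compaction work for them is wasted. A standalone toy showing the gating, not bzrlib code:

```python
def choose_optimize_for_size(builder_setting, allow_optimize=True):
    # Mirrors the hunk above: the builder-wide setting only applies
    # when the caller did not disable optimization for this write.
    if allow_optimize:
        return builder_setting
    return False

print(choose_optimize_for_size(True))                        # True: final index
print(choose_optimize_for_size(True, allow_optimize=False))  # False: spill file
```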
@@ -322,4 +345,4 @@
             new_row.writer.write(_INTERNAL_OFFSET +
                 str(rows[1].nodes - 1) + "\n")
             new_row.writer.write(key_line)
-            self._add_key(string_key, line, rows)
+            self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
@@ -327,6 +350,9 @@
-    def _write_nodes(self, node_iterator):
+    def _write_nodes(self, node_iterator, allow_optimize=True):
         """Write node_iterator out as a B+Tree.
 
         :param node_iterator: An iterator of sorted nodes. Each node should
             match the output given by iter_all_entries.
+        :param allow_optimize: If set to False, prevent setting the optimize
+            flag when writing out. This is used by the _spill_mem_keys_to_disk
+            functionality.
         :return: A file handle for a temporary file containing a B+Tree for
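For reference, `_write_nodes` is the engine behind the public `finish()` call. A minimal end-to-end sketch of that path (keys and values are illustrative; the constructor arguments follow the signatures used in this module):

```python
from bzrlib import btree_index

builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
builder.add_node(('alpha',), 'value for alpha')
builder.add_node(('beta',), 'value for beta')
# finish() drives _write_nodes() and returns the temporary file
# holding the serialised B+Tree.
index_file = builder.finish()
```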
@@ -354,10 +380,10 @@
             string_key, line = _btree_serializer._flatten_node(node,
                                     self.reference_lists)
-            self._add_key(string_key, line, rows)
+            self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
         for row in reversed(rows):
             pad = (type(row) != _LeafBuilderRow)
             row.finish_node(pad=pad)
-        result = tempfile.NamedTemporaryFile()
+        result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
         lines = [_BTSIGNATURE]
         lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
         lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
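A side note on the `NamedTemporaryFile(prefix='bzr-index-')` change: it only alters the generated file name, which makes spill files left behind by a killed process recognisable in the temp directory. Standalone illustration:

```python
import tempfile

f = tempfile.NamedTemporaryFile(prefix='bzr-index-')
print(f.name)  # e.g. '/tmp/bzr-index-Ab3xYz' on POSIX systems
f.close()      # the file is deleted on close
```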
@@ -616,7 +642,11 @@
         # Default max size is 100,000 leaf values
         self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
         self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
-        self._internal_node_cache = lru_cache.LRUCache()
+        # We could limit this, but even a 300k record btree has only 3k leaf
+        # nodes, and only 20 internal nodes. So the default of 100 nodes in an
+        # LRU would mean we always cache everything anyway, no need to pay the
+        # overhead of LRU
+        self._internal_node_cache = fifo_cache.FIFOCache(100)
         self._key_count = None
         self._row_lengths = None
         self._row_offsets = None # Start of each row, [-1] is the end
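A quick back-of-envelope check of the new comment's figures, under assumed fan-outs (roughly 100 records per 4KiB leaf and 150 children per internal node; the real numbers depend on key size and compression):

```python
records = 300 * 1000
records_per_leaf = 100        # assumption
children_per_internal = 150   # assumption

leaves = records // records_per_leaf             # 3000 leaf nodes
internals = -(-leaves // children_per_internal)  # 20 internal nodes (ceiling)
print((leaves, internals))
```

With only about 20 internal nodes, a 100-entry FIFO indeed caches every internal node, so the patch keeps full caching while dropping the LRU's per-access reordering overhead.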