~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/btree_index.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2009-03-06 06:48:25 UTC
  • mfrom: (4070.8.6 debug-config)
  • Revision ID: pqm@pqm.ubuntu.com-20090306064825-kbpwggw21dygeix6
(mbp) debug_flags configuration option

Show diffs side-by-side

Legend: "added" marks lines added in the new revision; "removed" marks lines removed from the old revision.

Lines of Context:
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
16
#
17
17
 
18
18
"""B+Tree indices"""
30
30
    chunk_writer,
31
31
    debug,
32
32
    errors,
33
 
    fifo_cache,
34
33
    index,
35
34
    lru_cache,
36
35
    osutils,
181
180
        combine mem with the first and second indexes, creating a new one of
182
181
        size 4x. On the fifth create a single new one, etc.
183
182
        """
184
 
        if self._combine_backing_indices:
185
 
            (new_backing_file, size,
186
 
             backing_pos) = self._spill_mem_keys_and_combine()
187
 
        else:
188
 
            new_backing_file, size = self._spill_mem_keys_without_combining()
 
183
        iterators_to_combine = [self._iter_mem_nodes()]
 
184
        pos = -1
 
185
        for pos, backing in enumerate(self._backing_indices):
 
186
            if backing is None:
 
187
                pos -= 1
 
188
                break
 
189
            iterators_to_combine.append(backing.iter_all_entries())
 
190
        backing_pos = pos + 1
 
191
        new_backing_file, size = \
 
192
            self._write_nodes(self._iter_smallest(iterators_to_combine))
189
193
        dir_path, base_name = osutils.split(new_backing_file.name)
190
194
        # Note: The transport here isn't strictly needed, because we will use
191
195
        #       direct access to the new_backing._file object
193
197
                                      base_name, size)
194
198
        # GC will clean up the file
195
199
        new_backing._file = new_backing_file
196
 
        if self._combine_backing_indices:
197
 
            if len(self._backing_indices) == backing_pos:
198
 
                self._backing_indices.append(None)
199
 
            self._backing_indices[backing_pos] = new_backing
200
 
            for backing_pos in range(backing_pos):
201
 
                self._backing_indices[backing_pos] = None
202
 
        else:
203
 
            self._backing_indices.append(new_backing)
 
200
        if len(self._backing_indices) == backing_pos:
 
201
            self._backing_indices.append(None)
 
202
        self._backing_indices[backing_pos] = new_backing
 
203
        for pos in range(backing_pos):
 
204
            self._backing_indices[pos] = None
204
205
        self._keys = set()
205
206
        self._nodes = {}
206
207
        self._nodes_by_key = None
207
208
 
208
 
    def _spill_mem_keys_without_combining(self):
209
 
        return self._write_nodes(self._iter_mem_nodes(), allow_optimize=False)
210
 
 
211
 
    def _spill_mem_keys_and_combine(self):
212
 
        iterators_to_combine = [self._iter_mem_nodes()]
213
 
        pos = -1
214
 
        for pos, backing in enumerate(self._backing_indices):
215
 
            if backing is None:
216
 
                pos -= 1
217
 
                break
218
 
            iterators_to_combine.append(backing.iter_all_entries())
219
 
        backing_pos = pos + 1
220
 
        new_backing_file, size = \
221
 
            self._write_nodes(self._iter_smallest(iterators_to_combine),
222
 
                              allow_optimize=False)
223
 
        return new_backing_file, size, backing_pos
224
 
 
225
209
    def add_nodes(self, nodes):
226
210
        """Add nodes to the index.
227
211
 
278
262
            except StopIteration:
279
263
                current_values[pos] = None
280
264
 
281
 
    def _add_key(self, string_key, line, rows, allow_optimize=True):
 
265
    def _add_key(self, string_key, line, rows):
282
266
        """Add a key to the current chunk.
283
267
 
284
268
        :param string_key: The key to add.
285
269
        :param line: The fully serialised key and value.
286
 
        :param allow_optimize: If set to False, prevent setting the optimize
287
 
            flag when writing out. This is used by the _spill_mem_keys_to_disk
288
 
            functionality.
289
270
        """
290
271
        if rows[-1].writer is None:
291
272
            # opening a new leaf chunk;
296
277
                    length = _PAGE_SIZE
297
278
                    if internal_row.nodes == 0:
298
279
                        length -= _RESERVED_HEADER_BYTES # padded
299
 
                    if allow_optimize:
300
 
                        optimize_for_size = self._optimize_for_size
301
 
                    else:
302
 
                        optimize_for_size = False
303
280
                    internal_row.writer = chunk_writer.ChunkWriter(length, 0,
304
 
                        optimize_for_size=optimize_for_size)
 
281
                        optimize_for_size=self._optimize_for_size)
305
282
                    internal_row.writer.write(_INTERNAL_FLAG)
306
283
                    internal_row.writer.write(_INTERNAL_OFFSET +
307
284
                        str(rows[pos + 1].nodes) + "\n")
319
296
            new_row = True
320
297
            for row in reversed(rows[:-1]):
321
298
                # Mark the start of the next node in the node above. If it
322
 
                # doesn't fit then propagate upwards until we find one that
 
299
                # doesn't fit then propogate upwards until we find one that
323
300
                # it does fit into.
324
301
                if row.writer.write(key_line):
325
302
                    row.finish_node()
345
322
                new_row.writer.write(_INTERNAL_OFFSET +
346
323
                    str(rows[1].nodes - 1) + "\n")
347
324
                new_row.writer.write(key_line)
348
 
            self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
 
325
            self._add_key(string_key, line, rows)
349
326
 
350
 
    def _write_nodes(self, node_iterator, allow_optimize=True):
 
327
    def _write_nodes(self, node_iterator):
351
328
        """Write node_iterator out as a B+Tree.
352
329
 
353
330
        :param node_iterator: An iterator of sorted nodes. Each node should
354
331
            match the output given by iter_all_entries.
355
 
        :param allow_optimize: If set to False, prevent setting the optimize
356
 
            flag when writing out. This is used by the _spill_mem_keys_to_disk
357
 
            functionality.
358
332
        :return: A file handle for a temporary file containing a B+Tree for
359
333
            the nodes.
360
334
        """
370
344
        self.row_lengths = []
371
345
        # Loop over all nodes adding them to the bottom row
372
346
        # (rows[-1]). When we finish a chunk in a row,
373
 
        # propagate the key that didn't fit (comes after the chunk) to the
 
347
        # propogate the key that didn't fit (comes after the chunk) to the
374
348
        # row above, transitively.
375
349
        for node in node_iterator:
376
350
            if key_count == 0:
379
353
            key_count += 1
380
354
            string_key, line = _btree_serializer._flatten_node(node,
381
355
                                    self.reference_lists)
382
 
            self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
 
356
            self._add_key(string_key, line, rows)
383
357
        for row in reversed(rows):
384
358
            pad = (type(row) != _LeafBuilderRow)
385
359
            row.finish_node(pad=pad)
386
 
        result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
 
360
        result = tempfile.NamedTemporaryFile()
387
361
        lines = [_BTSIGNATURE]
388
362
        lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
389
363
        lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
642
616
        # Default max size is 100,000 leave values
643
617
        self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
644
618
        self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
645
 
        # We could limit this, but even a 300k record btree has only 3k leaf
646
 
        # nodes, and only 20 internal nodes. So the default of 100 nodes in an
647
 
        # LRU would mean we always cache everything anyway, no need to pay the
648
 
        # overhead of LRU
649
 
        self._internal_node_cache = fifo_cache.FIFOCache(100)
 
619
        self._internal_node_cache = lru_cache.LRUCache()
650
620
        self._key_count = None
651
621
        self._row_lengths = None
652
622
        self._row_offsets = None # Start of each row, [-1] is the end