~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/btree_index.py

  • Committer: Matt Nordhoff
  • Date: 2009-04-04 02:50:01 UTC
  • mfrom: (4253 +trunk)
  • mto: This revision was merged to the branch mainline in revision 4256.
  • Revision ID: mnordhoff@mattnordhoff.com-20090404025001-z1403k0tatmc8l91
Merge bzr.dev, fixing conflicts.

Show diffs side-by-side

added added

removed removed

Lines of Context:
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
#
17
17
 
18
18
"""B+Tree indices"""
30
30
    chunk_writer,
31
31
    debug,
32
32
    errors,
 
33
    fifo_cache,
33
34
    index,
34
35
    lru_cache,
35
36
    osutils,
180
181
        combine mem with the first and second indexes, creating a new one of
181
182
        size 4x. On the fifth create a single new one, etc.
182
183
        """
183
 
        iterators_to_combine = [self._iter_mem_nodes()]
184
 
        pos = -1
185
 
        for pos, backing in enumerate(self._backing_indices):
186
 
            if backing is None:
187
 
                pos -= 1
188
 
                break
189
 
            iterators_to_combine.append(backing.iter_all_entries())
190
 
        backing_pos = pos + 1
191
 
        new_backing_file, size = \
192
 
            self._write_nodes(self._iter_smallest(iterators_to_combine))
 
184
        if self._combine_backing_indices:
 
185
            (new_backing_file, size,
 
186
             backing_pos) = self._spill_mem_keys_and_combine()
 
187
        else:
 
188
            new_backing_file, size = self._spill_mem_keys_without_combining()
193
189
        dir_path, base_name = osutils.split(new_backing_file.name)
194
190
        # Note: The transport here isn't strictly needed, because we will use
195
191
        #       direct access to the new_backing._file object
197
193
                                      base_name, size)
198
194
        # GC will clean up the file
199
195
        new_backing._file = new_backing_file
200
 
        if len(self._backing_indices) == backing_pos:
201
 
            self._backing_indices.append(None)
202
 
        self._backing_indices[backing_pos] = new_backing
203
 
        for pos in range(backing_pos):
204
 
            self._backing_indices[pos] = None
 
196
        if self._combine_backing_indices:
 
197
            if len(self._backing_indices) == backing_pos:
 
198
                self._backing_indices.append(None)
 
199
            self._backing_indices[backing_pos] = new_backing
 
200
            for backing_pos in range(backing_pos):
 
201
                self._backing_indices[backing_pos] = None
 
202
        else:
 
203
            self._backing_indices.append(new_backing)
205
204
        self._keys = set()
206
205
        self._nodes = {}
207
206
        self._nodes_by_key = None
208
207
 
 
208
    def _spill_mem_keys_without_combining(self):
 
209
        return self._write_nodes(self._iter_mem_nodes(), allow_optimize=False)
 
210
 
 
211
    def _spill_mem_keys_and_combine(self):
 
212
        iterators_to_combine = [self._iter_mem_nodes()]
 
213
        pos = -1
 
214
        for pos, backing in enumerate(self._backing_indices):
 
215
            if backing is None:
 
216
                pos -= 1
 
217
                break
 
218
            iterators_to_combine.append(backing.iter_all_entries())
 
219
        backing_pos = pos + 1
 
220
        new_backing_file, size = \
 
221
            self._write_nodes(self._iter_smallest(iterators_to_combine),
 
222
                              allow_optimize=False)
 
223
        return new_backing_file, size, backing_pos
 
224
 
209
225
    def add_nodes(self, nodes):
210
226
        """Add nodes to the index.
211
227
 
262
278
            except StopIteration:
263
279
                current_values[pos] = None
264
280
 
265
 
    def _add_key(self, string_key, line, rows):
 
281
    def _add_key(self, string_key, line, rows, allow_optimize=True):
266
282
        """Add a key to the current chunk.
267
283
 
268
284
        :param string_key: The key to add.
269
285
        :param line: The fully serialised key and value.
 
286
        :param allow_optimize: If set to False, prevent setting the optimize
 
287
            flag when writing out. This is used by the _spill_mem_keys_to_disk
 
288
            functionality.
270
289
        """
271
290
        if rows[-1].writer is None:
272
291
            # opening a new leaf chunk;
277
296
                    length = _PAGE_SIZE
278
297
                    if internal_row.nodes == 0:
279
298
                        length -= _RESERVED_HEADER_BYTES # padded
 
299
                    if allow_optimize:
 
300
                        optimize_for_size = self._optimize_for_size
 
301
                    else:
 
302
                        optimize_for_size = False
280
303
                    internal_row.writer = chunk_writer.ChunkWriter(length, 0,
281
 
                        optimize_for_size=self._optimize_for_size)
 
304
                        optimize_for_size=optimize_for_size)
282
305
                    internal_row.writer.write(_INTERNAL_FLAG)
283
306
                    internal_row.writer.write(_INTERNAL_OFFSET +
284
307
                        str(rows[pos + 1].nodes) + "\n")
322
345
                new_row.writer.write(_INTERNAL_OFFSET +
323
346
                    str(rows[1].nodes - 1) + "\n")
324
347
                new_row.writer.write(key_line)
325
 
            self._add_key(string_key, line, rows)
 
348
            self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
326
349
 
327
 
    def _write_nodes(self, node_iterator):
 
350
    def _write_nodes(self, node_iterator, allow_optimize=True):
328
351
        """Write node_iterator out as a B+Tree.
329
352
 
330
353
        :param node_iterator: An iterator of sorted nodes. Each node should
331
354
            match the output given by iter_all_entries.
 
355
        :param allow_optimize: If set to False, prevent setting the optimize
 
356
            flag when writing out. This is used by the _spill_mem_keys_to_disk
 
357
            functionality.
332
358
        :return: A file handle for a temporary file containing a B+Tree for
333
359
            the nodes.
334
360
        """
353
379
            key_count += 1
354
380
            string_key, line = _btree_serializer._flatten_node(node,
355
381
                                    self.reference_lists)
356
 
            self._add_key(string_key, line, rows)
 
382
            self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
357
383
        for row in reversed(rows):
358
384
            pad = (type(row) != _LeafBuilderRow)
359
385
            row.finish_node(pad=pad)
360
 
        result = tempfile.NamedTemporaryFile()
 
386
        result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
361
387
        lines = [_BTSIGNATURE]
362
388
        lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
363
389
        lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
616
642
        # Default max size is 100,000 leave values
617
643
        self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
618
644
        self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
619
 
        self._internal_node_cache = lru_cache.LRUCache()
 
645
        # We could limit this, but even a 300k record btree has only 3k leaf
 
646
        # nodes, and only 20 internal nodes. So the default of 100 nodes in an
 
647
        # LRU would mean we always cache everything anyway, no need to pay the
 
648
        # overhead of LRU
 
649
        self._internal_node_cache = fifo_cache.FIFOCache(100)
620
650
        self._key_count = None
621
651
        self._row_lengths = None
622
652
        self._row_offsets = None # Start of each row, [-1] is the end