~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/btree_index.py

Committer: Andrew Bennetts
Date: 2009-10-15 02:11:18 UTC
mfrom: (4744 +trunk)
mto: (4744.3.1 robust-cleanup-in-commit)
mto: This revision was merged to the branch mainline in revision 4775.
Revision ID: andrew.bennetts@canonical.com-20091015021118-2tlgbu01py4837zr

Merge lp:bzr.

files added:
bzrlib/_export_c_api.h

bzrlib/_import_c_api.h

bzrlib/_simple_set_pyx.pxd

bzrlib/_simple_set_pyx.pyx

bzrlib/_static_tuple_c.c

bzrlib/_static_tuple_c.h

bzrlib/_static_tuple_c.pxd

bzrlib/_static_tuple_py.py

bzrlib/tests/script.py

bzrlib/tests/test__simple_set.py

bzrlib/tests/test__static_tuple.py

bzrlib/tests/test_script.py

bzrlib/transport/pathfilter.py

tools/packaging/update-control.sh

files modified:
.bzrignore

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/annotate.py

bzrlib/bencode.py

bzrlib/branch.py

bzrlib/btree_index.py

bzrlib/builtins.py

bzrlib/bundle/apply_bundle.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/chk_map.py

bzrlib/commands.py

bzrlib/decorators.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/foreign.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/index.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/merge.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/python-compat.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/rio.py

bzrlib/send.py

bzrlib/smart/bzrdir.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_utils.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_pack_repository.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_transport.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_http.py

bzrlib/tests/test_index.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_version.py

bzrlib/tests/transport_util.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/ssh.py

bzrlib/upgrade.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

doc/developers/HACKING.txt

doc/developers/bug-handling.txt

doc/developers/integration.txt

doc/developers/network-protocol.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

doc/developers/testing.txt

doc/en/upgrade-guide/data_migration.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/writing_a_plugin.txt

setup.py

tools/packaging/build-packages.sh

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

Show diffs side-by-side

added added

removed removed

bzrlib/btree_index.py

"""B+Tree indices"""

import cStringIO

from bisect import bisect_right

import math

import tempfile

def __init__(self):

"""Create a _BuilderRow."""

self.nodes = 0

self.spool = tempfile.TemporaryFile()

self.spool = None# tempfile.TemporaryFile(prefix='bzr-index-row-')

self.writer = None

def finish_node(self, pad=True):

byte_lines, _, padding = self.writer.finish()

if self.nodes == 0:

self.spool = cStringIO.StringIO()

# padded note:

self.spool.write("\x00" * _RESERVED_HEADER_BYTES)

elif self.nodes == 1:

# We got bigger than 1 node, switch to a temp file

spool = tempfile.TemporaryFile(prefix='bzr-index-row-')

spool.write(self.spool.getvalue())

self.spool = spool

skipped_bytes = 0

if not pad and padding:

del byte_lines[-1]

182

189

backing_pos) = self._spill_mem_keys_and_combine()

183

190

else:

184

191

new_backing_file, size = self._spill_mem_keys_without_combining()

185

dir_path, base_name = osutils.split(new_backing_file.name)

186

192

# Note: The transport here isn't strictly needed, because we will use

187

193

# direct access to the new_backing._file object

188

new_backing = BTreeGraphIndex(get_transport(dir_path),

189

base_name, size)

194

new_backing = BTreeGraphIndex(get_transport('.'), '<temp>', size)

190

195

# GC will clean up the file

191

196

new_backing._file = new_backing_file

192

197

if self._combine_backing_indices:

379

384

for row in reversed(rows):

380

385

pad = (type(row) != _LeafBuilderRow)

381

386

row.finish_node(pad=pad)

382

result = tempfile.NamedTemporaryFile(prefix='bzr-index-')

383

387

lines = [_BTSIGNATURE]

384

388

lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')

385

389

lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')

386

390

lines.append(_OPTION_LEN + str(key_count) + '\n')

387

391

row_lengths = [row.nodes for row in rows]

388

392

lines.append(_OPTION_ROW_LENGTHS + ','.join(map(str, row_lengths)) + '\n')

393

if row_lengths and row_lengths[-1] > 1:

394

result = tempfile.NamedTemporaryFile(prefix='bzr-index-')

395

else:

396

result = cStringIO.StringIO()

389

397

result.writelines(lines)

390

398

position = sum(map(len, lines))

391

399

root_row = True

628

636

memory except when very large walks are done.

629

637

"""

630

638

631

def __init__(self, transport, name, size):

639

def __init__(self, transport, name, size, unlimited_cache=False):

632

640

"""Create a B+Tree index object on the index name.

633

641

634

642

:param transport: The transport to read data for the index from.

638

646

the initial read (to read the root node header) can be done

639

647

without over-reading even on empty indices, and on small indices

640

648

allows single-IO to read the entire index.

649

:param unlimited_cache: If set to True, then instead of using an

650

LRUCache with size _NODE_CACHE_SIZE, we will use a dict and always

651

cache all leaf nodes.

641

652

"""

642

653

self._transport = transport

643

654

self._name = name

647

658

self._root_node = None

648

659

# Default max size is 100,000 leaf values

649

660

self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)

650

self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)

651

# We could limit this, but even a 300k record btree has only 3k leaf

652

# nodes, and only 20 internal nodes. So the default of 100 nodes in an

653

# LRU would mean we always cache everything anyway, no need to pay the

654

# overhead of LRU

655

self._internal_node_cache = fifo_cache.FIFOCache(100)

661

if unlimited_cache:

662

self._leaf_node_cache = {}

663

self._internal_node_cache = {}

664

else:

665

self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)

666

# We use a FIFO here just to prevent possible blowout. However, a

667

# 300k record btree has only 3k leaf nodes, and only 20 internal

668

# nodes. A value of 100 scales to ~100*100*100 = 1M records.

669

self._internal_node_cache = fifo_cache.FIFOCache(100)

656

670

self._key_count = None

657

671

self._row_lengths = None

658

672

self._row_offsets = None # Start of each row, [-1] is the end

690

704

if start_of_leaves is None:

691

705

start_of_leaves = self._row_offsets[-2]

692

706

if node_pos < start_of_leaves:

693

self._internal_node_cache.add(node_pos, node)

707

self._internal_node_cache[node_pos] = node

694

708

else:

695

self._leaf_node_cache.add(node_pos, node)

709

self._leaf_node_cache[node_pos] = node

696

710

found[node_pos] = node

697

711

return found

698

712

1526

1540

1527

1541

try:

1528

1542

from bzrlib import _btree_serializer_pyx as _btree_serializer

1529

except ImportError:

1543

except ImportError, e:

1544

osutils.failed_to_load_extension(e)

1530

1545

from bzrlib import _btree_serializer_py as _btree_serializer

Older »