~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/btree_index.py

Committer: Robert Collins
Date: 2005-08-23 06:52:09 UTC
mto: (974.1.50) (1185.1.10) (1092.3.1)
mto: This revision was merged to the branch mainline in revision 1139.
Revision ID: robertc@robertcollins.net-20050823065209-81cd5962c401751b

move io redirection into each test case from the global runner

files added:
build-api

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/plugins/checkperms

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testbzr

testsweet.py

files removed:
BRANCH.TODO

COPYING.txt

INSTALL

bzr.ico

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/annotate.py

bzrlib/api.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/deprecated_graph.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/urlutils.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave_commands.py

bzrlib/win32utils.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files renamed:
doc/developers/HACKING.txt => HACKING

tools/doc_generate/autodoc_man.py => bzr-man.py

bzrlib/bundle/__init__.py => bzrlib/changeset.py

contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_plugins.py => bzrlib/selftest/plugins.py

bzrlib/tests/branch_implementations/test_branch.py => bzrlib/selftest/testbranch.py

bzrlib/tests/test_diff.py => bzrlib/selftest/testdiff.py

bzrlib/tests/test_hashcache.py => bzrlib/selftest/testhashcache.py

bzrlib/tests/test_inv.py => bzrlib/selftest/testinv.py

bzrlib/tests/test_log.py => bzrlib/selftest/testlog.py

bzrlib/tests/test_merge3.py => bzrlib/selftest/testmerge3.py

bzrlib/tests/test_revision.py => bzrlib/selftest/testrevision.py

bzrlib/tests/test_revisionspec.py => bzrlib/selftest/testrevisionnamespaces.py

bzrlib/tests/blackbox/test_status.py => bzrlib/selftest/teststatus.py

bzrlib/tests/blackbox/test_versioning.py => bzrlib/selftest/versioning.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/upgrade.py => tools/history2weaves.py

bzrlib/tests/test_weave.py => tools/testweave.py

doc/en/tutorials/tutorial.txt => tutorial.txt

files modified:
.bzrignore

.rsyncexclude

Makefile

NEWS

README

TODO

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/bash/bzr.simple

contrib/pwk

contrib/zsh/_bzr

setup.py *

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/btree_index.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""B+Tree indices"""

import array

import bisect

from bisect import bisect_right

from copy import deepcopy

import math

import struct

import tempfile

import zlib

from bzrlib import (

chunk_writer,

debug,

errors,

index,

lru_cache,

osutils,

trace,

)

from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN

from bzrlib.transport import get_transport

_BTSIGNATURE = "B+Tree Graph Index 2\n"

_OPTION_ROW_LENGTHS = "row_lengths="

_LEAF_FLAG = "type=leaf\n"

_INTERNAL_FLAG = "type=internal\n"

_INTERNAL_OFFSET = "offset="

_RESERVED_HEADER_BYTES = 120

_PAGE_SIZE = 4096

# 4K per page: 4MB - 1000 entries

_NODE_CACHE_SIZE = 1000

class _BuilderRow(object):

"""The stored state accumulated while writing out a row in the index.

:ivar spool: A temporary file used to accumulate nodes for this row

in the tree.

:ivar nodes: The count of nodes emitted so far.

"""

def __init__(self):

"""Create a _BuilderRow."""

self.nodes = 0

self.spool = tempfile.TemporaryFile()

self.writer = None

def finish_node(self, pad=True):

byte_lines, _, padding = self.writer.finish()

if self.nodes == 0:

# padded note:

self.spool.write("\x00" * _RESERVED_HEADER_BYTES)

skipped_bytes = 0

if not pad and padding:

del byte_lines[-1]

skipped_bytes = padding

self.spool.writelines(byte_lines)

remainder = (self.spool.tell() + skipped_bytes) % _PAGE_SIZE

if remainder != 0:

raise AssertionError("incorrect node length: %d, %d"

% (self.spool.tell(), remainder))

self.nodes += 1

self.writer = None

class _InternalBuilderRow(_BuilderRow):
    """Row state for the internal (non-leaf) rows of the tree."""

    def finish_node(self, pad=True):
        """Finish the current internal node, which must always be padded.

        :param pad: Must be true.
        :raises AssertionError: If pad is false.
        """
        if pad:
            return _BuilderRow.finish_node(self)
        raise AssertionError("Must pad internal nodes only.")

class _LeafBuilderRow(_BuilderRow):

"""The stored state accumulated while writing out leaf rows."""

100

class BTreeBuilder(index.GraphIndexBuilder):

101

"""A Builder for B+Tree based Graph indices.

102

103

The resulting graph has the structure:

104

105

_SIGNATURE OPTIONS NODES

106

_SIGNATURE := 'B+Tree Graph Index 2' NEWLINE

107

OPTIONS := REF_LISTS KEY_ELEMENTS LENGTH ROW_LENGTHS

108

REF_LISTS := 'node_ref_lists=' DIGITS NEWLINE

109

KEY_ELEMENTS := 'key_elements=' DIGITS NEWLINE

110

LENGTH := 'len=' DIGITS NEWLINE

111

ROW_LENGTHS := 'row_lengths=' DIGITS (COMMA DIGITS)* NEWLINE

112

NODES := NODE_COMPRESSED*

113

NODE_COMPRESSED:= COMPRESSED_BYTES{4096}

114

NODE_RAW := INTERNAL | LEAF

115

INTERNAL := INTERNAL_FLAG POINTERS

116

LEAF := LEAF_FLAG ROWS

117

KEY_ELEMENT := Not-whitespace-utf8

118

KEY := KEY_ELEMENT (NULL KEY_ELEMENT)*

119

ROWS := ROW*

120

ROW := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE

121

ABSENT := 'a'

122

REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}

123

REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?

124

REFERENCE := KEY

125

VALUE := no-newline-no-null-bytes

126

"""

127

128

def __init__(self, reference_lists=0, key_elements=1, spill_at=100000):
    """See GraphIndexBuilder.__init__.

    :param spill_at: Optional parameter controlling the maximum number
        of nodes that BTreeBuilder will hold in memory.
    """
    index.GraphIndexBuilder.__init__(
        self, key_elements=key_elements, reference_lists=reference_lists)
    # In-memory state: a map of {key: (node_refs, value)}, plus the lazily
    # built prefix lookup tree (None until it is first needed).
    self._nodes = {}
    self._nodes_by_key = None
    # Spilling state: the threshold, and the indices already on disk.
    self._spill_at = spill_at
    self._backing_indices = []
    self._optimize_for_size = False

143

144

def add_node(self, key, value, references=()):
    """Add a node to the index.

    Once the number of keys held in memory reaches the spill_at
    threshold, the in-memory nodes are spilled to disk.

    :param key: The key. keys are non-empty tuples containing
        as many whitespace-free utf8 bytestrings as the key length
        defined for this index.
    :param references: An iterable of iterables of keys. Each is a
        reference to another key.
    :param value: The value to associate with the key. It may be any
        bytes as long as it does not contain NUL or newline characters.
    :raises errors.BadIndexDuplicateKey: If key is already held in
        memory.
    """
    # The second item returned (absent references) is not needed here.
    node_refs, _ = self._check_key_ref_value(key, references, value)
    if key in self._nodes:
        raise errors.BadIndexDuplicateKey(key, self)
    self._nodes[key] = (node_refs, value)
    self._keys.add(key)
    if self._key_length > 1 and self._nodes_by_key is not None:
        # Keep the already-built prefix lookup tree in step.
        self._update_nodes_by_key(key, value, node_refs)
    if len(self._keys) >= self._spill_at:
        self._spill_mem_keys_to_disk()

169

170

def _spill_mem_keys_to_disk(self):
    """Write the in memory keys down to disk to cap memory consumption.

    Existing disk indices are merged in so that sorted order is kept,
    using a powers-of-two scheme: the in-memory nodes are combined with
    every backing index that precedes the first empty (None) slot, the
    result is stored in that slot, and the slots before it are emptied.
    So the first spill writes a 1x index, the second merges it into a 2x
    index, the third writes a fresh 1x index beside the 2x one, the
    fourth merges everything into a 4x index, and so on.
    """
    sources = [self._iter_mem_nodes()]
    slot = 0
    for backing in self._backing_indices:
        if backing is None:
            break
        sources.append(backing.iter_all_entries())
        slot += 1
    merged = self._iter_smallest(sources)
    spill_file, size = self._write_nodes(merged)
    dir_path, base_name = osutils.split(spill_file.name)
    # Note: The transport here isn't strictly needed, because the new
    # index is handed the open file object directly.
    new_backing = BTreeGraphIndex(get_transport(dir_path), base_name, size)
    # GC will clean up the file
    new_backing._file = spill_file
    if slot == len(self._backing_indices):
        self._backing_indices.append(None)
    self._backing_indices[slot] = new_backing
    # Everything before the slot has been merged into the new index.
    self._backing_indices[:slot] = [None] * slot
    self._keys = set()
    self._nodes = {}
    self._nodes_by_key = None

208

209

def add_nodes(self, nodes):
    """Add nodes to the index.

    :param nodes: An iterable of (key, value, node_refs) entries when
        this index has reference lists, otherwise of (key, value)
        entries.
    """
    if not self.reference_lists:
        for key, value in nodes:
            self.add_node(key, value)
        return
    for key, value, node_refs in nodes:
        self.add_node(key, value, node_refs)

220

221

def _iter_mem_nodes(self):
    """Iterate over the nodes held in memory, in sorted key order.

    Yields (self, key, value, references) when the index has reference
    lists, otherwise (self, key, value).
    """
    nodes = self._nodes
    with_refs = self.reference_lists
    for key in sorted(nodes):
        references, value = nodes[key]
        if with_refs:
            yield self, key, value, references
        else:
            yield self, key, value

232

233

def _iter_smallest(self, iterators_to_combine):

234

if len(iterators_to_combine) == 1:

235

for value in iterators_to_combine[0]:

236

yield value

237

return

238

current_values = []

239

for iterator in iterators_to_combine:

240

try:

241

current_values.append(iterator.next())

242

except StopIteration:

243

current_values.append(None)

244

last = None

245

while True:

246

# Decorate candidates with the value to allow 2.4's min to be used.

247

candidates = [(item[1][1], item) for item

248

in enumerate(current_values) if item[1] is not None]

249

if not len(candidates):

250

return

251

selected = min(candidates)

252

# undecorate back to (pos, node)

253

selected = selected[1]

254

if last == selected[1][1]:

255

raise errors.BadIndexDuplicateKey(last, self)

256

last = selected[1][1]

257

# Yield, with self as the index

258

yield (self,) + selected[1][1:]

259

pos = selected[0]

260

try:

261

current_values[pos] = iterators_to_combine[pos].next()

262

except StopIteration:

263

current_values[pos] = None

264

265

def _add_key(self, string_key, line, rows):
    """Add a key to the current chunk.

    :param string_key: The key to add.
    :param line: The fully serialised key and value.
    :param rows: The list of builder rows; rows[0] is the root and
        rows[-1] the leaf row. A new root row is inserted at the front
        when the key cannot be recorded in any existing internal row.
    """
    leaf_row = rows[-1]
    if leaf_row.writer is None:
        # Opening a new leaf chunk: first flesh out any internal nodes
        # that are needed to preserve the height of the tree.
        for pos, internal_row in enumerate(rows[:-1]):
            if internal_row.writer is not None:
                continue
            length = _PAGE_SIZE
            if internal_row.nodes == 0:
                # first node of the row: leave the reserved header space
                length -= _RESERVED_HEADER_BYTES
            internal_row.writer = chunk_writer.ChunkWriter(
                length, 0, optimize_for_size=self._optimize_for_size)
            internal_row.writer.write(_INTERNAL_FLAG)
            internal_row.writer.write(
                _INTERNAL_OFFSET + str(rows[pos + 1].nodes) + "\n")
        # add a new leaf
        length = _PAGE_SIZE
        if leaf_row.nodes == 0:
            length -= _RESERVED_HEADER_BYTES
        leaf_row.writer = chunk_writer.ChunkWriter(
            length, optimize_for_size=self._optimize_for_size)
        leaf_row.writer.write(_LEAF_FLAG)
    if not leaf_row.writer.write(line):
        # The line was accepted by the current leaf node.
        return
    # this key did not fit in the node:
    leaf_row.finish_node()
    key_line = string_key + "\n"
    for row in reversed(rows[:-1]):
        # Mark the start of the next node in the node above. If it
        # doesn't fit then propagate upwards until we find one that
        # it does fit into.
        if not row.writer.write(key_line):
            # We've found a node that can handle the pointer.
            break
        row.finish_node()
    else:
        # We reached the current root without being able to mark the
        # division point, so we need a new root row.
        if 'index' in debug.debug_flags:
            trace.mutter('Inserting new global row.')
        root_row = _InternalBuilderRow()
        rows.insert(0, root_row)
        # This is the first node of its row, so the reserved header
        # space is subtracted from the page.
        root_row.writer = chunk_writer.ChunkWriter(
            _PAGE_SIZE - _RESERVED_HEADER_BYTES,
            0,
            optimize_for_size=self._optimize_for_size)
        root_row.writer.write(_INTERNAL_FLAG)
        root_row.writer.write(
            _INTERNAL_OFFSET + str(rows[1].nodes - 1) + "\n")
        root_row.writer.write(key_line)
    # Retry the key now that a fresh leaf node will be opened for it.
    self._add_key(string_key, line, rows)

326

327

def _write_nodes(self, node_iterator):
    """Write node_iterator out as a B+Tree.

    :param node_iterator: An iterator of sorted nodes. Each node should
        match the output given by iter_all_entries.
    :return: A (file, size) tuple: a handle on a named temporary file
        containing a B+Tree for the nodes, positioned at its start, and
        the number of bytes written to it.
    :raises AssertionError: If the header does not fit in the reserved
        space, or an internal row did not spool the expected amount of
        data.
    """
    # The index rows - rows[0] is the root, rows[1] is the layer under it
    # etc.
    rows = []
    # forward sorted by key. In future we may consider topological sorting,
    # at the cost of table scans for direct lookup, or a second index for
    # direct lookup
    key_count = 0
    # Reset on every write; nothing else in this method reads it.
    self.row_lengths = []
    # Loop over all nodes adding them to the bottom row
    # (rows[-1]). When we finish a chunk in a row,
    # propagate the key that didn't fit (comes after the chunk) to the
    # row above, transitively.
    for node in node_iterator:
        if key_count == 0:
            # First key triggers the first row
            rows.append(_LeafBuilderRow())
        key_count += 1
        string_key, line = _btree_serializer._flatten_node(
            node, self.reference_lists)
        self._add_key(string_key, line, rows)
    for row in reversed(rows):
        # Only the leaf row may leave its final node unpadded.
        row.finish_node(pad=not isinstance(row, _LeafBuilderRow))
    lines = [_BTSIGNATURE]
    lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
    lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
    lines.append(_OPTION_LEN + str(key_count) + '\n')
    row_lengths = [row.nodes for row in rows]
    lines.append(_OPTION_ROW_LENGTHS + ','.join(map(str, row_lengths)) + '\n')
    position = sum(map(len, lines))
    # Validate the header before creating the output file, so that a
    # failure does not leave an abandoned temporary file behind.
    if position > _RESERVED_HEADER_BYTES:
        raise AssertionError("Could not fit the header in the"
                             " reserved space: %d > %d"
                             % (position, _RESERVED_HEADER_BYTES))
    result = tempfile.NamedTemporaryFile()
    result.writelines(lines)
    reserved = _RESERVED_HEADER_BYTES # reserved space for first node
    # write the rows out:
    for row in rows:
        row.spool.flush()
        row.spool.seek(0)
        # copy nodes to the finalised file.
        # Special case the first node as it may be prefixed
        node = row.spool.read(_PAGE_SIZE)
        result.write(node[reserved:])
        result.write("\x00" * (reserved - position))
        position = 0 # Only the root row actually has an offset
        copied_len = osutils.pumpfile(row.spool, result)
        if copied_len != (row.nodes - 1) * _PAGE_SIZE:
            # The leaf row is exempt: its last node may be unpadded.
            if not isinstance(row, _LeafBuilderRow):
                raise AssertionError("Incorrect amount of data copied"
                                     " expected: %d, got: %d"
                                     % ((row.nodes - 1) * _PAGE_SIZE,
                                        copied_len))
    result.flush()
    size = result.tell()
    result.seek(0)
    return result, size

396

397

def finish(self):
    """Finalise the index.

    :return: A file handle for a temporary file containing the nodes added
        to the index.
    """
    entries = self.iter_all_entries()
    written = self._write_nodes(entries)
    # _write_nodes returns (file, size); only the file is handed back.
    return written[0]

404

405

def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value) entries, or of
        (index, key, value, reference_lists) entries when the index has
        reference lists. In-memory nodes are produced in sorted key
        order and merged smallest-key-first with the entries of any
        backing indices.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    # Doing serial rather than ordered would be faster; but this shouldn't
    # be getting called routinely anyway.
    sources = [self._iter_mem_nodes()]
    sources.extend([backing.iter_all_entries()
                    for backing in self._backing_indices
                    if backing is not None])
    if len(sources) == 1:
        return sources[0]
    return self._iter_smallest(sources)

424

425

def iter_entries(self, keys):
    """Iterate over keys within the index.

    Keys held in memory are yielded first; the keys still outstanding
    are then looked up in each backing index in turn.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable of (index, key, value, reference_lists), or of
        (index, key, value) when the index has no reference lists. There
        is no defined order for the result iteration.
    """
    wanted = set(keys)
    with_refs = self.reference_lists
    for key in wanted.intersection(self._keys):
        refs, value = self._nodes[key]
        if with_refs:
            yield self, key, value, refs
        else:
            yield self, key, value
    wanted.difference_update(self._keys)
    for backing in self._backing_indices:
        if backing is None:
            continue
        if not wanted:
            return
        for entry in backing.iter_entries(wanted):
            # Found: stop asking the remaining backing indices for it.
            wanted.remove(entry[1])
            yield (self,) + entry[1:]

451

452

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. Matches from the
        backing indices are yielded before the in-memory ones.
    :raises errors.BadIndexKey: If a prefix starts with None or has the
        wrong length.
    """
    # XXX: Too much duplication with the GraphIndex class; consider finding
    # a good place to pull out the actual common logic.
    prefixes = set(keys)
    if not prefixes:
        return
    for backing in self._backing_indices:
        if backing is None:
            continue
        for entry in backing.iter_entries_prefix(prefixes):
            yield (self,) + entry[1:]
    if self._key_length == 1:
        # One-element keys: a prefix is a complete key, so this is a
        # plain dictionary lookup.
        for key in prefixes:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            try:
                node = self._nodes[key]
            except KeyError:
                continue
            if self.reference_lists:
                yield self, key, node[1], node[0]
            else:
                yield self, key, node[1]
        return
    for key in prefixes:
        # sanity check
        if key[0] is None:
            raise errors.BadIndexKey(key)
        if len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        # Walk down the nested lookup dicts along the non-None elements.
        subtree = self._get_nodes_by_key()
        depth = 0
        try:
            while depth < len(key) and key[depth] is not None:
                subtree = subtree[key[depth]]
                depth += 1
        except KeyError:
            # a non-existent lookup.
            continue
        if depth == len(key):
            # Every element was given: subtree is the entry itself.
            yield (self, ) + subtree
            continue
        # Some trailing elements were None: emit everything underneath.
        pending = [subtree]
        while pending:
            subtree = pending.pop()
            # can't be empty or would not exist
            sample = subtree.itervalues().next()
            if type(sample) == dict:
                # push the nested dicts
                pending.extend(subtree.itervalues())
            else:
                # yield the entries
                for entry in subtree.itervalues():
                    yield (self, ) + entry

527

528

def _get_nodes_by_key(self):
    """Return the nested-dict view of the in-memory nodes, building it once.

    The tree has one dict level per key element; the innermost level maps
    the last key element to (key, value, references), or to (key, value)
    when the index has no reference lists.
    """
    if self._nodes_by_key is None:
        with_refs = self.reference_lists
        tree = {}
        for key, (references, value) in self._nodes.iteritems():
            subtree = tree
            for element in key[:-1]:
                subtree = subtree.setdefault(element, {})
            if with_refs:
                subtree[key[-1]] = key, value, references
            else:
                subtree[key[-1]] = key, value
        self._nodes_by_key = tree
    return self._nodes_by_key

545

546

def key_count(self):
    """Return an estimate of the number of keys in this index.

    The result is the number of keys held in memory plus whatever each
    backing index reports from its own key_count().
    """
    total = len(self._keys)
    for backing in self._backing_indices:
        if backing is not None:
            total += backing.key_count()
    return total

553

554

def validate(self):
    """In-memory indices have no known corruption at the moment."""
    return None

556

557

558

class _LeafNode(object):
    """A leaf node for a serialised B+Tree index.

    :ivar keys: A dict built from the pairs produced by
        _btree_serializer._parse_leaf_lines.
    """

    def __init__(self, bytes, key_length, ref_list_length):
        """Parse bytes to create a leaf node object.

        :param bytes: The serialised content of the leaf node.
        :param key_length: Passed through to the leaf line parser.
        :param ref_list_length: Passed through to the leaf line parser.
        """
        # splitlines mangles the \r delimiters.. don't use it.
        parsed = _btree_serializer._parse_leaf_lines(
            bytes, key_length, ref_list_length)
        self.keys = dict(parsed)

566

567

568

class _InternalNode(object):

569

"""An internal node for a serialised B+Tree index."""

570

571

def __init__(self, bytes):

572

"""Parse bytes to create an internal node object."""

573

# splitlines mangles the \r delimiters.. don't use it.

574

self.keys = self._parse_lines(bytes.split('\n'))

575

576

def _parse_lines(self, lines):

577

nodes = []

578

self.offset = int(lines[1][7:])

579

for line in lines[2:]:

580

if line == '':

581

break

582

nodes.append(tuple(line.split('\0')))

583

return nodes

584

585

586

class BTreeGraphIndex(object):

587

"""Access to nodes via the standard GraphIndex interface for B+Tree's.

588

589

Individual nodes are held in a LRU cache. This holds the root node in

590

memory except when very large walks are done.

591

"""

592

593

def __init__(self, transport, name, size):
    """Create a B+Tree index object on the index name.

    :param transport: The transport to read data for the index from.
    :param name: The file name of the index on transport.
    :param size: Optional size of the index in bytes. This allows
        compatibility with the GraphIndex API, as well as ensuring that
        the initial read (to read the root node header) can be done
        without over-reading even on empty indices, and on small indices
        allows single-IO to read the entire index.
    """
    # Where the index lives.
    self._transport = transport
    self._name = name
    self._size = size
    self._file = None
    self._recommended_pages = self._compute_recommended_pages()
    # Header-derived state, unknown until the root node has been read.
    self._key_count = None
    self._row_lengths = None
    self._row_offsets = None # Start of each row, [-1] is the end
    # Node caches. The root node is held outside the LRU caches.
    self._root_node = None
    # Default max size is 100,000 leaf values
    self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
    self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
    self._internal_node_cache = lru_cache.LRUCache()

617

618

def __eq__(self, other):

619

"""Equal when self and other were created with the same parameters."""

620

return (

621

type(self) == type(other) and

622

self._transport == other._transport and

623

self._name == other._name and

624

self._size == other._size)

625

626

def __ne__(self, other):

627

return not self.__eq__(other)

628

629

def _get_and_cache_nodes(self, nodes):

630

"""Read nodes and cache them in the lru.

631

632

The nodes list supplied is sorted and then read from disk, each node

633

being inserted it into the _node_cache.

634

635

Note: Asking for more nodes than the _node_cache can contain will

636

result in some of the results being immediately discarded, to prevent

637

this an assertion is raised if more nodes are asked for than are

638

cachable.

639

640

:return: A dict of {node_pos: node}

641

"""

642

found = {}

643

start_of_leaves = None

644

for node_pos, node in self._read_nodes(sorted(nodes)):

645

if node_pos == 0: # Special case

646

self._root_node = node

647

else:

648

if start_of_leaves is None:

649

start_of_leaves = self._row_offsets[-2]

650

if node_pos < start_of_leaves:

651

self._internal_node_cache.add(node_pos, node)

652

else:

653

self._leaf_node_cache.add(node_pos, node)

654

found[node_pos] = node

655

return found

656

657

def _compute_recommended_pages(self):
    """Convert transport's recommended_page_size into btree pages.

    recommended_page_size is in bytes; the result is the number of
    _PAGE_SIZE pages needed to cover that length, rounded up.
    """
    recommended_read = self._transport.recommended_page_size()
    pages = math.ceil(recommended_read / float(_PAGE_SIZE))
    return int(pages)

667

668

def _compute_total_pages_in_index(self):

669

"""How many pages are in the index.

670

671

If we have read the header we will use the value stored there.

672

Otherwise it will be computed based on the length of the index.

673

"""

674

if self._size is None:

675

raise AssertionError('_compute_total_pages_in_index should not be'

676

' called when self._size is None')

677

if self._root_node is not None:

678

# This is the number of pages as defined by the header

679

return self._row_offsets[-1]

680

# This is the number of pages as defined by the size of the index. They

681

# should be indentical.

682

total_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))

683

return total_pages

684

685

def _expand_offsets(self, offsets):
    """Find extra pages to download.

    The idea is that we always want to make big-enough requests (like 64kB
    for http), so that we don't waste round trips. So given the entries
    that we already have cached and the new pages being downloaded figure
    out what other pages we might want to read.

    See also doc/developers/btree_index_prefetch.txt for more details.

    :param offsets: The offsets to be read
    :return: A list of offsets to download
    """
    if 'index' in debug.debug_flags:
        trace.mutter('expanding: %s\toffsets: %s', self._name, offsets)

    if len(offsets) >= self._recommended_pages:
        # Don't add more, we are already requesting more than enough
        if 'index' in debug.debug_flags:
            trace.mutter(' not expanding large request (%s >= %s)',
                         len(offsets), self._recommended_pages)
        return offsets
    if self._size is None:
        # Don't try anything, because we don't know where the file ends
        if 'index' in debug.debug_flags:
            trace.mutter(' not expanding without knowing index size')
        return offsets
    total_pages = self._compute_total_pages_in_index()
    cached_offsets = self._get_offsets_to_cached_pages()
    unread_count = total_pages - len(cached_offsets)
    if unread_count <= self._recommended_pages:
        # Reading recommended_pages would read the rest of the index, so
        # just read whatever is left.
        if cached_offsets:
            expanded = [page for page in xrange(total_pages)
                        if page not in cached_offsets]
        else:
            expanded = range(total_pages)
        if 'index' in debug.debug_flags:
            trace.mutter(' reading all unread pages: %s', expanded)
        return expanded

    if self._root_node is not None:
        tree_depth = len(self._row_lengths)
        if len(cached_offsets) < tree_depth and len(offsets) == 1:
            # We haven't read enough to justify expansion.
            # If we are only going to read the root node, and 1 leaf node,
            # then it isn't worth expanding our request. Once we've read at
            # least 2 nodes, then we are probably doing a search, and we
            # start expanding our requests.
            if 'index' in debug.debug_flags:
                trace.mutter(' not expanding on first reads')
            return offsets
        candidates = self._expand_to_neighbors(offsets, cached_offsets,
                                               total_pages)
    else:
        # ATM on the first read of the root node of a large index, we don't
        # bother pre-reading any other pages. This is because the
        # likelihood of actually reading interesting pages is very low.
        # See doc/developers/btree_index_prefetch.txt for a discussion, and
        # a possible implementation when we are guessing that the second
        # layer index is small
        candidates = offsets

    final_offsets = sorted(candidates)
    if 'index' in debug.debug_flags:
        trace.mutter('expanded: %s', final_offsets)
    return final_offsets

753

754

def _expand_to_neighbors(self, offsets, cached_offsets, total_pages):

755

"""Expand requests to neighbors until we have enough pages.

756

757

This is called from _expand_offsets after policy has determined that we

758

want to expand.

759

We only want to expand requests within a given layer. We cheat a little

760

bit and assume all requests will be in the same layer. This is true

761

given the current design, but if it changes this algorithm may perform

762

oddly.

763

764

:param offsets: requested offsets

765

:param cached_offsets: offsets for pages we currently have cached

766

:return: A set() of offsets after expansion

767

"""

768

final_offsets = set(offsets)

769

first = end = None

770

new_tips = set(final_offsets)

771

while len(final_offsets) < self._recommended_pages and new_tips:

772

next_tips = set()

773

for pos in new_tips:

774

if first is None:

775

first, end = self._find_layer_first_and_end(pos)

776

previous = pos - 1

777

if (previous > 0

778

and previous not in cached_offsets

779

and previous not in final_offsets

780

and previous >= first):

781

next_tips.add(previous)

782

after = pos + 1

783

if (after < total_pages

784

and after not in cached_offsets

785

and after not in final_offsets

786

and after < end):

787

next_tips.add(after)

788

# This would keep us from going bigger than

789

# recommended_pages by only expanding the first offsets.

790

# However, if we are making a 'wide' request, it is

791

# reasonable to expand all points equally.

792

# if len(final_offsets) > recommended_pages:

793

# break

794

final_offsets.update(next_tips)

795

new_tips = next_tips

796

return final_offsets

797

798

def _find_layer_first_and_end(self, offset):

799

"""Find the start/stop nodes for the layer corresponding to offset.

800

801

:return: (first, end)

802

first is the first node in this layer

803

end is the first node of the next layer

804

"""

805

first = end = 0

806

for roffset in self._row_offsets:

807

first = end

808

end = roffset

809

if offset < roffset:

810

break

811

return first, end

812

813

def _get_offsets_to_cached_pages(self):

814

"""Determine what nodes we already have cached."""

815

cached_offsets = set(self._internal_node_cache.keys())

816

cached_offsets.update(self._leaf_node_cache.keys())

817

if self._root_node is not None:

818

cached_offsets.add(0)

819

return cached_offsets

820

821

def _get_root_node(self):

822

if self._root_node is None:

823

# We may not have a root node yet

824

self._get_internal_nodes([0])

825

return self._root_node

826

827

def _get_nodes(self, cache, node_indexes):

828

found = {}

829

needed = []

830

for idx in node_indexes:

831

if idx == 0 and self._root_node is not None:

832

found[0] = self._root_node

833

continue

834

try:

835

found[idx] = cache[idx]

836

except KeyError:

837

needed.append(idx)

838

if not needed:

839

return found

840

needed = self._expand_offsets(needed)

841

found.update(self._get_and_cache_nodes(needed))

842

return found

843

844

def _get_internal_nodes(self, node_indexes):

845

"""Get a node, from cache or disk.

846

847

After getting it, the node will be cached.

848

"""

849

return self._get_nodes(self._internal_node_cache, node_indexes)

850

851

def _get_leaf_nodes(self, node_indexes):

852

"""Get a bunch of nodes, from cache or disk."""

853

found = self._get_nodes(self._leaf_node_cache, node_indexes)

854

if self._leaf_value_cache is not None:

855

for node in found.itervalues():

856

for key, value in node.keys.iteritems():

857

if key in self._leaf_value_cache:

858

# Don't add the rest of the keys, we've seen this node

859

# before.

860

break

861

self._leaf_value_cache[key] = value

862

return found

863

864

def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value) or
        (index, key, value, reference_lists). The former tuple is used
        when there are no reference lists in the index, making the API
        compatible with simple key:value index types. Leaf nodes are
        read in position order and the keys of each node are sorted.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if not self.key_count():
        return
    # NOTE(review): presumably key_count() has populated _row_offsets by
    # reading the header -- confirm.
    start_of_leaves = self._row_offsets[-2]
    end_of_leaves = self._row_offsets[-1]
    leaf_positions = range(start_of_leaves, end_of_leaves)
    # We iterate strictly in-order so that we can use this function
    # for spilling index builds to disk.
    with_refs = self.node_ref_lists
    for _, node in self._read_nodes(leaf_positions):
        for key, (value, refs) in sorted(node.keys.items()):
            if with_refs:
                yield (self, key, value, refs)
            else:
                yield (self, key, value)

891

892

@staticmethod

893

def _multi_bisect_right(in_keys, fixed_keys):

894

"""Find the positions where each 'in_key' would fit in fixed_keys.

895

896

This is equivalent to doing "bisect_right" on each in_key into

897

fixed_keys

898

899

:param in_keys: A sorted list of keys to match with fixed_keys

900

:param fixed_keys: A sorted list of keys to match against

901

:return: A list of (integer position, [key list]) tuples.

902

"""

903

if not in_keys:

904

return []

905

if not fixed_keys:

906

# no pointers in the fixed_keys list, which means everything must

907

# fall to the left.

908

return [(0, in_keys)]

909

910

# TODO: Iterating both lists will generally take M + N steps

911

# Bisecting each key will generally take M * log2 N steps.

912

# If we had an efficient way to compare, we could pick the method

913

# based on which has the fewer number of steps.

914

# There is also the argument that bisect_right is a compiled

915

# function, so there is even more to be gained.

916

# iter_steps = len(in_keys) + len(fixed_keys)

917

# bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)

918

if len(in_keys) == 1: # Bisect will always be faster for M = 1

919

return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]

920

# elif bisect_steps < iter_steps:

921

# offsets = {}

922

# for key in in_keys:

923

# offsets.setdefault(bisect_right(fixed_keys, key),

924

# []).append(key)

925

# return [(o, offsets[o]) for o in sorted(offsets)]

926

in_keys_iter = iter(in_keys)

927

fixed_keys_iter = enumerate(fixed_keys)

928

cur_in_key = in_keys_iter.next()

929

cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()

930

931

class InputDone(Exception): pass

932

class FixedDone(Exception): pass

933

934

output = []

935

cur_out = []

936

937

# TODO: Another possibility is that rather than iterating on each side,

938

# we could use a combination of bisecting and iterating. For

939

# example, while cur_in_key < fixed_key, bisect to find its

940

# point, then iterate all matching keys, then bisect (restricted

941

# to only the remainder) for the next one, etc.

942

try:

943

while True:

944

if cur_in_key < cur_fixed_key:

945

cur_keys = []

946

cur_out = (cur_fixed_offset, cur_keys)

947

output.append(cur_out)

948

while cur_in_key < cur_fixed_key:

949

cur_keys.append(cur_in_key)

950

try:

951

cur_in_key = in_keys_iter.next()

952

except StopIteration:

953

raise InputDone

954

# At this point cur_in_key must be >= cur_fixed_key

955

# step the cur_fixed_key until we pass the cur key, or walk off

956

# the end

957

while cur_in_key >= cur_fixed_key:

958

try:

959

cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()

960

except StopIteration:

961

raise FixedDone

962

except InputDone:

963

# We consumed all of the input, nothing more to do

964

pass

965

except FixedDone:

966

# There was some input left, but we consumed all of fixed, so we

967

# have to add one more for the tail

968

cur_keys = [cur_in_key]

969

cur_keys.extend(in_keys_iter)

970

cur_out = (len(fixed_keys), cur_keys)

971

output.append(cur_out)

972

return output

973

974

def iter_entries(self, keys):
    """Iterate over keys within the index.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned exactly once.
    """
    # 6 seconds spent in miss_torture using the sorted() line.
    # Even with out of order disk IO it seems faster not to sort it when
    # large queries are being made.
    # However, now that we are doing multi-way bisecting, we need the keys
    # in sorted order anyway. We could change the multi-way code to not
    # require sorted order. (For example, it bisects for the first node,
    # does an in-order search until a key comes before the current point,
    # which it then bisects for, etc.)
    keys = frozenset(keys)
    if not keys:
        return

    if not self.key_count():
        return

    if self._leaf_value_cache is None:
        needed_keys = keys
    else:
        needed_keys = []
        for key in keys:
            cached = self._leaf_value_cache.get(key, None)
            if cached is None:
                # Not in the value cache: it has to be looked up in the
                # tree below.
                needed_keys.append(key)
                continue
            # Served straight from the value cache. The key must not be
            # looked up again below, or it would be yielded twice.
            value, refs = cached
            if self.node_ref_lists:
                yield (self, key, value, refs)
            else:
                yield (self, key, value)

    if not needed_keys:
        return
    # The multi-way bisect requires its input keys in sorted order.
    needed_keys = sorted(needed_keys)

    # Start with every needed key assigned to the root node (index 0) and
    # push the keys down one internal row at a time.
    nodes_and_keys = [(0, needed_keys)]

    for next_row_start in self._row_offsets[1:-1]:
        node_indexes = [idx for idx, s_keys in nodes_and_keys]
        nodes = self._get_internal_nodes(node_indexes)

        next_nodes_and_keys = []
        for node_index, sub_keys in nodes_and_keys:
            node = nodes[node_index]
            positions = self._multi_bisect_right(sub_keys, node.keys)
            node_offset = next_row_start + node.offset
            next_nodes_and_keys.extend([(node_offset + pos, s_keys)
                                        for pos, s_keys in positions])
        nodes_and_keys = next_nodes_and_keys
    # We should now be at the _LeafNodes
    node_indexes = [idx for idx, s_keys in nodes_and_keys]

    # TODO: We may *not* want to always read all the nodes in one
    #       big go. Consider setting a max size on this.

    nodes = self._get_leaf_nodes(node_indexes)
    for node_index, sub_keys in nodes_and_keys:
        if not sub_keys:
            continue
        node = nodes[node_index]
        for next_sub_key in sub_keys:
            if next_sub_key in node.keys:
                value, refs = node.keys[next_sub_key]
                if self.node_ref_lists:
                    yield (self, next_sub_key, value, refs)
                else:
                    yield (self, next_sub_key, value)

1054

1055

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    WARNING: Note that this method currently causes a full index parse
    unconditionally (which is reasonably appropriate as it is a means for
    thunking many small indices into one larger one and still supplies
    iter_all_entries at the thunk layer).

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    """
    prefixes = sorted(set(keys))
    if not prefixes:
        return
    # Load if needed to check key lengths
    if self._key_count is None:
        self._get_root_node()
    # TODO: only access nodes that can satisfy the prefixes we are looking
    # for. For now, to meet API usage (as this function is not used by
    # current bzrlib) just suck the entire index and iterate in memory.
    with_refs = self.node_ref_lists
    single_element = (self._key_length == 1)
    # For one-element keys: key -> tuple of the fields that follow the key.
    flat = {}
    # For longer keys: nested dicts, one level per key element, so that a
    # key of (foo, bar, baz) is stored as trie[foo][bar][baz] = entry.
    trie = {}
    for entry in self.iter_all_entries():
        if with_refs:
            _1, key, value, refs = entry
            payload = (value, refs)
        else:
            _1, key, value = entry
            payload = (value,)
        if single_element:
            flat[key] = payload
            continue
        level = trie
        for subkey in key[:-1]:
            level = level.setdefault(subkey, {})
        level[key[-1]] = (key,) + payload

    for prefix in prefixes:
        # sanity check
        if prefix[0] is None or len(prefix) != self._key_length:
            raise errors.BadIndexKey(prefix)
        if single_element:
            # With one element there is no real prefix, only exact lookup.
            try:
                payload = flat[prefix]
            except KeyError:
                continue
            yield (self, prefix) + payload
            continue
        # Walk down the trie for as long as the prefix names elements.
        level = trie
        depth = 0
        try:
            while depth < len(prefix) and prefix[depth] is not None:
                level = level[prefix[depth]]
                depth += 1
        except KeyError:
            # a non-existent lookup.
            continue
        if depth == len(prefix):
            # Every element was given, so the last thing looked up was a
            # terminal entry.
            yield (self, ) + level
            continue
        # Otherwise yield every entry stored beneath the point reached.
        pending = [level]
        while pending:
            level = pending.pop(-1)
            # can't be empty or would not exist
            children = list(level.values())
            if isinstance(children[0], dict):
                # Still above the leaves: descend into each sub-dict.
                pending.extend(children)
            else:
                # each child is the key:value:node refs tuple ready to
                # yield.
                for child in children:
                    yield (self, ) + child

1166

1167

def key_count(self):
    """Return an estimate of the number of keys in this index.

    For BTreeGraphIndex the estimate is exact as it is contained in the
    header. The root node is fetched first if the count has not been
    loaded yet.
    """
    if self._key_count is not None:
        return self._key_count
    # Not loaded yet: fetching the root node fills in _key_count.
    self._get_root_node()
    return self._key_count

1176

1177

def _compute_row_offsets(self):

1178

"""Fill out the _row_offsets attribute based on _row_lengths."""

1179

offsets = []

1180

row_offset = 0

1181

for row in self._row_lengths:

1182

offsets.append(row_offset)

1183

row_offset += row

1184

offsets.append(row_offset)

1185

self._row_offsets = offsets

1186

1187

def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    Sets node_ref_lists, _key_length, _key_count, _row_lengths and (via
    _compute_row_offsets) _row_offsets from the four option lines that
    follow the signature.

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data. (which may be of length 0).
    :raises BadIndexFormatSignature: If the data does not start with the
        signature of this index type.
    :raises BadIndexOptions: If an option line has the wrong prefix or a
        value that is not an integer.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if signature != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, BTreeGraphIndex)
    lines = bytes[len(expected_signature):].splitlines()

    def option_value(line, prefix):
        # Return the text that follows the expected option prefix.
        if not line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        return line[len(prefix):]

    def int_option(line, prefix):
        # Return the integer that follows the expected option prefix.
        text = option_value(line, prefix)
        try:
            return int(text)
        except ValueError:
            raise errors.BadIndexOptions(self)

    self.node_ref_lists = int_option(lines[0], _OPTION_NODE_REFS)
    self._key_length = int_option(lines[1], _OPTION_KEY_ELEMENTS)
    self._key_count = int_option(lines[2], _OPTION_LEN)
    # The row lengths are comma separated; empty fields are ignored.
    row_text = option_value(lines[3], _OPTION_ROW_LENGTHS)
    try:
        self._row_lengths = [int(length) for length in row_text.split(',')
                             if len(length)]
    except ValueError:
        raise errors.BadIndexOptions(self)
    self._compute_row_offsets()

    # calculate the bytes we have processed: the signature, the four
    # option lines, and one byte for each of the four line terminators
    # that splitlines() stripped.
    header_end = len(signature) + 4
    for line in lines[0:4]:
        header_end += len(line)
    return header_end, bytes[header_end:]

1233

1234

def _read_nodes(self, nodes):

1235

"""Read some nodes from disk into the LRU cache.

1236

1237

This performs a readv to get the node data into memory, and parses each

1238

node, the yields it to the caller. The nodes are requested in the

1239

supplied order. If possible doing sort() on the list before requesting

1240

a read may improve performance.

1241

1242

:param nodes: The nodes to read. 0 - first node, 1 - second node etc.

1243

:return: None

1244

"""

1245

ranges = []

1246

for index in nodes:

1247

offset = index * _PAGE_SIZE

1248

size = _PAGE_SIZE

1249

if index == 0:

1250

# Root node - special case

1251

if self._size:

1252

size = min(_PAGE_SIZE, self._size)

1253

else:

1254

stream = self._transport.get(self._name)

1255

start = stream.read(_PAGE_SIZE)

1256

# Avoid doing this again

1257

self._size = len(start)

1258

size = min(_PAGE_SIZE, self._size)

1259

else:

1260

if offset > self._size:

1261

raise AssertionError('tried to read past the end'

1262

' of the file %s > %s'

1263

% (offset, self._size))

1264

size = min(size, self._size - offset)

1265

ranges.append((offset, size))

1266

if not ranges:

1267

return

1268

if self._file is None:

1269

data_ranges = self._transport.readv(self._name, ranges)

1270

else:

1271

data_ranges = []

1272

for offset, size in ranges:

1273

self._file.seek(offset)

1274

data_ranges.append((offset, self._file.read(size)))

1275

for offset, data in data_ranges:

1276

if offset == 0:

1277

# extract the header

1278

offset, data = self._parse_header_from_bytes(data)

1279

if len(data) == 0:

1280

continue

1281

bytes = zlib.decompress(data)

1282

if bytes.startswith(_LEAF_FLAG):

1283

node = _LeafNode(bytes, self._key_length, self.node_ref_lists)

1284

elif bytes.startswith(_INTERNAL_FLAG):

1285

node = _InternalNode(bytes)

1286

else:

1287

raise AssertionError("Unknown node type for %r" % bytes)

1288

yield offset / _PAGE_SIZE, node

1289

1290

def _signature(self):
    """Return the file signature that identifies this index type."""
    signature = _BTSIGNATURE
    return signature

1293

1294

def validate(self):
    """Validate that everything in the index can be accessed.

    This fetches the root node and then reads and parses every node from
    the start of the second row up to the end of the last row.
    """
    self._get_root_node()
    # With a single row there is nothing after the root, so the range
    # below should come out empty and we shouldn't be reading anything.
    first_node = 1
    if len(self._row_lengths) > 1:
        first_node = self._row_offsets[1]
    end_node = self._row_offsets[-1]
    # Parsing happens as the generator is consumed; the nodes themselves
    # are not needed.
    for _unused in self._read_nodes(range(first_node, end_node)):
        pass

1306

1307

1308

try:

1309

from bzrlib import _btree_serializer_c as _btree_serializer

1310

except ImportError:

1311

from bzrlib import _btree_serializer_py as _btree_serializer

Older »