~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/btree_index.py

Committer: Martin Pool
Date: 2005-07-22 18:05:47 UTC
Revision ID: mbp@sourcefrog.net-20050722180547-fbfa10a567eca667

- refactor imports and stats for hashcache

files added:
build-api

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/new-inventory-sample.xml

notes/performance.txt

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

testbzr

testsweet.py

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

files removed:
BRANCH.TODO

COPYING.txt

INSTALL

Makefile

bzr.ico

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/annotate.py

bzrlib/api.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave_commands.py

bzrlib/win32utils.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/emacs

contrib/emacs/bzr-mode.el

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files renamed:
tools/doc_generate/autodoc_man.py => bzr-man.py

bzrlib/bundle/__init__.py => bzrlib/changeset.py

contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_plugins.py => bzrlib/selftest/plugins.py

bzrlib/tests/branch_implementations/test_branch.py => bzrlib/selftest/testbranch.py

bzrlib/tests/test_hashcache.py => bzrlib/selftest/testhashcache.py

bzrlib/tests/test_merge3.py => bzrlib/selftest/testmerge3.py

bzrlib/tests/test_revisionspec.py => bzrlib/selftest/testrevisionnamespaces.py

bzrlib/tests/blackbox/test_status.py => bzrlib/selftest/teststatus.py

bzrlib/tests/blackbox/test_versioning.py => bzrlib/selftest/versioning.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/util/effbot/ => effbot/

bzrlib/util/elementtree/ => elementtree/

bzrlib/plugins/ => plugins/

bzrlib/tests/test_weave.py => tools/testweave.py

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/bash/bzr.simple

contrib/pwk

contrib/zsh/_bzr

elementtree/ElementTree.py

setup.py *

tools/convertfile.py

tools/convertinv.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/btree_index.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""B+Tree indices"""

import array

import bisect

from bisect import bisect_right

from copy import deepcopy

import math

import struct

import tempfile

import zlib

from bzrlib import (

chunk_writer,

debug,

errors,

index,

lru_cache,

osutils,

trace,

)

from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN

from bzrlib.transport import get_transport

_BTSIGNATURE = "B+Tree Graph Index 2\n"

_OPTION_ROW_LENGTHS = "row_lengths="

_LEAF_FLAG = "type=leaf\n"

_INTERNAL_FLAG = "type=internal\n"

_INTERNAL_OFFSET = "offset="

_RESERVED_HEADER_BYTES = 120

_PAGE_SIZE = 4096

# 4K per page: 4MB - 1000 entries

_NODE_CACHE_SIZE = 1000

class _BuilderRow(object):
    """Accumulated state for one row of the tree while it is written out.

    :ivar spool: A temporary file collecting the serialised nodes of this
        row.
    :ivar nodes: The count of nodes finished in this row so far.
    :ivar writer: The writer of the node currently being filled, or None
        when no node is open.
    """

    def __init__(self):
        """Create an empty _BuilderRow with no node open."""
        self.writer = None
        self.spool = tempfile.TemporaryFile()
        self.nodes = 0

    def finish_node(self, pad=True):
        """Close the node being written and append it to the spool.

        :param pad: When false and the writer reports padding, the last
            entry of the writer's output is left out of the spool.
        :raises AssertionError: If the spool position (plus any skipped
            padding) is not a multiple of _PAGE_SIZE afterwards.
        """
        byte_lines, _, padding = self.writer.finish()
        if not self.nodes:
            # padded node: the first node of a row is preceded by
            # _RESERVED_HEADER_BYTES zero bytes.
            self.spool.write("\x00" * _RESERVED_HEADER_BYTES)
        if pad or not padding:
            skipped_bytes = 0
        else:
            # Leave the final entry out and remember how many bytes that
            # skipped, so the page-size check below still holds.
            del byte_lines[-1]
            skipped_bytes = padding
        self.spool.writelines(byte_lines)
        spooled = self.spool.tell()
        remainder = (spooled + skipped_bytes) % _PAGE_SIZE
        if remainder:
            raise AssertionError("incorrect node length: %d, %d"
                                 % (spooled, remainder))
        self.writer = None
        self.nodes += 1

class _InternalBuilderRow(_BuilderRow):
    """Row state used while writing out the internal rows of the tree."""

    def finish_node(self, pad=True):
        """Finish the current internal node; internal nodes are always padded.

        :param pad: Must be true.
        :raises AssertionError: If pad is false.
        """
        if pad:
            _BuilderRow.finish_node(self, pad=True)
            return
        raise AssertionError("Must pad internal nodes only.")

class _LeafBuilderRow(_BuilderRow):
    """The stored state accumulated while writing out leaf rows."""

100

class BTreeBuilder(index.GraphIndexBuilder):

101

"""A Builder for B+Tree based Graph indices.

102

103

The resulting graph has the structure:

104

105

_SIGNATURE OPTIONS NODES

106

_SIGNATURE := 'B+Tree Graph Index 2' NEWLINE

107

OPTIONS := REF_LISTS KEY_ELEMENTS LENGTH ROW_LENGTHS

108

REF_LISTS := 'node_ref_lists=' DIGITS NEWLINE

109

KEY_ELEMENTS := 'key_elements=' DIGITS NEWLINE

110

LENGTH := 'len=' DIGITS NEWLINE

111

ROW_LENGTHS := 'row_lengths=' DIGITS (COMMA DIGITS)* NEWLINE

112

NODES := NODE_COMPRESSED*

113

NODE_COMPRESSED:= COMPRESSED_BYTES{4096}

114

NODE_RAW := INTERNAL | LEAF

115

INTERNAL := INTERNAL_FLAG POINTERS

116

LEAF := LEAF_FLAG ROWS

117

KEY_ELEMENT := Not-whitespace-utf8

118

KEY := KEY_ELEMENT (NULL KEY_ELEMENT)*

119

ROWS := ROW*

120

ROW := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE

121

ABSENT := 'a'

122

REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}

123

REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?

124

REFERENCE := KEY

125

VALUE := no-newline-no-null-bytes

126

"""

127

128

def __init__(self, reference_lists=0, key_elements=1, spill_at=100000):
    """Set up the builder; see GraphIndexBuilder.__init__.

    :param reference_lists: Passed through to GraphIndexBuilder.
    :param key_elements: Passed through to GraphIndexBuilder.
    :param spill_at: Optional parameter controlling the maximum number
        of nodes that BTreeBuilder will hold in memory.
    """
    index.GraphIndexBuilder.__init__(
        self, reference_lists=reference_lists, key_elements=key_elements)
    # The in-memory nodes, as a map of {key: (node_refs, value)}.
    self._nodes = {}
    # None indicates the by-key lookup structure has not been built yet.
    self._nodes_by_key = None
    # Indices already written out to disk; slots may hold None.
    self._backing_indices = []
    self._spill_at = spill_at

142

143

def add_node(self, key, value, references=()):
    """Add a node to the index.

    If adding the node causes the builder to reach its spill_at threshold,
    disk spilling will be triggered.

    :param key: The key. Keys are non-empty tuples containing as many
        whitespace-free utf8 bytestrings as the key length defined for
        this index.
    :param value: The value to associate with the key. It may be any
        bytes as long as it does not contain a NUL byte or a newline.
    :param references: An iterable of iterables of keys. Each is a
        reference to another key.
    :raises errors.BadIndexDuplicateKey: If key is already among the
        nodes held in memory.
    """
    # The second element (the absent references) is not needed here.
    node_refs, _absent = self._check_key_ref_value(key, references, value)
    if key in self._nodes:
        raise errors.BadIndexDuplicateKey(key, self)
    self._nodes[key] = (node_refs, value)
    self._keys.add(key)
    by_key_built = self._nodes_by_key is not None
    if by_key_built and self._key_length > 1:
        self._update_nodes_by_key(key, value, node_refs)
    if len(self._keys) >= self._spill_at:
        self._spill_mem_keys_to_disk()

168

169

def _spill_mem_keys_to_disk(self):
    """Write the in memory keys down to disk to cap memory consumption.

    Existing backing indices are combined with the memory nodes so that
    sorted order is preserved, following a powers-of-two scheme: the
    memory nodes are merged with every leading non-None backing index,
    the result is stored in the slot just after them, and the merged
    slots are cleared. So the first spill writes one index, the second
    produces a 2x index replacing the first, the third writes a new
    single index next to the 2x one, the fourth merges everything into a
    4x index, and so on.
    """
    sources = [self._iter_mem_nodes()]
    # Count the leading run of live backing indices; that count is also
    # the slot the merged index will occupy.
    slot = 0
    for backing in self._backing_indices:
        if backing is None:
            break
        sources.append(backing.iter_all_entries())
        slot += 1
    merged = self._iter_smallest(sources)
    new_backing_file, size = self._write_nodes(merged)
    dir_path, base_name = osutils.split(new_backing_file.name)
    # Note: The transport here isn't strictly needed, because we will use
    #       direct access to the new_backing._file object
    new_backing = BTreeGraphIndex(get_transport(dir_path), base_name, size)
    # GC will clean up the file
    new_backing._file = new_backing_file
    if slot == len(self._backing_indices):
        self._backing_indices.append(None)
    self._backing_indices[slot] = new_backing
    for merged_slot in range(slot):
        self._backing_indices[merged_slot] = None
    # Everything that was in memory now lives in new_backing.
    self._keys = set()
    self._nodes = {}
    self._nodes_by_key = None

207

208

def add_nodes(self, nodes):
    """Add nodes to the index.

    :param nodes: An iterable of (key, value, node_refs) entries when the
        index has reference lists, otherwise of (key, value) entries.
    """
    if not self.reference_lists:
        for key, value in nodes:
            self.add_node(key, value)
        return
    for key, value, node_refs in nodes:
        self.add_node(key, value, node_refs)

219

220

def _iter_mem_nodes(self):

221

"""Iterate over the nodes held in memory."""

222

nodes = self._nodes

223

if self.reference_lists:

224

for key in sorted(nodes):

225

references, value = nodes[key]

226

yield self, key, value, references

227

else:

228

for key in sorted(nodes):

229

references, value = nodes[key]

230

yield self, key, value

231

232

def _iter_smallest(self, iterators_to_combine):

233

if len(iterators_to_combine) == 1:

234

for value in iterators_to_combine[0]:

235

yield value

236

return

237

current_values = []

238

for iterator in iterators_to_combine:

239

try:

240

current_values.append(iterator.next())

241

except StopIteration:

242

current_values.append(None)

243

last = None

244

while True:

245

# Decorate candidates with the value to allow 2.4's min to be used.

246

candidates = [(item[1][1], item) for item

247

in enumerate(current_values) if item[1] is not None]

248

if not len(candidates):

249

return

250

selected = min(candidates)

251

# undecorate back to (pos, node)

252

selected = selected[1]

253

if last == selected[1][1]:

254

raise errors.BadIndexDuplicateKey(last, self)

255

last = selected[1][1]

256

# Yield, with self as the index

257

yield (self,) + selected[1][1:]

258

pos = selected[0]

259

try:

260

current_values[pos] = iterators_to_combine[pos].next()

261

except StopIteration:

262

current_values[pos] = None

263

264

def _add_key(self, string_key, line, rows):
    """Add a key to the current chunk.

    :param string_key: The key to add. When the leaf overflows, this key
        followed by a newline is written into the rows above to mark
        where the next node starts.
    :param line: The fully serialised key and value.
    :param rows: The list of builder rows, with the leaf row last. A new
        row is inserted at index 0 when the pointer fits in no existing
        internal row.
    """
    leaf_row = rows[-1]
    if leaf_row.writer is None:
        # Opening a new leaf chunk: first flesh out any internal nodes
        # that are needed to preserve the height of the tree.
        for depth, upper in enumerate(rows[:-1]):
            if upper.writer is not None:
                continue
            capacity = _PAGE_SIZE
            if upper.nodes == 0:
                # The first node of a row is shortened by the reserved
                # header bytes.
                capacity -= _RESERVED_HEADER_BYTES
            upper.writer = chunk_writer.ChunkWriter(capacity, 0)
            upper.writer.write(_INTERNAL_FLAG)
            upper.writer.write(
                _INTERNAL_OFFSET + str(rows[depth + 1].nodes) + "\n")
        # add a new leaf
        capacity = _PAGE_SIZE
        if leaf_row.nodes == 0:
            capacity -= _RESERVED_HEADER_BYTES
        leaf_row.writer = chunk_writer.ChunkWriter(capacity)
        leaf_row.writer.write(_LEAF_FLAG)
    if not leaf_row.writer.write(line):
        # The line fitted in the open leaf; nothing more to do.
        return
    # This key did not fit in the node: close it and record the division
    # point in the rows above before retrying.
    leaf_row.finish_node()
    key_line = string_key + "\n"
    for upper in reversed(rows[:-1]):
        # Mark the start of the next node in the node above. If it
        # doesn't fit then propagate upwards until we find one that it
        # does fit into.
        if not upper.writer.write(key_line):
            # We've found a node that can handle the pointer.
            break
        upper.finish_node()
    else:
        # We reached the current root without being able to mark the
        # division point, so a new root row is needed.
        if 'index' in debug.debug_flags:
            trace.mutter('Inserting new global row.')
        root = _InternalBuilderRow()
        rows.insert(0, root)
        # This is the first node of its row, so it gets the shortened
        # length.
        root.writer = chunk_writer.ChunkWriter(
            _PAGE_SIZE - _RESERVED_HEADER_BYTES, 0)
        root.writer.write(_INTERNAL_FLAG)
        root.writer.write(_INTERNAL_OFFSET + str(rows[1].nodes - 1) + "\n")
        root.writer.write(key_line)
    # Retry the add now that a fresh leaf will be opened.
    self._add_key(string_key, line, rows)

322

323

def _write_nodes(self, node_iterator):
    """Write node_iterator out as a B+Tree.

    :param node_iterator: An iterator of sorted nodes. Each node should
        match the output given by iter_all_entries.
    :return: A (file, size) tuple: a named temporary file containing a
        B+Tree for the nodes, positioned at its start, and the position
        the file had reached after the last write.
    :raises AssertionError: If the header does not fit in the reserved
        space, or a non-leaf row copies an unexpected amount of data.
    """
    # The index rows - rows[0] is the root, rows[1] is the layer under it
    # etc. Nodes arrive forward sorted by key.
    rows = []
    key_count = 0
    # Reset on every write; not otherwise used within this method.
    self.row_lengths = []
    # Loop over all nodes adding them to the bottom row (rows[-1]). When
    # a chunk in a row is finished, _add_key propagates the key that
    # didn't fit to the row above, transitively.
    for node in node_iterator:
        if not key_count:
            # First key triggers the first row
            rows.append(_LeafBuilderRow())
        key_count += 1
        # TODO: Flattening the node into a string key and a line should
        #       probably be put into a pyrex function.
        string_key, line = _btree_serializer._flatten_node(
            node, self.reference_lists)
        self._add_key(string_key, line, rows)
    # Close the open node of every row, leaf row first; only the leaf
    # row is finished without padding.
    for row in reversed(rows):
        row.finish_node(pad=(type(row) != _LeafBuilderRow))
    result = tempfile.NamedTemporaryFile()
    lines = [
        _BTSIGNATURE,
        _OPTION_NODE_REFS + str(self.reference_lists) + '\n',
        _OPTION_KEY_ELEMENTS + str(self._key_length) + '\n',
        _OPTION_LEN + str(key_count) + '\n',
        _OPTION_ROW_LENGTHS
            + ','.join([str(row.nodes) for row in rows]) + '\n',
        ]
    result.writelines(lines)
    position = sum([len(header_line) for header_line in lines])
    if position > _RESERVED_HEADER_BYTES:
        raise AssertionError("Could not fit the header in the"
                             " reserved space: %d > %d"
                             % (position, _RESERVED_HEADER_BYTES))
    # write the rows out:
    for row in rows:
        reserved = _RESERVED_HEADER_BYTES # reserved space for first node
        row.spool.flush()
        row.spool.seek(0)
        # Copy nodes to the finalised file. The first node of the row is
        # special: its spooled placeholder prefix is dropped and zero
        # bytes are written after it instead.
        first_node = row.spool.read(_PAGE_SIZE)
        result.write(first_node[reserved:])
        result.write("\x00" * (reserved - position))
        position = 0 # Only the root row actually has an offset
        copied_len = osutils.pumpfile(row.spool, result)
        expected_len = (row.nodes - 1) * _PAGE_SIZE
        if copied_len != expected_len and type(row) != _LeafBuilderRow:
            raise AssertionError("Incorrect amount of data copied"
                                 " expected: %d, got: %d"
                                 % (expected_len, copied_len))
    result.flush()
    size = result.tell()
    result.seek(0)
    return result, size

398

399

def finish(self):
    """Finalise the index.

    :return: A file handle for a temporary file containing the nodes added
        to the index.
    """
    entries = self.iter_all_entries()
    written = self._write_nodes(entries)
    # _write_nodes also reports a size; only the file is handed back.
    return written[0]

406

407

def iter_all_entries(self):
    """Iterate over all keys within the index.

    The in-memory nodes and every live backing index are combined. When
    there are no live backing indices the in-memory iterator is returned
    directly; otherwise the iterators are merged with _iter_smallest.

    :return: An iterable of (index, key, value, reference_lists), or of
        (index, key, value) when the index has no reference lists.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    # Doing serial rather than ordered would be faster; but this shouldn't
    # be getting called routinely anyway.
    sources = [self._iter_mem_nodes()]
    sources.extend([backing.iter_all_entries()
                    for backing in self._backing_indices
                    if backing is not None])
    if len(sources) == 1:
        return sources[0]
    return self._iter_smallest(sources)

426

427

def iter_entries(self, keys):
    """Iterate over keys within the index.

    Keys held in memory are yielded first; the remaining keys are then
    looked up in each live backing index in turn.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable of (index, key, value, reference_lists), or of
        (index, key, value) when the index has no reference lists. There
        is no defined order for the result iteration.
    """
    wanted = set(keys)
    with_refs = self.reference_lists
    for key in wanted.intersection(self._keys):
        node = self._nodes[key]
        if with_refs:
            yield self, key, node[1], node[0]
        else:
            yield self, key, node[1]
    wanted.difference_update(self._keys)
    for backing in self._backing_indices:
        if backing is None:
            continue
        if not wanted:
            # Every requested key has been accounted for.
            return
        for node in backing.iter_entries(wanted):
            # Found on disk: stop asking later indices for this key.
            wanted.remove(node[1])
            yield (self,) + node[1:]

453

454

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    Matches from the backing indices are yielded first, then matches from
    the nodes held in memory.

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied.
    :raises errors.BadIndexKey: If a prefix starts with None or has the
        wrong length.
    """
    # XXX: Too much duplication with the GraphIndex class; consider finding
    # a good place to pull out the actual common logic.
    prefixes = set(keys)
    if not prefixes:
        return
    for backing in self._backing_indices:
        if backing is not None:
            for node in backing.iter_entries_prefix(prefixes):
                yield (self,) + node[1:]
    single_element = (self._key_length == 1)
    for key in prefixes:
        # sanity check
        if key[0] is None:
            raise errors.BadIndexKey(key)
        if len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        if single_element:
            # With one-element keys a prefix is simply an exact key.
            try:
                node = self._nodes[key]
            except KeyError:
                continue
            if self.reference_lists:
                yield self, key, node[1], node[0]
            else:
                yield self, key, node[1]
            continue
        # Walk down the nested by-key dicts along the leading non-None
        # elements to find what the prefix refers to.
        subtree = self._get_nodes_by_key()
        remaining = list(key)
        try:
            while len(remaining) and remaining[0] is not None:
                subtree = subtree[remaining[0]]
                remaining.pop(0)
        except KeyError:
            # a non-existent lookup.
            continue
        if not len(remaining):
            # Every element was given, so subtree is the stored entry.
            yield (self, ) + subtree
            continue
        # Trailing None elements: emit every entry below this point.
        pending = [subtree]
        while pending:
            subtree = pending.pop(-1)
            # can't be empty or would not exist
            item, value = subtree.iteritems().next()
            if type(value) == dict:
                # push the nested dicts
                pending.extend(subtree.itervalues())
            else:
                # yield the stored entries
                for value in subtree.itervalues():
                    yield (self, ) + value

529

530

def _get_nodes_by_key(self):

531

if self._nodes_by_key is None:

532

nodes_by_key = {}

533

if self.reference_lists:

534

for key, (references, value) in self._nodes.iteritems():

535

key_dict = nodes_by_key

536

for subkey in key[:-1]:

537

key_dict = key_dict.setdefault(subkey, {})

538

key_dict[key[-1]] = key, value, references

539

else:

540

for key, (references, value) in self._nodes.iteritems():

541

key_dict = nodes_by_key

542

for subkey in key[:-1]:

543

key_dict = key_dict.setdefault(subkey, {})

544

key_dict[key[-1]] = key, value

545

self._nodes_by_key = nodes_by_key

546

return self._nodes_by_key

547

548

def key_count(self):
    """Return an estimate of the number of keys in this index.

    The result is the number of keys held in memory plus the key_count()
    of every live backing index.
    """
    total = len(self._keys)
    for backing in self._backing_indices:
        if backing is not None:
            total += backing.key_count()
    return total

555

556

def validate(self):
    """In-memory indices have no known corruption at the moment."""
    # Nothing to check, so validation always succeeds.
    return None

558

559

560

class _LeafNode(object):
    """A leaf node for a serialised B+Tree index.

    :ivar keys: A dict built from the pairs produced by
        _btree_serializer._parse_leaf_lines.
    """

    def __init__(self, bytes, key_length, ref_list_length):
        """Parse bytes to create a leaf node object.

        :param bytes: The serialised content of the leaf node.
        :param key_length: Passed through to the leaf line parser.
        :param ref_list_length: Passed through to the leaf line parser.
        """
        # splitlines mangles the \r delimiters.. don't use it; the
        # serializer does the line handling.
        parsed = _btree_serializer._parse_leaf_lines(
            bytes, key_length, ref_list_length)
        self.keys = dict(parsed)

568

569

570

class _InternalNode(object):

571

"""An internal node for a serialised B+Tree index."""

572

573

def __init__(self, bytes):

574

"""Parse bytes to create an internal node object."""

575

# splitlines mangles the \r delimiters.. don't use it.

576

self.keys = self._parse_lines(bytes.split('\n'))

577

578

def _parse_lines(self, lines):

579

nodes = []

580

self.offset = int(lines[1][7:])

581

for line in lines[2:]:

582

if line == '':

583

break

584

nodes.append(tuple(line.split('\0')))

585

return nodes

586

587

588

class BTreeGraphIndex(object):

589

"""Access to nodes via the standard GraphIndex interface for B+Tree's.

590

591

Individual nodes are held in a LRU cache. This holds the root node in

592

memory except when very large walks are done.

593

"""

594

595

def __init__(self, transport, name, size):
    """Create a B+Tree index object on the index name.

    :param transport: The transport to read data for the index from.
    :param name: The file name of the index on transport.
    :param size: Optional size of the index in bytes. This allows
        compatibility with the GraphIndex API, as well as ensuring that
        the initial read (to read the root node header) can be done
        without over-reading even on empty indices, and on small indices
        allows single-IO to read the entire index.
    """
    # The parameters the index was created with (compared by __eq__).
    self._transport = transport
    self._name = name
    self._size = size
    self._file = None
    self._page_size = transport.recommended_page_size()
    # Nothing has been read from the index yet.
    self._root_node = None
    self._key_count = None
    self._row_lengths = None
    self._row_offsets = None # Start of each row, [-1] is the end
    # The per-value cache is switched off (None); the commented-out
    # default was an LRUCache of 100,000 leaf values.
    self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
    self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
    self._internal_node_cache = lru_cache.LRUCache()

619

620

def __eq__(self, other):

621

"""Equal when self and other were created with the same parameters."""

622

return (

623

type(self) == type(other) and

624

self._transport == other._transport and

625

self._name == other._name and

626

self._size == other._size)

627

628

def __ne__(self, other):

629

return not self.__eq__(other)

630

631

def _get_root_node(self):

632

if self._root_node is None:

633

# We may not have a root node yet

634

nodes = list(self._read_nodes([0]))

635

if len(nodes):

636

self._root_node = nodes[0][1]

637

return self._root_node

638

639

def _cache_nodes(self, nodes, cache):

640

"""Read nodes and cache them in the lru.

641

642

The nodes list supplied is sorted and then read from disk, each node

643

being inserted it into the _node_cache.

644

645

Note: Asking for more nodes than the _node_cache can contain will

646

result in some of the results being immediately discarded, to prevent

647

this an assertion is raised if more nodes are asked for than are

648

cachable.

649

650

:return: A dict of {node_pos: node}

651

"""

652

if len(nodes) > cache._max_cache:

653

trace.mutter('Requesting %s > %s nodes, not all will be cached',

654

len(nodes), cache._max_cache)

655

found = {}

656

for node_pos, node in self._read_nodes(sorted(nodes)):

657

if node_pos == 0: # Special case

658

self._root_node = node

659

else:

660

cache.add(node_pos, node)

661

found[node_pos] = node

662

return found

663

664

def _get_nodes(self, cache, node_indexes):

665

found = {}

666

needed = []

667

for idx in node_indexes:

668

if idx == 0 and self._root_node is not None:

669

found[0] = self._root_node

670

continue

671

try:

672

found[idx] = cache[idx]

673

except KeyError:

674

needed.append(idx)

675

found.update(self._cache_nodes(needed, cache))

676

return found

677

678

def _get_internal_nodes(self, node_indexes):

679

"""Get a node, from cache or disk.

680

681

After getting it, the node will be cached.

682

"""

683

return self._get_nodes(self._internal_node_cache, node_indexes)

684

685

def _get_leaf_nodes(self, node_indexes):

686

"""Get a bunch of nodes, from cache or disk."""

687

found = self._get_nodes(self._leaf_node_cache, node_indexes)

688

if self._leaf_value_cache is not None:

689

for node in found.itervalues():

690

for key, value in node.keys.iteritems():

691

if key in self._leaf_value_cache:

692

# Don't add the rest of the keys, we've seen this node

693

# before.

694

break

695

self._leaf_value_cache[key] = value

696

return found

697

698

def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value) or
        (index, key, value, reference_lists). The former tuple is used
        when there are no reference lists in the index, making the API
        compatible with simple key:value index types. Leaf nodes are read
        in order and the entries of each leaf are yielded sorted by key.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if not self.key_count():
        return
    # The leaf row is the last row: it spans the final two row offsets.
    first_leaf = self._row_offsets[-2]
    past_last_leaf = self._row_offsets[-1]
    leaf_positions = range(first_leaf, past_last_leaf)
    # We iterate strictly in-order so that we can use this function
    # for spilling index builds to disk.
    with_refs = self.node_ref_lists
    for _, node in self._read_nodes(leaf_positions):
        for key, (value, refs) in sorted(node.keys.items()):
            if with_refs:
                yield (self, key, value, refs)
            else:
                yield (self, key, value)

725

726

@staticmethod

727

def _multi_bisect_right(in_keys, fixed_keys):

728

"""Find the positions where each 'in_key' would fit in fixed_keys.

729

730

This is equivalent to doing "bisect_right" on each in_key into

731

fixed_keys

732

733

:param in_keys: A sorted list of keys to match with fixed_keys

734

:param fixed_keys: A sorted list of keys to match against

735

:return: A list of (integer position, [key list]) tuples.

736

"""

737

if not in_keys:

738

return []

739

if not fixed_keys:

740

# no pointers in the fixed_keys list, which means everything must

741

# fall to the left.

742

return [(0, in_keys)]

743

744

# TODO: Iterating both lists will generally take M + N steps

745

# Bisecting each key will generally take M * log2 N steps.

746

# If we had an efficient way to compare, we could pick the method

747

# based on which has the fewer number of steps.

748

# There is also the argument that bisect_right is a compiled

749

# function, so there is even more to be gained.

750

# iter_steps = len(in_keys) + len(fixed_keys)

751

# bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)

752

if len(in_keys) == 1: # Bisect will always be faster for M = 1

753

return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]

754

# elif bisect_steps < iter_steps:

755

# offsets = {}

756

# for key in in_keys:

757

# offsets.setdefault(bisect_right(fixed_keys, key),

758

# []).append(key)

759

# return [(o, offsets[o]) for o in sorted(offsets)]

760

in_keys_iter = iter(in_keys)

761

fixed_keys_iter = enumerate(fixed_keys)

762

cur_in_key = in_keys_iter.next()

763

cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()

764

765

class InputDone(Exception): pass

766

class FixedDone(Exception): pass

767

768

output = []

769

cur_out = []

770

771

# TODO: Another possibility is that rather than iterating on each side,

772

# we could use a combination of bisecting and iterating. For

773

# example, while cur_in_key < fixed_key, bisect to find its

774

# point, then iterate all matching keys, then bisect (restricted

775

# to only the remainder) for the next one, etc.

776

try:

777

while True:

778

if cur_in_key < cur_fixed_key:

779

cur_keys = []

780

cur_out = (cur_fixed_offset, cur_keys)

781

output.append(cur_out)

782

while cur_in_key < cur_fixed_key:

783

cur_keys.append(cur_in_key)

784

try:

785

cur_in_key = in_keys_iter.next()

786

except StopIteration:

787

raise InputDone

788

# At this point cur_in_key must be >= cur_fixed_key

789

# step the cur_fixed_key until we pass the cur key, or walk off

790

# the end

791

while cur_in_key >= cur_fixed_key:

792

try:

793

cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()

794

except StopIteration:

795

raise FixedDone

796

except InputDone:

797

# We consumed all of the input, nothing more to do

798

pass

799

except FixedDone:

800

# There was some input left, but we consumed all of fixed, so we

801

# have to add one more for the tail

802

cur_keys = [cur_in_key]

803

cur_keys.extend(in_keys_iter)

804

cur_out = (len(fixed_keys), cur_keys)

805

output.append(cur_out)

806

return output

807

808

def iter_entries(self, keys):
    """Iterate over keys within the index.

    Keys found in the leaf value cache (when it is enabled) are yielded
    straight from it. The remaining keys are located by walking down the
    internal rows, bisecting each node's keys to pick the child node for
    each group of keys, and are then looked up in the leaf nodes reached.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned.
    """
    keys = frozenset(keys)
    if not keys:
        return

    if not self.key_count():
        return

    if self._leaf_value_cache is None:
        needed_keys = keys
    else:
        needed_keys = []
        for key in keys:
            value = self._leaf_value_cache.get(key, None)
            if value is not None:
                # Served from the cache: no need to look this key up in
                # the tree as well.
                value, refs = value
                if self.node_ref_lists:
                    yield (self, key, value, refs)
                else:
                    yield (self, key, value)
            else:
                needed_keys.append(key)

    # Only the keys the cache could not answer go on to the tree walk;
    # looking up all of them again would yield cache hits twice.
    if not needed_keys:
        return
    # The multi-way bisect below requires the keys in sorted order.
    needed_keys = sorted(needed_keys)

    # Start at the root node (position 0) with every needed key.
    nodes_and_keys = [(0, needed_keys)]

    for next_row_start in self._row_offsets[1:-1]:
        node_indexes = [idx for idx, s_keys in nodes_and_keys]
        nodes = self._get_internal_nodes(node_indexes)

        next_nodes_and_keys = []
        for node_index, sub_keys in nodes_and_keys:
            node = nodes[node_index]
            positions = self._multi_bisect_right(sub_keys, node.keys)
            # Child positions are relative to the node's offset within
            # the next row.
            node_offset = next_row_start + node.offset
            next_nodes_and_keys.extend([(node_offset + pos, s_keys)
                                        for pos, s_keys in positions])
        nodes_and_keys = next_nodes_and_keys
    # We should now be at the _LeafNodes
    node_indexes = [idx for idx, s_keys in nodes_and_keys]

    # TODO: We may *not* want to always read all the nodes in one
    #       big go. Consider setting a max size on this.

    nodes = self._get_leaf_nodes(node_indexes)
    for node_index, sub_keys in nodes_and_keys:
        if not sub_keys:
            continue
        node = nodes[node_index]
        for next_sub_key in sub_keys:
            if next_sub_key in node.keys:
                value, refs = node.keys[next_sub_key]
                if self.node_ref_lists:
                    yield (self, next_sub_key, value, refs)
                else:
                    yield (self, next_sub_key, value)

888

889

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    WARNING: Note that this method currently causes a full index parse
    unconditionally (which is reasonably appropriate as it is a means for
    thunking many small indices into one larger one and still supplies
    iter_all_entries at the thunk layer).

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    :raises BadIndexKey: (lazily, while iterating) if a prefix starts with
        None or does not have exactly self._key_length elements.
    """
    keys = sorted(set(keys))
    if not keys:
        return
    # Load if needed to check key lengths
    if self._key_count is None:
        self._get_root_node()
    # TODO: only access nodes that can satisfy the prefixes we are looking
    # for. For now, to meet API usage (as this function is not used by
    # current bzrlib) just suck the entire index and iterate in memory.
    #
    # Every entry from iter_all_entries() is (index, key, value[, refs]);
    # the refs element is present when self.node_ref_lists is set. Keeping
    # the tail of the entry as a tuple lets one code path serve both
    # shapes.
    if self._key_length == 1:
        # Single-element keys cannot have a None tail, so this is a plain
        # exact-match lookup.
        nodes = {}
        for entry in self.iter_all_entries():
            nodes[entry[1]] = tuple(entry[1:])
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            try:
                found = nodes[key]
            except KeyError:
                # a non-existent lookup.
                continue
            yield (self, ) + found
        return
    # For a key of (foo, bar, baz) create
    # nodes_by_key[foo][bar][baz] = (key, value[, refs])
    nodes_by_key = {}
    for entry in self.iter_all_entries():
        key = entry[1]
        key_dict = nodes_by_key
        for subkey in key[:-1]:
            key_dict = key_dict.setdefault(subkey, {})
        key_dict[key[-1]] = tuple(entry[1:])
    for key in keys:
        # sanity check
        if key[0] is None:
            raise errors.BadIndexKey(key)
        if len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        # find the subdict whose contents should be returned: descend one
        # level per leading non-None element of the prefix.
        key_dict = nodes_by_key
        depth = 0
        try:
            while depth < len(key) and key[depth] is not None:
                key_dict = key_dict[key[depth]]
                depth += 1
        except KeyError:
            # a non-existent lookup.
            continue
        if depth == len(key):
            # the last thing looked up was a terminal element
            yield (self, ) + key_dict
            continue
        # A None element stopped the descent: everything below key_dict
        # matches. Walk the subtree with an explicit stack; nested dicts
        # are pushed, terminal tuples are ready to yield.
        pending = [key_dict]
        while pending:
            subtree = pending.pop(-1)
            for child in subtree.values():
                if isinstance(child, dict):
                    pending.append(child)
                else:
                    yield (self, ) + child

1000

1001

def key_count(self):
    """Return an estimate of the number of keys in this index.

    For BTreeGraphIndex the estimate is exact as it is contained in the
    header. If the count has not been loaded yet, the root node is
    fetched first so that the count gets populated.
    """
    count = self._key_count
    if count is None:
        # Not loaded yet: fetching the root node fills in _key_count.
        self._get_root_node()
        count = self._key_count
    return count

1010

1011

def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    The header is the file signature followed by four option lines, in
    this order: node reference list count, key element count, key count
    and the comma separated row lengths. Parsing sets node_ref_lists,
    _key_length, _key_count, _row_lengths and _row_offsets on self.

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data. (which may be of length 0).
    :raises BadIndexFormatSignature: if the data does not start with the
        signature of this index type.
    :raises BadIndexOptions: if an option line is missing, carries the
        wrong prefix, or holds a value that is not an integer.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if not signature == expected_signature:
        raise errors.BadIndexFormatSignature(self._name, BTreeGraphIndex)
    lines = bytes[len(expected_signature):].splitlines()

    def option_text(line_number, prefix):
        # Return what follows `prefix` on header line `line_number`.
        # A truncated header is reported as bad options rather than
        # leaking an IndexError to the caller.
        if line_number >= len(lines):
            raise errors.BadIndexOptions(self)
        options_line = lines[line_number]
        if not options_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        return options_line[len(prefix):]

    def option_int(line_number, prefix):
        # As option_text, but the payload must be a single integer.
        text = option_text(line_number, prefix)
        try:
            return int(text)
        except ValueError:
            raise errors.BadIndexOptions(self)

    self.node_ref_lists = option_int(0, _OPTION_NODE_REFS)
    self._key_length = option_int(1, _OPTION_KEY_ELEMENTS)
    self._key_count = option_int(2, _OPTION_LEN)
    row_text = option_text(3, _OPTION_ROW_LENGTHS)
    try:
        # Build a real list (not a lazy map object) so conversion errors
        # are raised here and callers can take len() of / re-iterate the
        # result. Empty fields (e.g. an empty row list) are skipped.
        self._row_lengths = [int(length) for length in row_text.split(',')
                             if len(length)]
    except ValueError:
        raise errors.BadIndexOptions(self)
    # _row_offsets[i] is the index of the first node of row i; one extra
    # trailing entry holds the total node count.
    offsets = []
    row_offset = 0
    for row in self._row_lengths:
        offsets.append(row_offset)
        row_offset += row
    offsets.append(row_offset)
    self._row_offsets = offsets

    # calculate the bytes we have processed: the signature, the four
    # option lines, plus one line terminator byte per option line.
    header_end = (len(signature) + sum(map(len, lines[0:4])) + 4)
    return header_end, bytes[header_end:]

1063

1064

def _read_nodes(self, nodes):
    """Read some nodes from disk and yield them in parsed form.

    This performs a readv (or seek/read calls when self._file is set) to
    get the node data into memory, parses each node, then yields it to
    the caller. The nodes are requested in the supplied order. If possible
    doing sort() on the list before requesting a read may improve
    performance.

    Reading node 0 also parses the index header as a side effect.

    :param nodes: The nodes to read. 0 - first node, 1 - second node etc.
    :return: A generator of (node_index, node) tuples, where node is a
        _LeafNode or an _InternalNode.
    :raises AssertionError: if a decompressed node starts with neither the
        leaf nor the internal node flag.
    """
    ranges = []
    for index in nodes:
        offset = index * _PAGE_SIZE
        size = _PAGE_SIZE
        if index == 0:
            # Root node - special case
            if self._size:
                size = min(_PAGE_SIZE, self._size)
            else:
                # Size unknown: read the first page to find out.
                stream = self._transport.get(self._name)
                try:
                    start = stream.read(_PAGE_SIZE)
                finally:
                    # Don't leak the handle; only the length is needed.
                    stream.close()
                # Avoid doing this again
                # NOTE(review): this records the length of the first page
                # read, not the full file size - presumably the size is
                # only unknown for very small indices; confirm.
                self._size = len(start)
                size = min(_PAGE_SIZE, self._size)
        else:
            # The final page of the file may be shorter than a full page.
            size = min(size, self._size - offset)
        ranges.append((offset, size))
    if not ranges:
        return
    if self._file is None:
        data_ranges = self._transport.readv(self._name, ranges)
    else:
        data_ranges = []
        for offset, size in ranges:
            self._file.seek(offset)
            data_ranges.append((offset, self._file.read(size)))
    for offset, data in data_ranges:
        if offset == 0:
            # extract the header; the root node data follows it in the
            # same page.
            offset, data = self._parse_header_from_bytes(data)
            if len(data) == 0:
                continue
        node_bytes = zlib.decompress(data)
        if node_bytes.startswith(_LEAF_FLAG):
            node = _LeafNode(node_bytes, self._key_length,
                             self.node_ref_lists)
        elif node_bytes.startswith(_INTERNAL_FLAG):
            node = _InternalNode(node_bytes)
        else:
            raise AssertionError("Unknown node type for %r" % node_bytes)
        # Floor division keeps the node index an integer regardless of
        # the interpreter's division semantics.
        yield offset // _PAGE_SIZE, node

1115

1116

def _signature(self):
    """Return the signature that files of this index type start with."""
    signature = _BTSIGNATURE
    return signature

1119

1120

def validate(self):
    """Validate that everything in the index can be accessed.

    The root node is loaded first; every remaining node is then read and
    parsed, with the parsed results thrown away.
    """
    self._get_root_node()
    # With a single row there is nothing beyond the root, and the range
    # below comes out empty, so we shouldn't be reading anything anyway.
    first_node = 1
    if len(self._row_lengths) > 1:
        first_node = self._row_offsets[1]
    end_node = self._row_offsets[-1]
    # Drain the generator: reading and parsing each node is the check.
    for _parsed in self._read_nodes(range(first_node, end_node)):
        continue

1132

1133

1134

try:

1135

from bzrlib import _btree_serializer_c as _btree_serializer

1136

except ImportError:

1137

from bzrlib import _btree_serializer_py as _btree_serializer

Older »