~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/btree_index.py

Committer: Martin Pool
Date: 2007-04-04 01:22:11 UTC
mfrom: (2393.1.1 bzr.docs)
mto: This revision was merged to the branch mainline in revision 2397.
Revision ID: mbp@sourcefrog.net-20070404012211-sq269me6bai9m6xk

merge trunk and doc fix from elliot

files added:
build-api

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_escaped_store.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/xml6.py

doc/README.1st

files removed:
bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/api.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_pack.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/bundle/serializer/v4.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/counted_lock.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/directory_service.py

bzrlib/email_message.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/inventory_delta.py

bzrlib/lru_cache.py

bzrlib/mail_client.py

bzrlib/multiparent.py

bzrlib/pack.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconfigure.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/pack_repo.py

bzrlib/rules.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/switch.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/bzrdir_implementations/test_push.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_export.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/workingtree_implementations/test_content_filters.py

bzrlib/tests/workingtree_implementations/test_eol_conversion.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/transport/brokenrename.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/log.py

bzrlib/transport/nosmart.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version_info_formats/format_custom.py

bzrlib/views.py

bzrlib/xml5.py

bzrlib/xml6.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/developers

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2.txt

doc/developers/gc.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

doc/news-template.txt

man1

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/check-newsbugs.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/win32/build_release.py

tools/win32/run_script.py

files renamed:
doc/developers/HACKING.txt => HACKING

bzrlib/deprecated_graph.py => bzrlib/graph.py

bzrlib/help_topics/__init__.py => bzrlib/help_topics.py

bzrlib/_patiencediff_py.py => bzrlib/patiencediff.py

bzrlib/plugins/launchpad/lp_directory.py => bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/test_lp_directory.py => bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/tests/http_utils.py => bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/http_server.py => bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_send.py => bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/per_repository/ => bzrlib/tests/repository_implementations/

bzrlib/tests/test_deprecated_graph.py => bzrlib/tests/test_graph.py

bzrlib/tests/test_revisionspec.py => bzrlib/tests/test_revisionnamespaces.py

bzrlib/transport/ftp/__init__.py => bzrlib/transport/ftp.py

bzrlib/transport/remote.py => bzrlib/transport/smart.py

bzrlib/xml8.py => bzrlib/xml5.py

doc/en/tutorials/centralized_workflow.txt => doc/centralized_workflow.txt

bzrlib/help_topics/en/configuration.txt => doc/configuration.txt

doc/en/user-guide/http_smart_server.txt => doc/http_smart_server.txt

doc/en/user-guide/index.txt => doc/index.txt

doc/en/user-guide/plugins.txt => doc/plugins.txt

doc/en/user-guide/server.txt => doc/server.txt

doc/en/user-guide/setting_up_email.txt => doc/setting_up_email.txt

doc/en/user-guide/specifying_revisions.txt => doc/specifying_revisions.txt

doc/en/tutorials/tutorial.txt => doc/tutorial.txt

doc/en/user-guide/using_aliases.txt => doc/using_aliases.txt

doc/en/user-guide/version_info.txt => doc/version_info.txt

files modified:
.bzrignore

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/bencode.py

bzrlib/util/configobj/configobj.py

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib/bash/bzr.simple

contrib/newinventory.py

contrib/pwclient.full

doc/bazaar-vcs.org.kid

doc/default.css

generate_docs.py

profile_imports.py

setup.py *

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/btree_index.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""B+Tree indices"""

import array

import bisect

from bisect import bisect_right

from copy import deepcopy

import math

import struct

import tempfile

import zlib

from bzrlib import (

chunk_writer,

debug,

errors,

fifo_cache,

index,

lru_cache,

osutils,

trace,

)

from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN

from bzrlib.transport import get_transport

# First line of every serialised index; identifies the file format.
_BTSIGNATURE = "B+Tree Graph Index 2\n"
# Prefix of the header option line listing the node count of each row.
_OPTION_ROW_LENGTHS = "row_lengths="
# First line written into every leaf node.
_LEAF_FLAG = "type=leaf\n"
# First line written into every internal node.
_INTERNAL_FLAG = "type=internal\n"
# Prefix of the second line of an internal node; it is followed by a node
# count taken from the row below at the time the internal node is opened.
_INTERNAL_OFFSET = "offset="
# Bytes set aside at the start of the first node of a row; the index header
# must fit within this space.
_RESERVED_HEADER_BYTES = 120
# Size in bytes of one node (page) of the index.
_PAGE_SIZE = 4096
# 4K per page: 4MB - 1000 entries
# Capacity of the LRU cache BTreeGraphIndex uses for leaf nodes.
_NODE_CACHE_SIZE = 1000

class _BuilderRow(object):

"""The stored state accumulated while writing out a row in the index.

:ivar spool: A temporary file used to accumulate nodes for this row

in the tree.

:ivar nodes: The count of nodes emitted so far.

"""

def __init__(self):

"""Create a _BuilderRow."""

self.nodes = 0

self.spool = tempfile.TemporaryFile()

self.writer = None

def finish_node(self, pad=True):

byte_lines, _, padding = self.writer.finish()

if self.nodes == 0:

# padded note:

self.spool.write("\x00" * _RESERVED_HEADER_BYTES)

skipped_bytes = 0

if not pad and padding:

del byte_lines[-1]

skipped_bytes = padding

self.spool.writelines(byte_lines)

remainder = (self.spool.tell() + skipped_bytes) % _PAGE_SIZE

if remainder != 0:

raise AssertionError("incorrect node length: %d, %d"

% (self.spool.tell(), remainder))

self.nodes += 1

self.writer = None

class _InternalBuilderRow(_BuilderRow):
    """Accumulated state for a row of internal (non-leaf) nodes."""

    def finish_node(self, pad=True):
        """Finish the current internal node, which must always be padded.

        :param pad: Must be true.
        :raises AssertionError: If pad is false.
        """
        if pad:
            _BuilderRow.finish_node(self)
            return
        raise AssertionError("Must pad internal nodes only.")

class _LeafBuilderRow(_BuilderRow):
    """The stored state accumulated while writing out leaf rows."""

100

101

class BTreeBuilder(index.GraphIndexBuilder):

102

"""A Builder for B+Tree based Graph indices.

103

104

The resulting graph has the structure:

105

106

_SIGNATURE OPTIONS NODES

107

_SIGNATURE := 'B+Tree Graph Index 2' NEWLINE

108

OPTIONS := REF_LISTS KEY_ELEMENTS LENGTH ROW_LENGTHS

109

REF_LISTS := 'node_ref_lists=' DIGITS NEWLINE

110

KEY_ELEMENTS := 'key_elements=' DIGITS NEWLINE

111

LENGTH := 'len=' DIGITS NEWLINE

112

ROW_LENGTHS := 'row_lengths=' (DIGITS (COMMA DIGITS)*)? NEWLINE

113

NODES := NODE_COMPRESSED*

114

NODE_COMPRESSED:= COMPRESSED_BYTES{4096}

115

NODE_RAW := INTERNAL | LEAF

116

INTERNAL := INTERNAL_FLAG POINTERS

117

LEAF := LEAF_FLAG ROWS

118

KEY_ELEMENT := Not-whitespace-utf8

119

KEY := KEY_ELEMENT (NULL KEY_ELEMENT)*

120

ROWS := ROW*

121

ROW := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE

122

ABSENT := 'a'

123

REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}

124

REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?

125

REFERENCE := KEY

126

VALUE := no-newline-no-null-bytes

127

"""

128

129

def __init__(self, reference_lists=0, key_elements=1, spill_at=100000):
    """See GraphIndexBuilder.__init__.

    :param spill_at: Optional parameter controlling the maximum number
        of nodes that BTreeBuilder will hold in memory.
    """
    index.GraphIndexBuilder.__init__(
        self, reference_lists=reference_lists, key_elements=key_elements)
    # In-memory state: a map of {key: (node_refs, value)} plus a nested
    # lookup that stays None until it is first needed.
    self._nodes = {}
    self._nodes_by_key = None
    # Indices written out by earlier spills.
    self._backing_indices = []
    self._spill_at = spill_at
    self._optimize_for_size = False

144

145

def add_node(self, key, value, references=()):
    """Add a node to the index.

    If adding the node causes the builder to reach its spill_at threshold,
    disk spilling will be triggered.

    :param key: The key. keys are non-empty tuples containing
        as many whitespace-free utf8 bytestrings as the key length
        defined for this index.
    :param value: The value to associate with the key. It may be any
        bytes as long as it does not contain \\0 or \\n.
    :param references: An iterable of iterables of keys. Each is a
        reference to another key.
    :raises errors.BadIndexDuplicateKey: If key is already held in memory.
    """
    # The second element (absent references) is of no interest here.
    node_refs, _absent = self._check_key_ref_value(key, references, value)
    if key in self._nodes:
        raise errors.BadIndexDuplicateKey(key, self)
    self._nodes[key] = (node_refs, value)
    self._keys.add(key)
    if self._nodes_by_key is not None:
        # Keep the already-built nested lookup current for multi-element
        # keys.
        if self._key_length > 1:
            self._update_nodes_by_key(key, value, node_refs)
    if len(self._keys) >= self._spill_at:
        self._spill_mem_keys_to_disk()

170

171

def _spill_mem_keys_to_disk(self):
    """Write the in memory keys down to disk to cap memory consumption.

    If we already have some keys written to disk, we will combine them so
    as to preserve the sorted order. The algorithm for combining uses
    powers of two. So on the first spill, write all mem nodes into a
    single index. On the second spill, combine the mem nodes with the nodes
    on disk to create a 2x sized disk index and get rid of the first index.
    On the third spill, create a single new disk index, which will contain
    the mem nodes, and preserve the existing 2x sized index. On the fourth,
    combine mem with the first and second indexes, creating a new one of
    size 4x. On the fifth create a single new one, etc.

    When combining is disabled each spill simply appends a new backing
    index. In both cases the in-memory state is reset afterwards.
    """
    combining = self._combine_backing_indices
    if combining:
        new_backing_file, size, backing_pos = \
            self._spill_mem_keys_and_combine()
    else:
        new_backing_file, size = self._spill_mem_keys_without_combining()
    dir_path, base_name = osutils.split(new_backing_file.name)
    # Note: The transport here isn't strictly needed, because we will use
    #       direct access to the new_backing._file object
    transport = get_transport(dir_path)
    new_backing = BTreeGraphIndex(transport, base_name, size)
    # GC will clean up the file
    new_backing._file = new_backing_file
    if not combining:
        self._backing_indices.append(new_backing)
    else:
        if backing_pos == len(self._backing_indices):
            self._backing_indices.append(None)
        self._backing_indices[backing_pos] = new_backing
        # Every slot below the new index was merged into it.
        for merged_pos in range(backing_pos):
            self._backing_indices[merged_pos] = None
    self._nodes_by_key = None
    self._nodes = {}
    self._keys = set()

207

208

def _spill_mem_keys_without_combining(self):

209

return self._write_nodes(self._iter_mem_nodes(), allow_optimize=False)

210

211

def _spill_mem_keys_and_combine(self):

212

iterators_to_combine = [self._iter_mem_nodes()]

213

pos = -1

214

for pos, backing in enumerate(self._backing_indices):

215

if backing is None:

216

pos -= 1

217

break

218

iterators_to_combine.append(backing.iter_all_entries())

219

backing_pos = pos + 1

220

new_backing_file, size = \

221

self._write_nodes(self._iter_smallest(iterators_to_combine),

222

allow_optimize=False)

223

return new_backing_file, size, backing_pos

224

225

def add_nodes(self, nodes):
    """Add nodes to the index.

    :param nodes: An iterable of entries to add. Each entry is
        (key, value, node_refs) when this index has reference lists and
        (key, value) otherwise.
    """
    if not self.reference_lists:
        for key, value in nodes:
            self.add_node(key, value)
        return
    for key, value, node_refs in nodes:
        self.add_node(key, value, node_refs)

236

237

def _iter_mem_nodes(self):

238

"""Iterate over the nodes held in memory."""

239

nodes = self._nodes

240

if self.reference_lists:

241

for key in sorted(nodes):

242

references, value = nodes[key]

243

yield self, key, value, references

244

else:

245

for key in sorted(nodes):

246

references, value = nodes[key]

247

yield self, key, value

248

249

def _iter_smallest(self, iterators_to_combine):

250

if len(iterators_to_combine) == 1:

251

for value in iterators_to_combine[0]:

252

yield value

253

return

254

current_values = []

255

for iterator in iterators_to_combine:

256

try:

257

current_values.append(iterator.next())

258

except StopIteration:

259

current_values.append(None)

260

last = None

261

while True:

262

# Decorate candidates with the value to allow 2.4's min to be used.

263

candidates = [(item[1][1], item) for item

264

in enumerate(current_values) if item[1] is not None]

265

if not len(candidates):

266

return

267

selected = min(candidates)

268

# undecorate back to (pos, node)

269

selected = selected[1]

270

if last == selected[1][1]:

271

raise errors.BadIndexDuplicateKey(last, self)

272

last = selected[1][1]

273

# Yield, with self as the index

274

yield (self,) + selected[1][1:]

275

pos = selected[0]

276

try:

277

current_values[pos] = iterators_to_combine[pos].next()

278

except StopIteration:

279

current_values[pos] = None

280

281

def _add_key(self, string_key, line, rows, allow_optimize=True):
    """Add a key to the current chunk.

    If the line does not fit in the open leaf node, that node is finished,
    the key is recorded as a division point in the rows above (adding a
    new root row when every existing row is full) and the key is then
    added again to a fresh leaf node.

    :param string_key: The key to add.
    :param line: The fully serialised key and value.
    :param rows: The list of builder rows, root first and leaf row last.
        A new root row is inserted at position 0 when the tree grows.
    :param allow_optimize: If set to False, prevent setting the optimize
        flag when writing out. This is used by the _spill_mem_keys_to_disk
        functionality.
        NOTE(review): only the internal-row writers opened alongside a new
        leaf honour this flag; the leaf writer and a newly inserted root
        always use self._optimize_for_size. Confirm whether that is
        intended.
    :raises errors.BadIndexKey: If line does not fit even in a freshly
        opened (empty) leaf node.
    """
    new_leaf = False
    if rows[-1].writer is None:
        # opening a new leaf chunk;
        new_leaf = True
        for pos, internal_row in enumerate(rows[:-1]):
            # flesh out any internal nodes that are needed to
            # preserve the height of the tree
            if internal_row.writer is None:
                length = _PAGE_SIZE
                if internal_row.nodes == 0:
                    length -= _RESERVED_HEADER_BYTES # padded
                if allow_optimize:
                    optimize_for_size = self._optimize_for_size
                else:
                    optimize_for_size = False
                internal_row.writer = chunk_writer.ChunkWriter(length, 0,
                    optimize_for_size=optimize_for_size)
                internal_row.writer.write(_INTERNAL_FLAG)
                internal_row.writer.write(_INTERNAL_OFFSET +
                    str(rows[pos + 1].nodes) + "\n")
        # add a new leaf
        length = _PAGE_SIZE
        if rows[-1].nodes == 0:
            length -= _RESERVED_HEADER_BYTES # padded
        rows[-1].writer = chunk_writer.ChunkWriter(length,
            optimize_for_size=self._optimize_for_size)
        rows[-1].writer.write(_LEAF_FLAG)
    if rows[-1].writer.write(line):
        # this key did not fit in the node:
        if new_leaf:
            # The leaf was empty, so no page will ever be able to hold
            # this line. Raising here avoids recursing forever in search
            # of a suitably large page that cannot exist.
            raise errors.BadIndexKey(string_key)
        rows[-1].finish_node()
        key_line = string_key + "\n"
        new_row = True
        for row in reversed(rows[:-1]):
            # Mark the start of the next node in the node above. If it
            # doesn't fit then propagate upwards until we find one that
            # it does fit into.
            if row.writer.write(key_line):
                row.finish_node()
            else:
                # We've found a node that can handle the pointer.
                new_row = False
                break
        # If we reached the current root without being able to mark the
        # division point, then we need a new root:
        if new_row:
            # We need a new row
            if 'index' in debug.debug_flags:
                trace.mutter('Inserting new global row.')
            new_row = _InternalBuilderRow()
            reserved_bytes = 0
            rows.insert(0, new_row)
            # This will be padded, hence the reserved header bytes are
            # subtracted from the page size.
            new_row.writer = chunk_writer.ChunkWriter(
                _PAGE_SIZE - _RESERVED_HEADER_BYTES,
                reserved_bytes,
                optimize_for_size=self._optimize_for_size)
            new_row.writer.write(_INTERNAL_FLAG)
            new_row.writer.write(_INTERNAL_OFFSET +
                str(rows[1].nodes - 1) + "\n")
            new_row.writer.write(key_line)
        # Retry: the leaf row now has no open writer, so a fresh leaf node
        # is opened for this key.
        self._add_key(string_key, line, rows, allow_optimize=allow_optimize)

349

350

def _write_nodes(self, node_iterator, allow_optimize=True):
    """Write node_iterator out as a B+Tree.

    :param node_iterator: An iterator of sorted nodes. Each node should
        match the output given by iter_all_entries.
    :param allow_optimize: If set to False, prevent setting the optimize
        flag when writing out. This is used by the _spill_mem_keys_to_disk
        functionality.
    :return: (file, size): a file handle for a temporary file containing
        a B+Tree for the nodes, positioned at its start, and the number
        of bytes written to it.
    :raises AssertionError: If the header does not fit in the reserved
        space, or an internal row did not copy the expected byte count.
    """
    # The index rows - rows[0] is the root, rows[1] is the layer under it
    # etc.
    rows = []
    # forward sorted by key. In future we may consider topological sorting,
    # at the cost of table scans for direct lookup, or a second index for
    # direct lookup
    key_count = 0
    # A stack with the number of nodes of each size. 0 is the root node
    # and must always be 1 (if there are any nodes in the tree).
    self.row_lengths = []
    # Loop over all nodes adding them to the bottom row
    # (rows[-1]). When we finish a chunk in a row,
    # propagate the key that didn't fit (comes after the chunk) to the
    # row above, transitively.
    for node in node_iterator:
        if not rows:
            # First key triggers the first row
            rows.append(_LeafBuilderRow())
        key_count += 1
        string_key, line = _btree_serializer._flatten_node(
            node, self.reference_lists)
        self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
    # Close the node still open in each row; only leaf rows go unpadded.
    for row in reversed(rows):
        row.finish_node(pad=(type(row) != _LeafBuilderRow))
    result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
    header = [
        _BTSIGNATURE,
        _OPTION_NODE_REFS + str(self.reference_lists) + '\n',
        _OPTION_KEY_ELEMENTS + str(self._key_length) + '\n',
        _OPTION_LEN + str(key_count) + '\n',
        (_OPTION_ROW_LENGTHS
         + ','.join([str(row.nodes) for row in rows]) + '\n'),
    ]
    result.writelines(header)
    position = sum([len(text) for text in header])
    if position > _RESERVED_HEADER_BYTES:
        raise AssertionError("Could not fit the header in the"
                             " reserved space: %d > %d"
                             % (position, _RESERVED_HEADER_BYTES))
    # write the rows out:
    for row in rows:
        reserved = _RESERVED_HEADER_BYTES # reserved space for first node
        row.spool.flush()
        row.spool.seek(0)
        # copy nodes to the finalised file.
        # Special case the first node as it may be prefixed
        first_node = row.spool.read(_PAGE_SIZE)
        result.write(first_node[reserved:])
        result.write("\x00" * (reserved - position))
        position = 0 # Only the root row actually has an offset
        copied_len = osutils.pumpfile(row.spool, result)
        expected_len = (row.nodes - 1) * _PAGE_SIZE
        if copied_len != expected_len and type(row) != _LeafBuilderRow:
            raise AssertionError("Incorrect amount of data copied"
                                 " expected: %d, got: %d"
                                 % (expected_len, copied_len))
    result.flush()
    size = result.tell()
    result.seek(0)
    return result, size

422

423

def finish(self):
    """Finalise the index.

    :return: A file handle for a temporary file containing the nodes added
        to the index.
    """
    written = self._write_nodes(self.iter_all_entries())
    # _write_nodes also reports the size; only the file is returned.
    return written[0]

430

431

def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value) or
        (index, key, value, reference_lists) entries. In-memory nodes are
        produced in sorted key order and, when backing indices exist, are
        merged with their entries smallest key first.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    # Doing serial rather than ordered would be faster; but this shouldn't
    # be getting called routinely anyway.
    iterators = [self._iter_mem_nodes()]
    iterators.extend([backing.iter_all_entries()
                      for backing in self._backing_indices
                      if backing is not None])
    if len(iterators) > 1:
        return self._iter_smallest(iterators)
    return iterators[0]

450

451

def iter_entries(self, keys):
    """Iterate over keys within the index.

    Keys held in memory are yielded first; the remaining keys are then
    looked up in each backing index in turn.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable of (index, key, value, reference_lists), or
        (index, key, value) when the index has no reference lists. There
        is no defined order for the result iteration.
    """
    wanted = set(keys)
    local_keys = wanted.intersection(self._keys)
    with_references = self.reference_lists
    for key in local_keys:
        references, value = self._nodes[key]
        if with_references:
            yield self, key, value, references
        else:
            yield self, key, value
    # Find things that are in backing indices that have not been handled
    # yet.
    if not self._backing_indices:
        return # We won't find anything there either
    # Remove all of the keys that we found locally
    wanted.difference_update(local_keys)
    for backing in self._backing_indices:
        if backing is None:
            continue
        if not wanted:
            return
        for node in backing.iter_entries(wanted):
            # Found: later backing indices need not be asked for it.
            wanted.remove(node[1])
            yield (self,) + node[1:]

483

484

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    :raises errors.BadIndexKey: If a prefix starts with None or has the
        wrong number of elements.
    """
    # XXX: Too much duplication with the GraphIndex class; consider finding
    # a good place to pull out the actual common logic.
    prefixes = set(keys)
    if not prefixes:
        return
    # Matches from the backing indices come first, re-labelled with self.
    for backing in self._backing_indices:
        if backing is None:
            continue
        for node in backing.iter_entries_prefix(prefixes):
            yield (self,) + node[1:]
    if self._key_length == 1:
        # Single-element keys cannot be partial: plain lookups suffice.
        for key in prefixes:
            # sanity check
            if key[0] is None or len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            if key not in self._nodes:
                continue
            node = self._nodes[key]
            if self.reference_lists:
                yield self, key, node[1], node[0]
            else:
                yield self, key, node[1]
        return
    for key in prefixes:
        # sanity check
        if key[0] is None or len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        # find what it refers to:
        key_dict = self._get_nodes_by_key()
        elements = list(key)
        # Descend one level per leading non-None element.
        depth = 0
        try:
            while depth < len(elements) and elements[depth] is not None:
                key_dict = key_dict[elements[depth]]
                depth += 1
        except KeyError:
            # a non-existent lookup.
            continue
        if depth == len(elements):
            # Fully specified key: key_dict is now the stored entry.
            yield (self, ) + key_dict
            continue
        # Partial key: walk every sub-dict below the matched prefix.
        pending = [key_dict]
        while pending:
            key_dict = pending.pop(-1)
            # can't be empty or would not exist
            sample = key_dict.itervalues().next()
            if type(sample) == dict:
                # push keys
                pending.extend(key_dict.itervalues())
            else:
                # yield keys
                for entry in key_dict.itervalues():
                    yield (self, ) + entry

559

560

def _get_nodes_by_key(self):

561

if self._nodes_by_key is None:

562

nodes_by_key = {}

563

if self.reference_lists:

564

for key, (references, value) in self._nodes.iteritems():

565

key_dict = nodes_by_key

566

for subkey in key[:-1]:

567

key_dict = key_dict.setdefault(subkey, {})

568

key_dict[key[-1]] = key, value, references

569

else:

570

for key, (references, value) in self._nodes.iteritems():

571

key_dict = nodes_by_key

572

for subkey in key[:-1]:

573

key_dict = key_dict.setdefault(subkey, {})

574

key_dict[key[-1]] = key, value

575

self._nodes_by_key = nodes_by_key

576

return self._nodes_by_key

577

578

def key_count(self):
    """Return an estimate of the number of keys in this index.

    The result is the number of keys held in memory plus the key_count()
    of every backing index.
    """
    total = len(self._keys)
    for backing in self._backing_indices:
        if backing is not None:
            total += backing.key_count()
    return total

585

586

def validate(self):
    """In-memory indices have no known corruption modes at the moment.

    There is nothing to check, so this method does nothing.
    """

588

589

590

class _LeafNode(object):
    """A leaf node for a serialised B+Tree index."""

    __slots__ = ('keys',)

    def __init__(self, bytes, key_length, ref_list_length):
        """Parse bytes to create a leaf node object.

        The parsed entries are stored as a dict in self.keys.
        """
        # splitlines mangles the \r delimiters.. don't use it.
        parsed = _btree_serializer._parse_leaf_lines(
            bytes, key_length, ref_list_length)
        self.keys = dict(parsed)

600

601

602

class _InternalNode(object):

603

"""An internal node for a serialised B+Tree index."""

604

605

__slots__ = ('keys', 'offset')

606

607

def __init__(self, bytes):

608

"""Parse bytes to create an internal node object."""

609

# splitlines mangles the \r delimiters.. don't use it.

610

self.keys = self._parse_lines(bytes.split('\n'))

611

612

def _parse_lines(self, lines):

613

nodes = []

614

self.offset = int(lines[1][7:])

615

for line in lines[2:]:

616

if line == '':

617

break

618

nodes.append(tuple(map(intern, line.split('\0'))))

619

return nodes

620

621

622

class BTreeGraphIndex(object):

623

"""Access to nodes via the standard GraphIndex interface for B+Tree's.

624

625

Individual nodes are held in a LRU cache. This holds the root node in

626

memory except when very large walks are done.

627

"""

628

629

def __init__(self, transport, name, size):
    """Create a B+Tree index object on the index name.

    :param transport: The transport to read data for the index from.
    :param name: The file name of the index on transport.
    :param size: Optional size of the index in bytes. This allows
        compatibility with the GraphIndex API, as well as ensuring that
        the initial read (to read the root node header) can be done
        without over-reading even on empty indices, and on small indices
        allows single-IO to read the entire index.
    """
    self._transport = transport
    self._name = name
    self._size = size
    self._file = None
    self._recommended_pages = self._compute_recommended_pages()
    # Nothing has been read yet: no root node and no header values.
    self._root_node = None
    self._key_count = None
    self._row_lengths = None
    self._row_offsets = None # Start of each row, [-1] is the end
    # Default max size is 100,000 leaf values
    self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
    self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
    # We could limit this, but even a 300k record btree has only 3k leaf
    # nodes, and only 20 internal nodes. So the default of 100 nodes in an
    # LRU would mean we always cache everything anyway, no need to pay the
    # overhead of LRU
    self._internal_node_cache = fifo_cache.FIFOCache(100)

657

658

def __eq__(self, other):

659

"""Equal when self and other were created with the same parameters."""

660

return (

661

type(self) == type(other) and

662

self._transport == other._transport and

663

self._name == other._name and

664

self._size == other._size)

665

666

def __ne__(self, other):

667

return not self.__eq__(other)

668

669

def _get_and_cache_nodes(self, nodes):

670

"""Read nodes and cache them in the lru.

671

672

The nodes list supplied is sorted and then read from disk, each node

673

being inserted it into the _node_cache.

674

675

Note: Asking for more nodes than the _node_cache can contain will

676

result in some of the results being immediately discarded, to prevent

677

this an assertion is raised if more nodes are asked for than are

678

cachable.

679

680

:return: A dict of {node_pos: node}

681

"""

682

found = {}

683

start_of_leaves = None

684

for node_pos, node in self._read_nodes(sorted(nodes)):

685

if node_pos == 0: # Special case

686

self._root_node = node

687

else:

688

if start_of_leaves is None:

689

start_of_leaves = self._row_offsets[-2]

690

if node_pos < start_of_leaves:

691

self._internal_node_cache.add(node_pos, node)

692

else:

693

self._leaf_node_cache.add(node_pos, node)

694

found[node_pos] = node

695

return found

696

697

def _compute_recommended_pages(self):
    """Convert transport's recommended_page_size into btree pages.

    recommended_page_size is in bytes, we want to know how many _PAGE_SIZE
    pages fit in that length (rounded up).
    """
    recommended_read = self._transport.recommended_page_size()
    pages = math.ceil(recommended_read / float(_PAGE_SIZE))
    return int(pages)

707

708

def _compute_total_pages_in_index(self):

709

"""How many pages are in the index.

710

711

If we have read the header we will use the value stored there.

712

Otherwise it will be computed based on the length of the index.

713

"""

714

if self._size is None:

715

raise AssertionError('_compute_total_pages_in_index should not be'

716

' called when self._size is None')

717

if self._root_node is not None:

718

# This is the number of pages as defined by the header

719

return self._row_offsets[-1]

720

# This is the number of pages as defined by the size of the index. They

721

# should be indentical.

722

total_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))

723

return total_pages

724

725

def _expand_offsets(self, offsets):
    """Find extra pages to download.

    The idea is that we always want to make big-enough requests (like 64kB
    for http), so that we don't waste round trips. So given the entries
    that we already have cached and the new pages being downloaded figure
    out what other pages we might want to read.

    See also doc/developers/btree_index_prefetch.txt for more details.

    :param offsets: The offsets to be read
    :return: A list of offsets to download
    """
    tracing = 'index' in debug.debug_flags
    if tracing:
        trace.mutter('expanding: %s\toffsets: %s', self._name, offsets)

    if len(offsets) >= self._recommended_pages:
        # Don't add more, we are already requesting more than enough
        if tracing:
            trace.mutter(' not expanding large request (%s >= %s)',
                         len(offsets), self._recommended_pages)
        return offsets
    if self._size is None:
        # Don't try anything, because we don't know where the file ends
        if tracing:
            trace.mutter(' not expanding without knowing index size')
        return offsets
    total_pages = self._compute_total_pages_in_index()
    cached_offsets = self._get_offsets_to_cached_pages()
    # If reading recommended_pages would read the rest of the index, just
    # do so.
    if total_pages - len(cached_offsets) <= self._recommended_pages:
        # Read whatever is left
        if cached_offsets:
            expanded = [x for x in xrange(total_pages)
                        if x not in cached_offsets]
        else:
            expanded = range(total_pages)
        if tracing:
            trace.mutter(' reading all unread pages: %s', expanded)
        return expanded

    if self._root_node is None:
        # ATM on the first read of the root node of a large index, we don't
        # bother pre-reading any other pages. This is because the
        # likelihood of actually reading interesting pages is very low.
        # See doc/developers/btree_index_prefetch.txt for a discussion, and
        # a possible implementation when we are guessing that the second
        # layer index is small
        final_offsets = offsets
    else:
        tree_depth = len(self._row_lengths)
        if len(cached_offsets) < tree_depth and len(offsets) == 1:
            # We haven't read enough to justify expansion
            # If we are only going to read the root node, and 1 leaf node,
            # then it isn't worth expanding our request. Once we've read at
            # least 2 nodes, then we are probably doing a search, and we
            # start expanding our requests.
            if tracing:
                trace.mutter(' not expanding on first reads')
            return offsets
        final_offsets = self._expand_to_neighbors(offsets, cached_offsets,
                                                  total_pages)

    final_offsets = sorted(final_offsets)
    if tracing:
        trace.mutter('expanded: %s', final_offsets)
    return final_offsets

793

794

def _expand_to_neighbors(self, offsets, cached_offsets, total_pages):

795

"""Expand requests to neighbors until we have enough pages.

796

797

This is called from _expand_offsets after policy has determined that we

798

want to expand.

799

We only want to expand requests within a given layer. We cheat a little

800

bit and assume all requests will be in the same layer. This is true

801

given the current design, but if it changes this algorithm may perform

802

oddly.

803

804

:param offsets: requested offsets

805

:param cached_offsets: offsets for pages we currently have cached

806

:return: A set() of offsets after expansion

807

"""

808

final_offsets = set(offsets)

809

first = end = None

810

new_tips = set(final_offsets)

811

while len(final_offsets) < self._recommended_pages and new_tips:

812

next_tips = set()

813

for pos in new_tips:

814

if first is None:

815

first, end = self._find_layer_first_and_end(pos)

816

previous = pos - 1

817

if (previous > 0

818

and previous not in cached_offsets

819

and previous not in final_offsets

820

and previous >= first):

821

next_tips.add(previous)

822

after = pos + 1

823

if (after < total_pages

824

and after not in cached_offsets

825

and after not in final_offsets

826

and after < end):

827

next_tips.add(after)

828

# This would keep us from going bigger than

829

# recommended_pages by only expanding the first offsets.

830

# However, if we are making a 'wide' request, it is

831

# reasonable to expand all points equally.

832

# if len(final_offsets) > recommended_pages:

833

# break

834

final_offsets.update(next_tips)

835

new_tips = next_tips

836

return final_offsets

837

838

def external_references(self, ref_list_num):
    """Return the keys referenced in a ref list but absent from the index.

    :param ref_list_num: Which reference list of each node to inspect.
    :return: A set of referenced keys that are not keys of this index.
    :raises ValueError: If the index has no reference list with that
        number.
    """
    if self._root_node is None:
        self._get_root_node()
    if ref_list_num + 1 > self.node_ref_lists:
        raise ValueError('No ref list %d, index has %d ref lists'
                         % (ref_list_num, self.node_ref_lists))
    present = set()
    referenced = set()
    for node in self.iter_all_entries():
        present.add(node[1])
        referenced.update(node[3][ref_list_num])
    return referenced.difference(present)

850

851

def _find_layer_first_and_end(self, offset):

852

"""Find the start/stop nodes for the layer corresponding to offset.

853

854

:return: (first, end)

855

first is the first node in this layer

856

end is the first node of the next layer

857

"""

858

first = end = 0

859

for roffset in self._row_offsets:

860

first = end

861

end = roffset

862

if offset < roffset:

863

break

864

return first, end

865

866

def _get_offsets_to_cached_pages(self):

867

"""Determine what nodes we already have cached."""

868

cached_offsets = set(self._internal_node_cache.keys())

869

cached_offsets.update(self._leaf_node_cache.keys())

870

if self._root_node is not None:

871

cached_offsets.add(0)

872

return cached_offsets

873

874

def _get_root_node(self):

875

if self._root_node is None:

876

# We may not have a root node yet

877

self._get_internal_nodes([0])

878

return self._root_node

879

880

def _get_nodes(self, cache, node_indexes):

881

found = {}

882

needed = []

883

for idx in node_indexes:

884

if idx == 0 and self._root_node is not None:

885

found[0] = self._root_node

886

continue

887

try:

888

found[idx] = cache[idx]

889

except KeyError:

890

needed.append(idx)

891

if not needed:

892

return found

893

needed = self._expand_offsets(needed)

894

found.update(self._get_and_cache_nodes(needed))

895

return found

896

897

def _get_internal_nodes(self, node_indexes):

898

"""Get a node, from cache or disk.

899

900

After getting it, the node will be cached.

901

"""

902

return self._get_nodes(self._internal_node_cache, node_indexes)

903

904

def _cache_leaf_values(self, nodes):

905

"""Cache directly from key => value, skipping the btree."""

906

if self._leaf_value_cache is not None:

907

for node in nodes.itervalues():

908

for key, value in node.keys.iteritems():

909

if key in self._leaf_value_cache:

910

# Don't add the rest of the keys, we've seen this node

911

# before.

912

break

913

self._leaf_value_cache[key] = value

914

915

def _get_leaf_nodes(self, node_indexes):

916

"""Get a bunch of nodes, from cache or disk."""

917

found = self._get_nodes(self._leaf_node_cache, node_indexes)

918

self._cache_leaf_values(found)

919

return found

920

921

def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value) or (index, key, value,
        reference_lists).
        The former tuple is used when there are no reference lists in the
        index, making the API compatible with simple key:value index types.
        Entries are produced node by node, with the keys of each node in
        sorted order.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if not self.key_count():
        return
    if self._row_offsets[-1] == 1:
        # There is only the root node, and we read that via key_count()
        nodes = [(0, self._root_node)]
    else:
        # The last row of the tree holds the leaves.
        start_of_leaves = self._row_offsets[-2]
        end_of_leaves = self._row_offsets[-1]
        nodes = self._read_nodes(range(start_of_leaves, end_of_leaves))
    # We iterate strictly in-order so that we can use this function
    # for spilling index builds to disk.
    with_refs = self.node_ref_lists
    for _, node in nodes:
        for key, (value, refs) in sorted(node.keys.items()):
            if with_refs:
                yield (self, key, value, refs)
            else:
                yield (self, key, value)

963

964

@staticmethod

965

def _multi_bisect_right(in_keys, fixed_keys):

966

"""Find the positions where each 'in_key' would fit in fixed_keys.

967

968

This is equivalent to doing "bisect_right" on each in_key into

969

fixed_keys

970

971

:param in_keys: A sorted list of keys to match with fixed_keys

972

:param fixed_keys: A sorted list of keys to match against

973

:return: A list of (integer position, [key list]) tuples.

974

"""

975

if not in_keys:

976

return []

977

if not fixed_keys:

978

# no pointers in the fixed_keys list, which means everything must

979

# fall to the left.

980

return [(0, in_keys)]

981

982

# TODO: Iterating both lists will generally take M + N steps

983

# Bisecting each key will generally take M * log2 N steps.

984

# If we had an efficient way to compare, we could pick the method

985

# based on which has the fewer number of steps.

986

# There is also the argument that bisect_right is a compiled

987

# function, so there is even more to be gained.

988

# iter_steps = len(in_keys) + len(fixed_keys)

989

# bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)

990

if len(in_keys) == 1: # Bisect will always be faster for M = 1

991

return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]

992

# elif bisect_steps < iter_steps:

993

# offsets = {}

994

# for key in in_keys:

995

# offsets.setdefault(bisect_right(fixed_keys, key),

996

# []).append(key)

997

# return [(o, offsets[o]) for o in sorted(offsets)]

998

in_keys_iter = iter(in_keys)

999

fixed_keys_iter = enumerate(fixed_keys)

1000

cur_in_key = in_keys_iter.next()

1001

cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()

1002

1003

class InputDone(Exception): pass

1004

class FixedDone(Exception): pass

1005

1006

output = []

1007

cur_out = []

1008

1009

# TODO: Another possibility is that rather than iterating on each side,

1010

# we could use a combination of bisecting and iterating. For

1011

# example, while cur_in_key < fixed_key, bisect to find its

1012

# point, then iterate all matching keys, then bisect (restricted

1013

# to only the remainder) for the next one, etc.

1014

try:

1015

while True:

1016

if cur_in_key < cur_fixed_key:

1017

cur_keys = []

1018

cur_out = (cur_fixed_offset, cur_keys)

1019

output.append(cur_out)

1020

while cur_in_key < cur_fixed_key:

1021

cur_keys.append(cur_in_key)

1022

try:

1023

cur_in_key = in_keys_iter.next()

1024

except StopIteration:

1025

raise InputDone

1026

# At this point cur_in_key must be >= cur_fixed_key

1027

# step the cur_fixed_key until we pass the cur key, or walk off

1028

# the end

1029

while cur_in_key >= cur_fixed_key:

1030

try:

1031

cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()

1032

except StopIteration:

1033

raise FixedDone

1034

except InputDone:

1035

# We consumed all of the input, nothing more to do

1036

pass

1037

except FixedDone:

1038

# There was some input left, but we consumed all of fixed, so we

1039

# have to add one more for the tail

1040

cur_keys = [cur_in_key]

1041

cur_keys.extend(in_keys_iter)

1042

cur_out = (len(fixed_keys), cur_keys)

1043

output.append(cur_out)

1044

return output

1045

1046

def iter_entries(self, keys):
    """Iterate over keys within the index.

    Keys found in _leaf_value_cache (when it is enabled) are yielded
    directly; only the remaining keys are looked up through the btree.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned.
    """
    keys = frozenset(keys)
    if not keys:
        return

    if not self.key_count():
        return

    if self._leaf_value_cache is None:
        needed_keys = keys
    else:
        needed_keys = []
        for key in keys:
            cached = self._leaf_value_cache.get(key, None)
            if cached is None:
                # Not in the value cache, it has to be found in the btree.
                needed_keys.append(key)
                continue
            # Answered from the cache; it must not be looked up (and
            # yielded) a second time below.
            value, refs = cached
            if self.node_ref_lists:
                yield (self, key, value, refs)
            else:
                yield (self, key, value)

    if not needed_keys:
        return
    # 6 seconds spent in miss_torture using the sorted() line.
    # Even with out of order disk IO it seems faster not to sort it when
    # large queries are being made.
    # However, now that we are doing multi-way bisecting, we need the keys
    # in sorted order anyway. We could change the multi-way code to not
    # require sorted order. (For example, it bisects for the first node,
    # does an in-order search until a key comes before the current point,
    # which it then bisects for, etc.)
    needed_keys = sorted(needed_keys)

    # Start with every key assigned to the root node and push the keys
    # down one row of internal nodes at a time.
    nodes_and_keys = [(0, needed_keys)]

    for next_row_start in self._row_offsets[1:-1]:
        node_indexes = [idx for idx, s_keys in nodes_and_keys]
        nodes = self._get_internal_nodes(node_indexes)

        next_nodes_and_keys = []
        for node_index, sub_keys in nodes_and_keys:
            node = nodes[node_index]
            positions = self._multi_bisect_right(sub_keys, node.keys)
            node_offset = next_row_start + node.offset
            next_nodes_and_keys.extend([(node_offset + pos, s_keys)
                                       for pos, s_keys in positions])
        nodes_and_keys = next_nodes_and_keys
    # We should now be at the _LeafNodes
    node_indexes = [idx for idx, s_keys in nodes_and_keys]

    # TODO: We may *not* want to always read all the nodes in one
    #       big go. Consider setting a max size on this.

    nodes = self._get_leaf_nodes(node_indexes)
    for node_index, sub_keys in nodes_and_keys:
        if not sub_keys:
            continue
        node = nodes[node_index]
        for next_sub_key in sub_keys:
            if next_sub_key in node.keys:
                value, refs = node.keys[next_sub_key]
                if self.node_ref_lists:
                    yield (self, next_sub_key, value, refs)
                else:
                    yield (self, next_sub_key, value)

1126

1127

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Matching works on whole key elements, never on part of an element's
    bytestring. e.g. with the keys ('foo', 'bar') and ('foobar', 'gam'),
    a prefix search for ('foo', None) returns only the former.

    WARNING: This method walks iter_all_entries unconditionally, so the
    whole index is parsed on every call.

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    :raises BadIndexKey: If a prefix starts with None or does not have
        the index's key length.
    """
    keys = sorted(set(keys))
    if not keys:
        return
    # Load if needed to check key lengths
    if self._key_count is None:
        self._get_root_node()
    single_element = self._key_length == 1
    # TODO: only access nodes that can satisfy the prefixes we are looking
    # for. For now, to meet API usage (as this function is not used by
    # current bzrlib) just suck the entire index and iterate in memory.
    flat = {}
    nested = {}
    for entry in self.iter_all_entries():
        # entry is (index, key, value) or (index, key, value, refs)
        key = entry[1]
        if single_element:
            flat[key] = entry[2:]
            continue
        # For a key of (foo, bar, baz) create
        # nested[foo][bar][baz] = (key, value[, refs])
        level = nested
        for element in key[:-1]:
            level = level.setdefault(element, {})
        level[key[-1]] = entry[1:]

    for key in keys:
        # sanity check
        if key[0] is None or len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        if single_element:
            try:
                found = flat[key]
            except KeyError:
                continue
            yield (self, key) + found
            continue
        # find the subdict whose contents should be returned.
        level = nested
        depth = 0
        try:
            while depth < len(key) and key[depth] is not None:
                level = level[key[depth]]
                depth += 1
        except KeyError:
            # a non-existent lookup.
            continue
        if depth == len(key):
            # the last thing looked up was a terminal element
            yield (self, ) + level
            continue
        pending = [level]
        while pending:
            current = pending.pop(-1)
            # can't be empty or would not exist
            sample = current.itervalues().next()
            if isinstance(sample, dict):
                # one more level of dicts below, visit each of them
                pending.extend(current.itervalues())
            else:
                for record in current.itervalues():
                    # each record is the key:value:node refs tuple
                    # ready to yield.
                    yield (self, ) + record

1238

1239

def key_count(self):
    """Return an estimate of the number of keys in this index.

    For BTreeGraphIndex the estimate is exact as it is contained in the
    header. The root node is requested first when the count has not been
    set yet.
    """
    if self._key_count is not None:
        return self._key_count
    self._get_root_node()
    return self._key_count

1248

1249

def _compute_row_offsets(self):

1250

"""Fill out the _row_offsets attribute based on _row_lengths."""

1251

offsets = []

1252

row_offset = 0

1253

for row in self._row_lengths:

1254

offsets.append(row_offset)

1255

row_offset += row

1256

offsets.append(row_offset)

1257

self._row_offsets = offsets

1258

1259

def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    After the signature has been checked, the four option lines are
    parsed in order and stored as node_ref_lists, _key_length, _key_count
    and _row_lengths; _row_offsets is then recomputed.

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data. (which may be of length 0).
    :raises BadIndexFormatSignature: If bytes does not start with the
        signature.
    :raises BadIndexOptions: If an option line lacks its expected prefix
        or its value cannot be converted.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if signature != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, BTreeGraphIndex)
    lines = bytes[len(expected_signature):].splitlines()

    def parse_row_lengths(text):
        # Comma separated integers; empty items are ignored.
        return [int(length) for length in text.split(',') if len(length)]

    # (line prefix, attribute to set, converter) in header order.
    options = (
        (_OPTION_NODE_REFS, 'node_ref_lists', int),
        (_OPTION_KEY_ELEMENTS, '_key_length', int),
        (_OPTION_LEN, '_key_count', int),
        (_OPTION_ROW_LENGTHS, '_row_lengths', parse_row_lengths),
        )
    for line_number, (prefix, attribute, convert) in enumerate(options):
        options_line = lines[line_number]
        if not options_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            setattr(self, attribute, convert(options_line[len(prefix):]))
        except ValueError:
            raise errors.BadIndexOptions(self)
    self._compute_row_offsets()

    # calculate the bytes we have processed: the signature, the four
    # option lines, and 4 more (presumably one newline per option line).
    consumed = sum([len(line) for line in lines[0:4]])
    header_end = len(signature) + consumed + 4
    return header_end, bytes[header_end:]

1305

1306

def _read_nodes(self, nodes):
    """Read some nodes from disk and yield them parsed.

    This gets the node data into memory (from the whole file when the
    size is unknown, otherwise with a transport readv or by seeking in
    self._file), parses each node, then yields it to the caller. The
    nodes are requested in the supplied order. If possible doing sort()
    on the list before requesting a read may improve performance.

    Nothing is cached here.

    :param nodes: The nodes to read. 0 - first node, 1 - second node etc.
    :return: A generator of (node index, node) tuples, where node is a
        _LeafNode or an _InternalNode. When the whole file had to be
        fetched, every page in it is yielded, not only those requested.
    :raises AssertionError: If a requested node starts beyond the known
        size of the file, or a page is neither a leaf nor an internal
        node.
    """
    # may be the byte string of the whole file
    whole_file = None
    # list of (offset, length) regions of the file that should, eventually
    # be read in to data_ranges, either from 'whole_file' or from the
    # transport
    ranges = []
    for index in nodes:
        offset = index * _PAGE_SIZE
        size = _PAGE_SIZE
        if index == 0:
            # Root node - special case
            if self._size:
                size = min(_PAGE_SIZE, self._size)
            else:
                # The only case where we don't know the size, is for very
                # small indexes. So we read the whole thing
                whole_file = self._transport.get_bytes(self._name)
                self._size = len(whole_file)
                # the whole thing should be parsed out of 'whole_file'
                ranges.append((0, len(whole_file)))
                break
        else:
            if offset > self._size:
                raise AssertionError('tried to read past the end'
                                     ' of the file %s > %s'
                                     % (offset, self._size))
            size = min(size, self._size - offset)
        ranges.append((offset, size))
    if not ranges:
        return
    elif whole_file is not None:
        # already have the whole file
        data_ranges = [(start, whole_file[start:start+_PAGE_SIZE])
                       for start in xrange(0, len(whole_file), _PAGE_SIZE)]
    elif self._file is None:
        data_ranges = self._transport.readv(self._name, ranges)
    else:
        data_ranges = []
        for offset, size in ranges:
            self._file.seek(offset)
            data_ranges.append((offset, self._file.read(size)))
    for offset, data in data_ranges:
        if offset == 0:
            # extract the header; offset becomes the header length
            offset, data = self._parse_header_from_bytes(data)
            if len(data) == 0:
                continue
        node_bytes = zlib.decompress(data)
        if node_bytes.startswith(_LEAF_FLAG):
            node = _LeafNode(node_bytes, self._key_length,
                             self.node_ref_lists)
        elif node_bytes.startswith(_INTERNAL_FLAG):
            node = _InternalNode(node_bytes)
        else:
            raise AssertionError("Unknown node type for %r" % node_bytes)
        # Floor division: the page index has to stay an integer (and the
        # header length has to round down to page 0) even where '/' means
        # true division.
        yield offset // _PAGE_SIZE, node

1371

1372

def _signature(self):
    """Return the signature that files of this index type start with.

    :return: The module-level _BTSIGNATURE value.
    """
    return _BTSIGNATURE

1375

1376

def validate(self):
    """Validate that everything in the index can be accessed.

    The root node is requested, then every node from the start of the
    second row up to the end of the last row is read and parsed.
    """
    self._get_root_node()
    # With a single row there is nothing beyond the root, so start at 1
    # and we shouldn't be reading anything anyway.
    first_unread = 1
    if len(self._row_lengths) > 1:
        first_unread = self._row_offsets[1]
    stop = self._row_offsets[-1]
    # just read and parse every node.
    for _ in self._read_nodes(range(first_unread, stop)):
        pass

1388

1389

1390

try:

1391

from bzrlib import _btree_serializer_c as _btree_serializer

1392

except ImportError:

1393

from bzrlib import _btree_serializer_py as _btree_serializer

Older »