~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/btree_index.py

Committer: John Arbash Meinel
Date: 2009-07-06 18:59:24 UTC
mto: This revision was merged to the branch mainline in revision 4522.
Revision ID: john@arbash-meinel.com-20090706185924-qlhn1j607117lgdj

Start implementing Annotator.add_special_text functionality.

The Python implementation supports it. Basically, it is meant to allow things
like WT and PreviewTree to insert the 'current' content into the graph, so that
we can get local modifications into the annotations.
There is also some work here to support texts that are already cached in the
annotator, so that we avoid extracting them and can shortcut the history.

files added:
bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/_bencode_pyx.h

bzrlib/_bencode_pyx.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_knit_load_data_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_patiencediff_c.c

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/api.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_tags.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bencode.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/cache_utf8.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/cmd_version_info.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/email_message.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/diverged-branches.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inventory_delta.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lru_cache.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/pack.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/rules.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/tag.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/bzrdir_implementations/test_push.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__annotator.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test__rio.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_bencode.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chk_serializer.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_file_with_stat.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_content_filters.py

bzrlib/tests/workingtree_implementations/test_eol_conversion.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/timestamp.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/log.py

bzrlib/transport/nosmart.py

bzrlib/transport/remote.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/util/_bencode_py.py

bzrlib/util/bencode.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/views.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/bazaar-vcs.org.kid

doc/developers

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bug-handling.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2.txt

doc/developers/gc.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.ru.txt

doc/index.txt

doc/news-template.txt

doc/ru

doc/ru/mini-tutorial

doc/ru/mini-tutorial/index.txt

doc/ru/quick-reference

doc/ru/quick-reference/Makefile

doc/ru/quick-reference/quick-start-summary.pdf

doc/ru/quick-reference/quick-start-summary.png

doc/ru/quick-reference/quick-start-summary.svg

doc/ru/tutorials

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/core_concepts.txt

doc/ru/user-guide/images

doc/ru/user-guide/images/workflows_centralized.png

doc/ru/user-guide/images/workflows_centralized.svg

doc/ru/user-guide/images/workflows_gatekeeper.png

doc/ru/user-guide/images/workflows_gatekeeper.svg

doc/ru/user-guide/images/workflows_localcommit.png

doc/ru/user-guide/images/workflows_localcommit.svg

doc/ru/user-guide/images/workflows_peer.png

doc/ru/user-guide/images/workflows_peer.svg

doc/ru/user-guide/images/workflows_pqm.png

doc/ru/user-guide/images/workflows_pqm.svg

doc/ru/user-guide/images/workflows_shared.png

doc/ru/user-guide/images/workflows_shared.svg

doc/ru/user-guide/images/workflows_single.png

doc/ru/user-guide/images/workflows_single.svg

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/stacked.txt

doc/ru/user-guide/using_checkouts.txt

doc/ru/user-guide/zen.txt

man1

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/check-newsbugs.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/time_graph.py

tools/win32/build_release.py

tools/win32/info.txt

tools/win32/run_script.py

files removed:
NEWS.developers

build-api

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_escaped_store.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

doc/README.1st

files renamed:
bzrlib/patiencediff.py => bzrlib/_patiencediff_py.py

bzrlib/graph.py => bzrlib/deprecated_graph.py

tools/doc_generate/ => bzrlib/doc_generate/

doc/configuration.txt => bzrlib/help_topics/en/configuration.txt

bzrlib/tests/blackbox/test_bundle.py => bzrlib/tests/blackbox/test_send.py

bzrlib/tests/HTTPTestUtil.py => bzrlib/tests/http_utils.py

bzrlib/tests/repository_implementations/ => bzrlib/tests/per_repository/

bzrlib/tests/repository_implementations/test_revprops.py => bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/test_command.py => bzrlib/tests/test_commands.py

bzrlib/tests/test_graph.py => bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_revisionnamespaces.py => bzrlib/tests/test_revisionspec.py

bzrlib/transport/ftp.py => bzrlib/transport/ftp/__init__.py

bzrlib/win32console.py => bzrlib/win32utils.py

bzrlib/xml5.py => bzrlib/xml8.py

HACKING => doc/en/developer-guide/HACKING.txt

doc/tutorial.txt => doc/en/tutorials/tutorial.txt

doc/index.txt => doc/en/user-guide/index.txt

doc/plugins.txt => doc/en/user-guide/plugins.txt

doc/setting_up_email.txt => doc/en/user-guide/setting_up_email.txt

doc/specifying_revisions.txt => doc/en/user-guide/specifying_revisions.txt

doc/using_aliases.txt => doc/en/user-guide/using_aliases.txt

generate_docs.py => tools/generate_docs.py

files modified:
.bzrignore

BRANCH.TODO

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc_generate/__init__.py

bzrlib/doc_generate/autodoc_bash_completion.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/response.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/configobj/configobj.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml_serializer.py

contrib/bash/bzr.simple

contrib/newinventory.py

contrib/pwclient.full

doc/default.css

profile_imports.py

setup.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/trace-revisions

tools/weavebench.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/ostools.py

tools/win32/start_bzr.bat

Show diffs side-by-side

added added

removed removed

bzrlib/btree_index.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""B+Tree indices"""

from bisect import bisect_right

import math

import tempfile

import zlib

from bzrlib import (

chunk_writer,

debug,

errors,

fifo_cache,

index,

lru_cache,

osutils,

trace,

)

from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN

from bzrlib.transport import get_transport

# First line of the serialised index header; _write_nodes emits it before
# the option lines.
_BTSIGNATURE = "B+Tree Graph Index 2\n"

# Prefix of the header line that lists the node count of every row,
# comma separated (see _write_nodes).
_OPTION_ROW_LENGTHS = "row_lengths="

# First line written into every leaf node (see _add_key).
_LEAF_FLAG = "type=leaf\n"

# First line written into every internal node (see _add_key).
_INTERNAL_FLAG = "type=internal\n"

# Prefix of the second line of an internal node; _add_key follows it with a
# node count taken from the row below.
_INTERNAL_OFFSET = "offset="

# Bytes set aside at the start of the first node of each row so that the
# index header can be placed in front of the root node.
_RESERVED_HEADER_BYTES = 120

# Size in bytes of one node (page) of the index.
_PAGE_SIZE = 4096

# 4K per page: 4MB - 1000 entries

_NODE_CACHE_SIZE = 1000

class _BuilderRow(object):
    """State accumulated while one row of the index is being written.

    :ivar spool: A temporary file collecting the finished nodes of this row.
    :ivar nodes: How many nodes have been emitted to the spool so far.
    :ivar writer: The writer of the node currently being filled, or None when
        no node is open.
    """

    def __init__(self):
        """Create a _BuilderRow."""
        self.writer = None
        self.spool = tempfile.TemporaryFile()
        self.nodes = 0

    def finish_node(self, pad=True):
        """Flush the currently open node into the spool and close it.

        :param pad: When False and the writer reports padding, the last
            chunk returned by the writer (presumably the padding itself) is
            left out of the spool; its length is still counted when checking
            page alignment.
        :raises AssertionError: If the spool does not end on a _PAGE_SIZE
            boundary once any skipped padding is accounted for.
        """
        chunks, _, pad_len = self.writer.finish()
        spool = self.spool
        if self.nodes == 0:
            # The first node of a row is preceded by blank space reserved
            # for the index header.
            spool.write("\x00" * _RESERVED_HEADER_BYTES)
        if pad or not pad_len:
            omitted = 0
        else:
            del chunks[-1]
            omitted = pad_len
        spool.writelines(chunks)
        end = spool.tell()
        leftover = (end + omitted) % _PAGE_SIZE
        if leftover != 0:
            raise AssertionError("incorrect node length: %d, %d"
                                 % (end, leftover))
        self.nodes += 1
        self.writer = None

class _InternalBuilderRow(_BuilderRow):
    """Builder state for an internal (non-leaf) row of the tree."""

    def finish_node(self, pad=True):
        """Finish the open node; internal nodes are always padded.

        :param pad: Must be true.
        :raises AssertionError: If pad is false.
        """
        if pad:
            _BuilderRow.finish_node(self)
            return
        raise AssertionError("Must pad internal nodes only.")

class _LeafBuilderRow(_BuilderRow):
    """The stored state accumulated while writing out the leaf row."""

class BTreeBuilder(index.GraphIndexBuilder):
    """A Builder for B+Tree based Graph indices.

    The resulting graph has the structure:

    _SIGNATURE OPTIONS NODES
    _SIGNATURE     := 'B+Tree Graph Index 2' NEWLINE
    OPTIONS        := REF_LISTS KEY_ELEMENTS LENGTH ROW_LENGTHS
    REF_LISTS      := 'node_ref_lists=' DIGITS NEWLINE
    KEY_ELEMENTS   := 'key_elements=' DIGITS NEWLINE
    LENGTH         := 'len=' DIGITS NEWLINE
    ROW_LENGTHS    := 'row_lengths=' DIGITS (COMMA DIGITS)* NEWLINE
    NODES          := NODE_COMPRESSED*
    NODE_COMPRESSED:= COMPRESSED_BYTES{4096}
    NODE_RAW       := INTERNAL | LEAF
    INTERNAL       := INTERNAL_FLAG POINTERS
    LEAF           := LEAF_FLAG ROWS
    KEY_ELEMENT    := Not-whitespace-utf8
    KEY            := KEY_ELEMENT (NULL KEY_ELEMENT)*
    ROWS           := ROW*
    ROW            := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
    ABSENT         := 'a'
    REFERENCES     := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
    REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
    REFERENCE      := KEY
    VALUE          := no-newline-no-null-bytes
    """

    def __init__(self, reference_lists=0, key_elements=1, spill_at=100000):
        """See GraphIndexBuilder.__init__.

        :param spill_at: Optional parameter controlling the maximum number
            of nodes that BTreeBuilder will hold in memory.
        """
        index.GraphIndexBuilder.__init__(self, reference_lists=reference_lists,
            key_elements=key_elements)
        self._spill_at = spill_at
        # Indices already spilled to disk; a slot may be None once its
        # content has been merged into a later slot.
        self._backing_indices = []
        # A map of {key: (node_refs, value)}
        self._nodes = {}
        # Indicate it hasn't been built yet
        self._nodes_by_key = None
        self._optimize_for_size = False

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        If adding the node causes the builder to reach its spill_at threshold,
        disk spilling will be triggered.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        :raises errors.BadIndexDuplicateKey: If key is already held in
            memory.
        """
        # we don't care about absent_references
        node_refs, _ = self._check_key_ref_value(key, references, value)
        if key in self._nodes:
            raise errors.BadIndexDuplicateKey(key, self)
        self._nodes[key] = (node_refs, value)
        self._keys.add(key)
        if self._nodes_by_key is not None and self._key_length > 1:
            self._update_nodes_by_key(key, value, node_refs)
        if len(self._keys) < self._spill_at:
            return
        self._spill_mem_keys_to_disk()

    def _spill_mem_keys_to_disk(self):
        """Write the in memory keys down to disk to cap memory consumption.

        If we already have some keys written to disk, we will combine them so
        as to preserve the sorted order.  The algorithm for combining uses
        powers of two.  So on the first spill, write all mem nodes into a
        single index. On the second spill, combine the mem nodes with the nodes
        on disk to create a 2x sized disk index and get rid of the first index.
        On the third spill, create a single new disk index, which will contain
        the mem nodes, and preserve the existing 2x sized index.  On the fourth,
        combine mem with the first and second indexes, creating a new one of
        size 4x. On the fifth create a single new one, etc.
        """
        if self._combine_backing_indices:
            (new_backing_file, size,
             backing_pos) = self._spill_mem_keys_and_combine()
        else:
            new_backing_file, size = self._spill_mem_keys_without_combining()
        dir_path, base_name = osutils.split(new_backing_file.name)
        # Note: The transport here isn't strictly needed, because we will use
        #       direct access to the new_backing._file object
        new_backing = BTreeGraphIndex(get_transport(dir_path),
                                      base_name, size)
        # GC will clean up the file
        new_backing._file = new_backing_file
        if self._combine_backing_indices:
            if len(self._backing_indices) == backing_pos:
                self._backing_indices.append(None)
            self._backing_indices[backing_pos] = new_backing
            # Every earlier slot has been merged into the new index.
            for stale_pos in range(backing_pos):
                self._backing_indices[stale_pos] = None
        else:
            self._backing_indices.append(new_backing)
        self._keys = set()
        self._nodes = {}
        self._nodes_by_key = None

    def _spill_mem_keys_without_combining(self):
        """Write only the in-memory nodes to a new temporary index.

        :return: (file, size) as returned by _write_nodes.
        """
        return self._write_nodes(self._iter_mem_nodes(), allow_optimize=False)

    def _spill_mem_keys_and_combine(self):
        """Merge the in-memory nodes with the leading backing indices.

        Backing indices are consumed from slot 0 up to, but not including,
        the first empty (None) slot.

        :return: (file, size, backing_pos) where backing_pos is the slot of
            self._backing_indices that the new index should occupy.
        """
        iterators_to_combine = [self._iter_mem_nodes()]
        pos = -1
        for pos, backing in enumerate(self._backing_indices):
            if backing is None:
                pos -= 1
                break
            iterators_to_combine.append(backing.iter_all_entries())
        backing_pos = pos + 1
        new_backing_file, size = \
            self._write_nodes(self._iter_smallest(iterators_to_combine),
                              allow_optimize=False)
        return new_backing_file, size, backing_pos

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add
            when the index has reference lists, otherwise an iterable of
            (key, value) entries.
        """
        if self.reference_lists:
            for (key, value, node_refs) in nodes:
                self.add_node(key, value, node_refs)
        else:
            for (key, value) in nodes:
                self.add_node(key, value)

    def _iter_mem_nodes(self):
        """Iterate over the nodes held in memory, in sorted key order."""
        nodes = self._nodes
        if self.reference_lists:
            for key in sorted(nodes):
                references, value = nodes[key]
                yield self, key, value, references
        else:
            for key in sorted(nodes):
                _, value = nodes[key]
                yield self, key, value

    def _iter_smallest(self, iterators_to_combine):
        """Merge several node iterators, always yielding the smallest key.

        Each iterator is expected to produce nodes in ascending key order.
        The index element of every node drawn from the merge is replaced by
        self; a lone iterator is passed through untouched.

        :raises errors.BadIndexDuplicateKey: If the same key is produced
            twice in a row by the merge.
        """
        if len(iterators_to_combine) == 1:
            for value in iterators_to_combine[0]:
                yield value
            return
        current_values = []
        for iterator in iterators_to_combine:
            try:
                current_values.append(iterator.next())
            except StopIteration:
                current_values.append(None)
        last = None
        while True:
            # Decorate candidates with the value to allow 2.4's min to be used.
            candidates = [(item[1][1], item) for item
                in enumerate(current_values) if item[1] is not None]
            if not len(candidates):
                return
            selected = min(candidates)
            # undecorate back to (pos, node)
            selected = selected[1]
            if last == selected[1][1]:
                raise errors.BadIndexDuplicateKey(last, self)
            last = selected[1][1]
            # Yield, with self as the index
            yield (self,) + selected[1][1:]
            pos = selected[0]
            try:
                current_values[pos] = iterators_to_combine[pos].next()
            except StopIteration:
                current_values[pos] = None

    def _add_key(self, string_key, line, rows, allow_optimize=True):
        """Add a key to the current chunk.

        :param string_key: The key to add.
        :param line: The fully serialised key and value.
        :param rows: The list of builder rows, root first and leaf row last.
            A new root row is inserted at the front when the tree has to
            grow.
        :param allow_optimize: If set to False, prevent setting the optimize
            flag when writing out. This is used by the _spill_mem_keys_to_disk
            functionality.
        """
        # NOTE(review): allow_optimize is only honoured for the internal
        # writers opened in the loop below; the leaf writer and the new-root
        # writer use self._optimize_for_size directly. Confirm this is
        # intended.
        if rows[-1].writer is None:
            # opening a new leaf chunk;
            for pos, internal_row in enumerate(rows[:-1]):
                # flesh out any internal nodes that are needed to
                # preserve the height of the tree
                if internal_row.writer is None:
                    length = _PAGE_SIZE
                    if internal_row.nodes == 0:
                        length -= _RESERVED_HEADER_BYTES # padded
                    if allow_optimize:
                        optimize_for_size = self._optimize_for_size
                    else:
                        optimize_for_size = False
                    internal_row.writer = chunk_writer.ChunkWriter(length, 0,
                        optimize_for_size=optimize_for_size)
                    internal_row.writer.write(_INTERNAL_FLAG)
                    internal_row.writer.write(_INTERNAL_OFFSET +
                        str(rows[pos + 1].nodes) + "\n")
            # add a new leaf
            length = _PAGE_SIZE
            if rows[-1].nodes == 0:
                length -= _RESERVED_HEADER_BYTES # padded
            rows[-1].writer = chunk_writer.ChunkWriter(length,
                optimize_for_size=self._optimize_for_size)
            rows[-1].writer.write(_LEAF_FLAG)
        if rows[-1].writer.write(line):
            # this key did not fit in the node:
            rows[-1].finish_node()
            key_line = string_key + "\n"
            new_row = True
            for row in reversed(rows[:-1]):
                # Mark the start of the next node in the node above. If it
                # doesn't fit then propagate upwards until we find one that
                # it does fit into.
                if row.writer.write(key_line):
                    row.finish_node()
                else:
                    # We've found a node that can handle the pointer.
                    new_row = False
                    break
            # If we reached the current root without being able to mark the
            # division point, then we need a new root:
            if new_row:
                # We need a new row
                if 'index' in debug.debug_flags:
                    trace.mutter('Inserting new global row.')
                new_row = _InternalBuilderRow()
                reserved_bytes = 0
                rows.insert(0, new_row)
                # This will be padded, hence the -100
                new_row.writer = chunk_writer.ChunkWriter(
                    _PAGE_SIZE - _RESERVED_HEADER_BYTES,
                    reserved_bytes,
                    optimize_for_size=self._optimize_for_size)
                new_row.writer.write(_INTERNAL_FLAG)
                new_row.writer.write(_INTERNAL_OFFSET +
                    str(rows[1].nodes - 1) + "\n")
                new_row.writer.write(key_line)
            # The finished leaf has been closed; retry the key, which opens
            # a fresh leaf node.
            self._add_key(string_key, line, rows, allow_optimize=allow_optimize)

    def _write_nodes(self, node_iterator, allow_optimize=True):
        """Write node_iterator out as a B+Tree.

        :param node_iterator: An iterator of sorted nodes. Each node should
            match the output given by iter_all_entries.
        :param allow_optimize: If set to False, prevent setting the optimize
            flag when writing out. This is used by the _spill_mem_keys_to_disk
            functionality.
        :return: A tuple (file, size): a handle, rewound to the start, on a
            named temporary file containing a B+Tree for the nodes, and the
            number of bytes written to it.
        :raises AssertionError: If the header does not fit in the reserved
            space, or an internal row copied an unexpected amount of data.
        """
        # The index rows - rows[0] is the root, rows[1] is the layer under it
        # etc.
        rows = []
        # forward sorted by key. In future we may consider topological sorting,
        # at the cost of table scans for direct lookup, or a second index for
        # direct lookup
        key_count = 0
        # A stack with the number of nodes of each size. 0 is the root node
        # and must always be 1 (if there are any nodes in the tree).
        # NOTE(review): this attribute is reset but never filled in here; the
        # header below is built from the local row_lengths list.
        self.row_lengths = []
        # Loop over all nodes adding them to the bottom row
        # (rows[-1]). When we finish a chunk in a row,
        # propagate the key that didn't fit (comes after the chunk) to the
        # row above, transitively.
        for node in node_iterator:
            if key_count == 0:
                # First key triggers the first row
                rows.append(_LeafBuilderRow())
            key_count += 1
            string_key, line = _btree_serializer._flatten_node(node,
                                    self.reference_lists)
            self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
        for row in reversed(rows):
            # Only the leaf row may leave its final node unpadded.
            pad = (type(row) != _LeafBuilderRow)
            row.finish_node(pad=pad)
        result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
        lines = [_BTSIGNATURE]
        lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
        lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
        lines.append(_OPTION_LEN + str(key_count) + '\n')
        row_lengths = [row.nodes for row in rows]
        lines.append(_OPTION_ROW_LENGTHS + ','.join(map(str, row_lengths)) + '\n')
        result.writelines(lines)
        position = sum(map(len, lines))
        if position > _RESERVED_HEADER_BYTES:
            raise AssertionError("Could not fit the header in the"
                                 " reserved space: %d > %d"
                                 % (position, _RESERVED_HEADER_BYTES))
        # write the rows out:
        for row in rows:
            reserved = _RESERVED_HEADER_BYTES # reserved space for first node
            row.spool.flush()
            row.spool.seek(0)
            # copy nodes to the finalised file.
            # Special case the first node as it may be prefixed
            node = row.spool.read(_PAGE_SIZE)
            result.write(node[reserved:])
            result.write("\x00" * (reserved - position))
            position = 0 # Only the root row actually has an offset
            copied_len = osutils.pumpfile(row.spool, result)
            if copied_len != (row.nodes - 1) * _PAGE_SIZE:
                if type(row) != _LeafBuilderRow:
                    raise AssertionError("Incorrect amount of data copied"
                        " expected: %d, got: %d"
                        % ((row.nodes - 1) * _PAGE_SIZE,
                           copied_len))
        result.flush()
        size = result.tell()
        result.seek(0)
        return result, size

    def finish(self):
        """Finalise the index.

        :return: A file handle for a temporary file containing the nodes added
            to the index.
        """
        return self._write_nodes(self.iter_all_entries())[0]

    def iter_all_entries(self):
        """Iterate over all keys within the index

        :return: An iterable of (index, key, value, reference_lists). The
            in-memory nodes are produced in sorted key order and merged with
            the entries of any backing indices by always taking the smallest
            key next (this assumes the backing indices also iterate in
            sorted order).
        """
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                "iter_all_entries scales with size of history.")
        # Doing serial rather than ordered would be faster; but this shouldn't
        # be getting called routinely anyway.
        iterators = [self._iter_mem_nodes()]
        for backing in self._backing_indices:
            if backing is not None:
                iterators.append(backing.iter_all_entries())
        if len(iterators) == 1:
            return iterators[0]
        return self._iter_smallest(iterators)

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists). There is no
            defined order for the result iteration - it will be in the most
            efficient order for the index (keys iteration order in this case).
        """
        keys = set(keys)
        local_keys = keys.intersection(self._keys)
        if self.reference_lists:
            for key in local_keys:
                node = self._nodes[key]
                yield self, key, node[1], node[0]
        else:
            for key in local_keys:
                node = self._nodes[key]
                yield self, key, node[1]
        # Find things that are in backing indices that have not been handled
        # yet.
        if not self._backing_indices:
            return # We won't find anything there either
        # Remove all of the keys that we found locally
        keys.difference_update(local_keys)
        for backing in self._backing_indices:
            if backing is None:
                continue
            if not keys:
                return
            for node in backing.iter_entries(keys):
                keys.remove(node[1])
                yield (self,) + node[1:]

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        :raises errors.BadIndexKey: If a prefix starts with None or has the
            wrong length. The check is made while the in-memory nodes are
            searched, i.e. after any backing indices have been consulted.
        """
        # XXX: Too much duplication with the GraphIndex class; consider finding
        # a good place to pull out the actual common logic.
        keys = set(keys)
        if not keys:
            return
        for backing in self._backing_indices:
            if backing is None:
                continue
            for node in backing.iter_entries_prefix(keys):
                yield (self,) + node[1:]
        if self._key_length == 1:
            for key in keys:
                # sanity check
                if key[0] is None:
                    raise errors.BadIndexKey(key)
                if len(key) != self._key_length:
                    raise errors.BadIndexKey(key)
                try:
                    node = self._nodes[key]
                except KeyError:
                    continue
                if self.reference_lists:
                    yield self, key, node[1], node[0]
                else:
                    yield self, key, node[1]
            return
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            # find what it refers to:
            key_dict = self._get_nodes_by_key()
            elements = list(key)
            # find the subdict to return
            try:
                while len(elements) and elements[0] is not None:
                    key_dict = key_dict[elements[0]]
                    elements.pop(0)
            except KeyError:
                # a non-existent lookup.
                continue
            if len(elements):
                dicts = [key_dict]
                while dicts:
                    key_dict = dicts.pop(-1)
                    # can't be empty or would not exist
                    item, value = key_dict.iteritems().next()
                    if isinstance(value, dict):
                        # push keys
                        dicts.extend(key_dict.itervalues())
                    else:
                        # yield keys
                        for value in key_dict.itervalues():
                            yield (self, ) + value
            else:
                yield (self, ) + key_dict

    def _get_nodes_by_key(self):
        """Return the in-memory nodes as nested dicts, one level per key element.

        The structure is built from self._nodes on first use and cached in
        self._nodes_by_key. The innermost values are (key, value, references)
        tuples when the index has reference lists, otherwise (key, value).
        """
        if self._nodes_by_key is None:
            nodes_by_key = {}
            if self.reference_lists:
                for key, (references, value) in self._nodes.iteritems():
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value, references
            else:
                for key, (references, value) in self._nodes.iteritems():
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value
            self._nodes_by_key = nodes_by_key
        return self._nodes_by_key

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        The result is the number of keys held in memory plus the key_count()
        of every backing index that is still present.
        """
        return len(self._keys) + sum(backing.key_count() for backing in
            self._backing_indices if backing is not None)

    def validate(self):
        """In memory index's have no known corruption at the moment."""

584

585

586

class _LeafNode(object):
    """A leaf node for a serialised B+Tree index."""

    __slots__ = ('keys',)

    def __init__(self, bytes, key_length, ref_list_length):
        """Build the node by parsing its serialised form.

        :param bytes: The serialised content of the leaf node.
        :param key_length: Passed through to the leaf line parser.
        :param ref_list_length: Passed through to the leaf line parser.
        """
        # The parser is handed the raw bytes: splitlines would mangle the
        # \r delimiters, so it must not be used here.
        parsed = _btree_serializer._parse_leaf_lines(
            bytes, key_length, ref_list_length)
        self.keys = dict(parsed)

596

597

598

class _InternalNode(object):

599

"""An internal node for a serialised B+Tree index."""

600

601

__slots__ = ('keys', 'offset')

602

603

def __init__(self, bytes):

604

"""Parse bytes to create an internal node object."""

605

# splitlines mangles the \r delimiters.. don't use it.

606

self.keys = self._parse_lines(bytes.split('\n'))

607

608

def _parse_lines(self, lines):

609

nodes = []

610

self.offset = int(lines[1][7:])

611

for line in lines[2:]:

612

if line == '':

613

break

614

nodes.append(tuple(map(intern, line.split('\0'))))

615

return nodes

616

617

618

class BTreeGraphIndex(object):

619

"""Access to nodes via the standard GraphIndex interface for B+Tree's.

620

621

Individual nodes are held in a LRU cache. This holds the root node in

622

memory except when very large walks are done.

623

"""

624

625

def __init__(self, transport, name, size):
    """Create a B+Tree index object on the index name.

    :param transport: The transport to read data for the index from.
    :param name: The file name of the index on transport.
    :param size: Optional size of the index in bytes. Supplying it keeps
        the API compatible with GraphIndex, lets the first read (of the
        root node header) avoid over-reading even on empty indices, and
        lets a small index be read in a single IO.
    """
    # Where the index lives.
    self._transport = transport
    self._name = name
    self._size = size
    self._file = None
    self._recommended_pages = self._compute_recommended_pages()
    # State that is unknown until the header/root node has been read.
    self._root_node = None
    self._key_count = None
    self._row_lengths = None
    self._row_offsets = None # Start of each row, [-1] is the end
    # Caches. The leaf value cache is currently disabled; it used to be
    # lru_cache.LRUCache(100*1000), i.e. up to 100,000 leaf values.
    self._leaf_value_cache = None
    self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
    # Even a 300k record btree has only 3k leaf nodes and about 20
    # internal nodes, so 100 entries holds every internal node in practice
    # and a FIFO avoids paying the bookkeeping overhead of an LRU.
    self._internal_node_cache = fifo_cache.FIFOCache(100)

653

654

def __eq__(self, other):

655

"""Equal when self and other were created with the same parameters."""

656

return (

657

type(self) == type(other) and

658

self._transport == other._transport and

659

self._name == other._name and

660

self._size == other._size)

661

662

def __ne__(self, other):

663

return not self.__eq__(other)

664

665

def _get_and_cache_nodes(self, nodes):

666

"""Read nodes and cache them in the lru.

667

668

The nodes list supplied is sorted and then read from disk, each node

669

being inserted it into the _node_cache.

670

671

Note: Asking for more nodes than the _node_cache can contain will

672

result in some of the results being immediately discarded, to prevent

673

this an assertion is raised if more nodes are asked for than are

674

cachable.

675

676

:return: A dict of {node_pos: node}

677

"""

678

found = {}

679

start_of_leaves = None

680

for node_pos, node in self._read_nodes(sorted(nodes)):

681

if node_pos == 0: # Special case

682

self._root_node = node

683

else:

684

if start_of_leaves is None:

685

start_of_leaves = self._row_offsets[-2]

686

if node_pos < start_of_leaves:

687

self._internal_node_cache.add(node_pos, node)

688

else:

689

self._leaf_node_cache.add(node_pos, node)

690

found[node_pos] = node

691

return found

692

693

def _compute_recommended_pages(self):
    """Express the transport's recommended_page_size in btree pages.

    recommended_page_size is a byte count; the result is how many
    _PAGE_SIZE pages are needed to cover that many bytes, rounded up.
    """
    wanted_bytes = self._transport.recommended_page_size()
    pages = math.ceil(wanted_bytes / float(_PAGE_SIZE))
    return int(pages)

703

704

def _compute_total_pages_in_index(self):

705

"""How many pages are in the index.

706

707

If we have read the header we will use the value stored there.

708

Otherwise it will be computed based on the length of the index.

709

"""

710

if self._size is None:

711

raise AssertionError('_compute_total_pages_in_index should not be'

712

' called when self._size is None')

713

if self._root_node is not None:

714

# This is the number of pages as defined by the header

715

return self._row_offsets[-1]

716

# This is the number of pages as defined by the size of the index. They

717

# should be indentical.

718

total_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))

719

return total_pages

720

721

def _expand_offsets(self, offsets):
    """Decide which extra pages to fetch along with the requested ones.

    Requests should be big enough (like 64kB for http) that round trips
    are not wasted. Given the pages already cached and the pages about to
    be downloaded, work out which other pages are worth reading too.

    See also doc/developers/btree_index_prefetch.txt for more details.

    :param offsets: The offsets to be read
    :return: A list of offsets to download
    """
    tracing = 'index' in debug.debug_flags
    if tracing:
        trace.mutter('expanding: %s\toffsets: %s', self._name, offsets)

    if len(offsets) >= self._recommended_pages:
        # The request is already at least as large as recommended.
        if tracing:
            trace.mutter(' not expanding large request (%s >= %s)',
                         len(offsets), self._recommended_pages)
        return offsets
    if self._size is None:
        # Without the size we cannot tell where the file ends.
        if tracing:
            trace.mutter(' not expanding without knowing index size')
        return offsets

    total_pages = self._compute_total_pages_in_index()
    cached_offsets = self._get_offsets_to_cached_pages()
    unread_count = total_pages - len(cached_offsets)
    if unread_count <= self._recommended_pages:
        # A recommended-size read would cover the rest of the index, so
        # simply read every page that is not cached yet.
        if not cached_offsets:
            expanded = range(total_pages)
        else:
            expanded = [page for page in xrange(total_pages)
                        if page not in cached_offsets]
        if tracing:
            trace.mutter(' reading all unread pages: %s', expanded)
        return expanded

    if self._root_node is None:
        # ATM on the first read of the root node of a large index, no
        # other pages are pre-read, because the likelihood of reading
        # interesting pages is very low. See
        # doc/developers/btree_index_prefetch.txt for a discussion, and a
        # possible implementation when we are guessing that the second
        # layer index is small.
        final_offsets = offsets
    else:
        tree_depth = len(self._row_lengths)
        if len(offsets) == 1 and len(cached_offsets) < tree_depth:
            # Not enough has been read to justify expansion. Reading just
            # the root node and 1 leaf node is not worth expanding; once
            # at least 2 nodes have been read we are probably doing a
            # search, and expansion starts.
            if tracing:
                trace.mutter(' not expanding on first reads')
            return offsets
        final_offsets = self._expand_to_neighbors(offsets, cached_offsets,
                                                  total_pages)

    final_offsets = sorted(final_offsets)
    if tracing:
        trace.mutter('expanded: %s', final_offsets)
    return final_offsets

789

790

def _expand_to_neighbors(self, offsets, cached_offsets, total_pages):

791

"""Expand requests to neighbors until we have enough pages.

792

793

This is called from _expand_offsets after policy has determined that we

794

want to expand.

795

We only want to expand requests within a given layer. We cheat a little

796

bit and assume all requests will be in the same layer. This is true

797

given the current design, but if it changes this algorithm may perform

798

oddly.

799

800

:param offsets: requested offsets

801

:param cached_offsets: offsets for pages we currently have cached

802

:return: A set() of offsets after expansion

803

"""

804

final_offsets = set(offsets)

805

first = end = None

806

new_tips = set(final_offsets)

807

while len(final_offsets) < self._recommended_pages and new_tips:

808

next_tips = set()

809

for pos in new_tips:

810

if first is None:

811

first, end = self._find_layer_first_and_end(pos)

812

previous = pos - 1

813

if (previous > 0

814

and previous not in cached_offsets

815

and previous not in final_offsets

816

and previous >= first):

817

next_tips.add(previous)

818

after = pos + 1

819

if (after < total_pages

820

and after not in cached_offsets

821

and after not in final_offsets

822

and after < end):

823

next_tips.add(after)

824

# This would keep us from going bigger than

825

# recommended_pages by only expanding the first offsets.

826

# However, if we are making a 'wide' request, it is

827

# reasonable to expand all points equally.

828

# if len(final_offsets) > recommended_pages:

829

# break

830

final_offsets.update(next_tips)

831

new_tips = next_tips

832

return final_offsets

833

834

def external_references(self, ref_list_num):
    """Return the references in one ref list that are not keys here.

    Every entry of the index is visited; the result is the set of keys
    referenced through reference list ref_list_num minus the set of keys
    that are present in the index.

    :param ref_list_num: Index of the reference list to inspect.
    :return: A set of referenced keys that are absent from this index.
    :raises ValueError: if the index has no such reference list.
    """
    if self._root_node is None:
        # Reading the root node makes node_ref_lists available.
        self._get_root_node()
    if ref_list_num + 1 > self.node_ref_lists:
        message = ('No ref list %d, index has %d ref lists'
                   % (ref_list_num, self.node_ref_lists))
        raise ValueError(message)
    present = set()
    referenced = set()
    for entry in self.iter_all_entries():
        present.add(entry[1])
        referenced.update(entry[3][ref_list_num])
    return referenced.difference(present)

846

847

def _find_layer_first_and_end(self, offset):

848

"""Find the start/stop nodes for the layer corresponding to offset.

849

850

:return: (first, end)

851

first is the first node in this layer

852

end is the first node of the next layer

853

"""

854

first = end = 0

855

for roffset in self._row_offsets:

856

first = end

857

end = roffset

858

if offset < roffset:

859

break

860

return first, end

861

862

def _get_offsets_to_cached_pages(self):

863

"""Determine what nodes we already have cached."""

864

cached_offsets = set(self._internal_node_cache.keys())

865

cached_offsets.update(self._leaf_node_cache.keys())

866

if self._root_node is not None:

867

cached_offsets.add(0)

868

return cached_offsets

869

870

def _get_root_node(self):

871

if self._root_node is None:

872

# We may not have a root node yet

873

self._get_internal_nodes([0])

874

return self._root_node

875

876

def _get_nodes(self, cache, node_indexes):

877

found = {}

878

needed = []

879

for idx in node_indexes:

880

if idx == 0 and self._root_node is not None:

881

found[0] = self._root_node

882

continue

883

try:

884

found[idx] = cache[idx]

885

except KeyError:

886

needed.append(idx)

887

if not needed:

888

return found

889

needed = self._expand_offsets(needed)

890

found.update(self._get_and_cache_nodes(needed))

891

return found

892

893

def _get_internal_nodes(self, node_indexes):

894

"""Get a node, from cache or disk.

895

896

After getting it, the node will be cached.

897

"""

898

return self._get_nodes(self._internal_node_cache, node_indexes)

899

900

def _cache_leaf_values(self, nodes):

901

"""Cache directly from key => value, skipping the btree."""

902

if self._leaf_value_cache is not None:

903

for node in nodes.itervalues():

904

for key, value in node.keys.iteritems():

905

if key in self._leaf_value_cache:

906

# Don't add the rest of the keys, we've seen this node

907

# before.

908

break

909

self._leaf_value_cache[key] = value

910

911

def _get_leaf_nodes(self, node_indexes):

912

"""Get a bunch of nodes, from cache or disk."""

913

found = self._get_nodes(self._leaf_node_cache, node_indexes)

914

self._cache_leaf_values(found)

915

return found

916

917

def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value) or (index, key, value,
        reference_lists). The former tuple is used when there are no
        reference lists in the index, making the API compatible with
        simple key:value index types. Nodes are visited in the order
        they are read and the keys of each node are yielded in sorted
        order.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if not self.key_count():
        return
    if self._row_offsets[-1] == 1:
        # There is only the root node, and we read that via key_count()
        nodes = [(0, self._root_node)]
    else:
        # Every node of the last row is a leaf. This range can never be
        # just [0]: that needs _row_offsets[-1] == 1, handled above.
        start_of_leaves = self._row_offsets[-2]
        end_of_leaves = self._row_offsets[-1]
        nodes = self._read_nodes(range(start_of_leaves, end_of_leaves))
    # We iterate strictly in-order so that we can use this function
    # for spilling index builds to disk.
    with_refs = self.node_ref_lists
    for _, node in nodes:
        entries = sorted(node.keys.items())
        if with_refs:
            for key, (value, refs) in entries:
                yield (self, key, value, refs)
        else:
            for key, (value, refs) in entries:
                yield (self, key, value)

959

960

@staticmethod

961

def _multi_bisect_right(in_keys, fixed_keys):

962

"""Find the positions where each 'in_key' would fit in fixed_keys.

963

964

This is equivalent to doing "bisect_right" on each in_key into

965

fixed_keys

966

967

:param in_keys: A sorted list of keys to match with fixed_keys

968

:param fixed_keys: A sorted list of keys to match against

969

:return: A list of (integer position, [key list]) tuples.

970

"""

971

if not in_keys:

972

return []

973

if not fixed_keys:

974

# no pointers in the fixed_keys list, which means everything must

975

# fall to the left.

976

return [(0, in_keys)]

977

978

# TODO: Iterating both lists will generally take M + N steps

979

# Bisecting each key will generally take M * log2 N steps.

980

# If we had an efficient way to compare, we could pick the method

981

# based on which has the fewer number of steps.

982

# There is also the argument that bisect_right is a compiled

983

# function, so there is even more to be gained.

984

# iter_steps = len(in_keys) + len(fixed_keys)

985

# bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)

986

if len(in_keys) == 1: # Bisect will always be faster for M = 1

987

return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]

988

# elif bisect_steps < iter_steps:

989

# offsets = {}

990

# for key in in_keys:

991

# offsets.setdefault(bisect_right(fixed_keys, key),

992

# []).append(key)

993

# return [(o, offsets[o]) for o in sorted(offsets)]

994

in_keys_iter = iter(in_keys)

995

fixed_keys_iter = enumerate(fixed_keys)

996

cur_in_key = in_keys_iter.next()

997

cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()

998

999

class InputDone(Exception): pass

1000

class FixedDone(Exception): pass

1001

1002

output = []

1003

cur_out = []

1004

1005

# TODO: Another possibility is that rather than iterating on each side,

1006

# we could use a combination of bisecting and iterating. For

1007

# example, while cur_in_key < fixed_key, bisect to find its

1008

# point, then iterate all matching keys, then bisect (restricted

1009

# to only the remainder) for the next one, etc.

1010

try:

1011

while True:

1012

if cur_in_key < cur_fixed_key:

1013

cur_keys = []

1014

cur_out = (cur_fixed_offset, cur_keys)

1015

output.append(cur_out)

1016

while cur_in_key < cur_fixed_key:

1017

cur_keys.append(cur_in_key)

1018

try:

1019

cur_in_key = in_keys_iter.next()

1020

except StopIteration:

1021

raise InputDone

1022

# At this point cur_in_key must be >= cur_fixed_key

1023

# step the cur_fixed_key until we pass the cur key, or walk off

1024

# the end

1025

while cur_in_key >= cur_fixed_key:

1026

try:

1027

cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()

1028

except StopIteration:

1029

raise FixedDone

1030

except InputDone:

1031

# We consumed all of the input, nothing more to do

1032

pass

1033

except FixedDone:

1034

# There was some input left, but we consumed all of fixed, so we

1035

# have to add one more for the tail

1036

cur_keys = [cur_in_key]

1037

cur_keys.extend(in_keys_iter)

1038

cur_out = (len(fixed_keys), cur_keys)

1039

output.append(cur_out)

1040

return output

1041

1042

def iter_entries(self, keys):
    """Iterate over keys within the index.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned.
    """
    keys = frozenset(keys)
    if not keys:
        return

    if not self.key_count():
        return

    if self._leaf_value_cache is None:
        needed_keys = keys
    else:
        # Answer whatever we can straight from the key => value cache and
        # only walk the btree for the keys that were not found there.
        # (Previously needed_keys was reset to all of keys afterwards, so
        # cached keys were looked up again and yielded a second time.)
        needed_keys = []
        for key in keys:
            value = self._leaf_value_cache.get(key, None)
            if value is not None:
                value, refs = value
                if self.node_ref_lists:
                    yield (self, key, value, refs)
                else:
                    yield (self, key, value)
            else:
                needed_keys.append(key)

    if not needed_keys:
        return
    # 6 seconds spent in miss_torture using the sorted() line.
    # Even with out of order disk IO it seems faster not to sort it when
    # large queries are being made.
    # However, now that we are doing multi-way bisecting, we need the keys
    # in sorted order anyway. We could change the multi-way code to not
    # require sorted order. (For example, it bisects for the first node,
    # does an in-order search until a key comes before the current point,
    # which it then bisects for, etc.)
    needed_keys = sorted(needed_keys)

    nodes_and_keys = [(0, needed_keys)]

    # Descend one internal row at a time, splitting the keys between the
    # child nodes that could contain them.
    for next_row_start in self._row_offsets[1:-1]:
        node_indexes = [idx for idx, s_keys in nodes_and_keys]
        nodes = self._get_internal_nodes(node_indexes)

        next_nodes_and_keys = []
        for node_index, sub_keys in nodes_and_keys:
            node = nodes[node_index]
            positions = self._multi_bisect_right(sub_keys, node.keys)
            node_offset = next_row_start + node.offset
            next_nodes_and_keys.extend([(node_offset + pos, s_keys)
                                       for pos, s_keys in positions])
        nodes_and_keys = next_nodes_and_keys
    # We should now be at the _LeafNodes
    node_indexes = [idx for idx, s_keys in nodes_and_keys]

    # TODO: We may *not* want to always read all the nodes in one
    #       big go. Consider setting a max size on this.

    nodes = self._get_leaf_nodes(node_indexes)
    for node_index, sub_keys in nodes_and_keys:
        if not sub_keys:
            continue
        node = nodes[node_index]
        for next_sub_key in sub_keys:
            if next_sub_key in node.keys:
                value, refs = node.keys[next_sub_key]
                if self.node_ref_lists:
                    yield (self, next_sub_key, value, refs)
                else:
                    yield (self, next_sub_key, value)

1122

1123

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    WARNING: Note that this method currently causes a full index parse
    unconditionally (which is reasonably appropriate as it is a means for
    thunking many small indices into one larger one and still supplies
    iter_all_entries at the thunk layer).

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    :raises errors.BadIndexKey: when a prefix starts with None or does not
        have the index's key length.
    """
    prefixes = sorted(set(keys))
    if not prefixes:
        return
    # Load if needed to check key lengths
    if self._key_count is None:
        self._get_root_node()
    # TODO: only access nodes that can satisfy the prefixes we are looking
    # for. For now, to meet API usage (as this function is not used by
    # current bzrlib) just suck the entire index and iterate in memory.
    if self._key_length == 1:
        # Single element keys: a flat key => value lookup is enough.
        flat = {}
        if self.node_ref_lists:
            for _index, key, value, refs in self.iter_all_entries():
                flat[key] = value, refs
        else:
            for _index, key, value in self.iter_all_entries():
                flat[key] = value
        for prefix in prefixes:
            # sanity check
            if prefix[0] is None:
                raise errors.BadIndexKey(prefix)
            if len(prefix) != self._key_length:
                raise errors.BadIndexKey(prefix)
            try:
                found = flat[prefix]
            except KeyError:
                continue
            if self.node_ref_lists:
                value, node_refs = found
                yield self, prefix, value, node_refs
            else:
                yield self, prefix, found
        return

    # Multi element keys: for a key of (foo, bar, baz) create
    # nested[foo][bar][baz] = the entry minus its leading index.
    nested = {}
    if self.node_ref_lists:
        for _index, key, value, refs in self.iter_all_entries():
            branch = nested
            for subkey in key[:-1]:
                branch = branch.setdefault(subkey, {})
            branch[key[-1]] = key, value, refs
    else:
        for _index, key, value in self.iter_all_entries():
            branch = nested
            for subkey in key[:-1]:
                branch = branch.setdefault(subkey, {})
            branch[key[-1]] = key, value
    for prefix in prefixes:
        # sanity check
        if prefix[0] is None:
            raise errors.BadIndexKey(prefix)
        if len(prefix) != self._key_length:
            raise errors.BadIndexKey(prefix)
        # find the subdict whose contents should be returned.
        subtree = nested
        depth = 0
        try:
            while depth < len(prefix) and prefix[depth] is not None:
                subtree = subtree[prefix[depth]]
                depth += 1
        except KeyError:
            # a non-existent lookup.
            continue
        if depth == len(prefix):
            # the last thing looked up was a terminal element
            yield (self, ) + subtree
            continue
        pending = [subtree]
        while pending:
            current = pending.pop(-1)
            # can't be empty or would not exist
            sample = current.itervalues().next()
            if isinstance(sample, dict):
                # push the nested dicts for a later visit
                pending.extend(current.itervalues())
            else:
                for entry in current.itervalues():
                    # each value is the key:value:node refs tuple
                    # ready to yield.
                    yield (self, ) + entry

1234

1235

def key_count(self):
    """Return an estimate of the number of keys in this index.

    For BTreeGraphIndex the estimate is exact as it is contained in the
    header. The root node is read first when the count is not yet known.
    """
    if self._key_count is not None:
        return self._key_count
    self._get_root_node()
    return self._key_count

1244

1245

def _compute_row_offsets(self):

1246

"""Fill out the _row_offsets attribute based on _row_lengths."""

1247

offsets = []

1248

row_offset = 0

1249

for row in self._row_lengths:

1250

offsets.append(row_offset)

1251

row_offset += row

1252

offsets.append(row_offset)

1253

self._row_offsets = offsets

1254

1255

def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    Sets node_ref_lists, _key_length, _key_count and _row_lengths from the
    four option lines that follow the signature, then recomputes
    _row_offsets.

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data. (which may be of length 0).
    :raises errors.BadIndexFormatSignature: if bytes does not start with
        this index type's signature.
    :raises errors.BadIndexOptions: if an option line has the wrong prefix
        or a value that is not an integer.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if not signature == expected_signature:
        raise errors.BadIndexFormatSignature(self._name, BTreeGraphIndex)
    lines = bytes[len(expected_signature):].splitlines()

    def option_text(line_number, prefix):
        # Return what follows prefix on the given header line.
        line = lines[line_number]
        if not line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        return line[len(prefix):]

    node_refs_text = option_text(0, _OPTION_NODE_REFS)
    try:
        self.node_ref_lists = int(node_refs_text)
    except ValueError:
        raise errors.BadIndexOptions(self)

    key_elements_text = option_text(1, _OPTION_KEY_ELEMENTS)
    try:
        self._key_length = int(key_elements_text)
    except ValueError:
        raise errors.BadIndexOptions(self)

    key_count_text = option_text(2, _OPTION_LEN)
    try:
        self._key_count = int(key_count_text)
    except ValueError:
        raise errors.BadIndexOptions(self)

    row_lengths_text = option_text(3, _OPTION_ROW_LENGTHS)
    try:
        # Empty fields (e.g. from a trailing comma) are ignored.
        self._row_lengths = [int(length)
                             for length in row_lengths_text.split(',')
                             if len(length)]
    except ValueError:
        raise errors.BadIndexOptions(self)
    self._compute_row_offsets()

    # calculate the bytes we have processed: the signature, the four
    # option lines, and 4 more bytes (presumably one line terminator per
    # option line).
    header_end = len(signature) + sum(map(len, lines[0:4])) + 4
    return header_end, bytes[header_end:]

1301

1302

def _read_nodes(self, nodes):
    """Read some nodes from disk and parse them.

    This performs a readv (or reads from self._file when one is set) to
    get the node data into memory, parses each node, and yields it to the
    caller. The nodes are requested in the supplied order. If possible
    doing sort() on the list before requesting a read may improve
    performance.

    :param nodes: The nodes to read. 0 - first node, 1 - second node etc.
    :return: A generator of (node position, node) tuples.
    :raises AssertionError: if a requested node starts past the end of
        the file, or the node data is of an unknown type.
    """
    # may be the byte string of the whole file
    whole_file = None
    # list of (offset, length) regions of the file that should, eventually
    # be read in to data_ranges, either from 'whole_file' or from the
    # transport
    ranges = []
    for index in nodes:
        if index == 0:
            # Root node - special case
            if self._size:
                ranges.append((0, min(_PAGE_SIZE, self._size)))
                continue
            # The only case where we don't know the size, is for very
            # small indexes. So we read the whole thing
            whole_file = self._transport.get_bytes(self._name)
            self._size = len(whole_file)
            # the whole thing should be parsed out of 'whole_file'
            ranges.append((0, len(whole_file)))
            break
        offset = index * _PAGE_SIZE
        if offset > self._size:
            raise AssertionError('tried to read past the end'
                                 ' of the file %s > %s'
                                 % (offset, self._size))
        # The last page of the file may be shorter than a full page.
        ranges.append((offset, min(_PAGE_SIZE, self._size - offset)))
    if not ranges:
        return
    if whole_file is not None:
        # already have the whole file
        data_ranges = [(start, whole_file[start:start+_PAGE_SIZE])
                       for start in xrange(0, len(whole_file), _PAGE_SIZE)]
    elif self._file is None:
        data_ranges = self._transport.readv(self._name, ranges)
    else:
        data_ranges = []
        for offset, size in ranges:
            self._file.seek(offset)
            data_ranges.append((offset, self._file.read(size)))
    for offset, data in data_ranges:
        if offset == 0:
            # extract the header
            offset, data = self._parse_header_from_bytes(data)
            if len(data) == 0:
                continue
        node_text = zlib.decompress(data)
        if node_text.startswith(_LEAF_FLAG):
            node = _LeafNode(node_text, self._key_length,
                             self.node_ref_lists)
        elif node_text.startswith(_INTERNAL_FLAG):
            node = _InternalNode(node_text)
        else:
            raise AssertionError("Unknown node type for %r" % node_text)
        # Floor division maps the byte offset back to a page number; for
        # the root node the offset is the end of the header, which is
        # still within page 0.
        yield offset // _PAGE_SIZE, node

1367

1368

def _signature(self):
    """Return the signature that files of this index type start with."""
    signature = _BTSIGNATURE
    return signature

1371

1372

def validate(self):
    """Validate that everything in the index can be accessed.

    This reads the root node and then reads and parses every other node.
    """
    self._get_root_node()
    # With a single row there is nothing beyond the root; starting at 1
    # then yields an empty range.
    first_unread = 1
    if len(self._row_lengths) > 1:
        first_unread = self._row_offsets[1]
    past_last = self._row_offsets[-1]
    for _node in self._read_nodes(range(first_unread, past_last)):
        # Reading and parsing the node is the validation.
        pass

1384

1385

1386

try:

1387

from bzrlib import _btree_serializer_pyx as _btree_serializer

1388

except ImportError:

1389

from bzrlib import _btree_serializer_py as _btree_serializer

Older »