~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/btree_index.py

Committer: Vincent Ladeuil
Date: 2007-02-11 16:06:13 UTC
mto: (2323.7.1 redirection)
mto: This revision was merged to the branch mainline in revision 2390.
Revision ID: v.ladeuil+lp@free.fr-20070211160613-9k1vwo0e1x0si26z

Http redirections are not followed by default. Do not use hints
anymore.

* bzrlib/transport/smart.py:
(SmartTransport.get): Do not use hints.

* bzrlib/transport/sftp.py:
(SFTPTransport.get): Do not use hints.

* bzrlib/transport/memory.py:
(MemoryTransport.get): Do not use hints.

* bzrlib/transport/local.py:
(LocalTransport.get): Do not use hints.

* bzrlib/transport/http/_urllib2_wrappers.py:
(Request.__init__): Redirections are *not* followed by default.

* bzrlib/transport/http/_urllib.py:
(HttpTransport_urllib._get): Do not use hints.

* bzrlib/transport/http/_pycurl.py:
(PyCurlTransport._get): Do not use hints.

* bzrlib/transport/http/__init__.py:
(HttpTransportBase.get, HttpTransportBase._get): Do not use hints.
Fix _get doc anyway.

* bzrlib/transport/ftp.py:
(FtpTransport.get): Do not use hints.

* bzrlib/transport/fakevfat.py:
(FakeVFATTransportDecorator.get): Do not use hints.

* bzrlib/transport/decorator.py:
(TransportDecorator.get): Do not use hints.

* bzrlib/transport/chroot.py:
(ChrootTransportDecorator.get): Do not use hints.

* bzrlib/tests/test_transport_hints.py:
Deleted.

* bzrlib/tests/__init__.py:
(test_suite): Do not test hints.

* bzrlib/errors.py:
(UnknownHint): Deleted.

* bzrlib/bzrdir.py:
(BzrDirMetaFormat1.probe_transport): Do not use hints.

files added:
NEWS.developers

build-api

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_escaped_store.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/xml6.py

doc/README.1st

files removed:
bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/api.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_pack.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/bundle/serializer/v4.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/counted_lock.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/email_message.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/inventory_delta.py

bzrlib/lru_cache.py

bzrlib/mail_client.py

bzrlib/merge_directive.py

bzrlib/multiparent.py

bzrlib/pack.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconfigure.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/rules.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/tag.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/bzrdir_implementations/test_push.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_content_filters.py

bzrlib/tests/workingtree_implementations/test_eol_conversion.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/timestamp.py

bzrlib/transport/brokenrename.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/log.py

bzrlib/transport/nosmart.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/util/bencode.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version_info_formats/format_custom.py

bzrlib/views.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/developers

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2.txt

doc/developers/gc.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

doc/news-template.txt

man1

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/check-newsbugs.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/win32/build_release.py

tools/win32/run_script.py

files renamed:
doc/developers/HACKING.txt => HACKING

bzrlib/deprecated_graph.py => bzrlib/graph.py

bzrlib/help_topics/__init__.py => bzrlib/help_topics.py

bzrlib/_patiencediff_py.py => bzrlib/patiencediff.py

bzrlib/tests/http_utils.py => bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/http_server.py => bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_send.py => bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/per_repository/ => bzrlib/tests/repository_implementations/

bzrlib/tests/test_deprecated_graph.py => bzrlib/tests/test_graph.py

bzrlib/tests/test_revisionspec.py => bzrlib/tests/test_revisionnamespaces.py

bzrlib/transport/ftp/__init__.py => bzrlib/transport/ftp.py

bzrlib/transport/remote.py => bzrlib/transport/smart.py

bzrlib/win32utils.py => bzrlib/win32console.py

bzrlib/xml8.py => bzrlib/xml5.py

doc/en/tutorials/centralized_workflow.txt => doc/centralized_workflow.txt

bzrlib/help_topics/en/configuration.txt => doc/configuration.txt

doc/en/user-guide/http_smart_server.txt => doc/http_smart_server.txt

doc/en/user-guide/index.txt => doc/index.txt

doc/en/user-guide/plugins.txt => doc/plugins.txt

doc/en/user-guide/server.txt => doc/server.txt

doc/en/user-guide/setting_up_email.txt => doc/setting_up_email.txt

doc/en/user-guide/specifying_revisions.txt => doc/specifying_revisions.txt

doc/en/tutorials/tutorial.txt => doc/tutorial.txt

doc/en/user-guide/using_aliases.txt => doc/using_aliases.txt

doc/en/user-guide/version_info.txt => doc/version_info.txt

files modified:
.bzrignore

BRANCH.TODO

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml_serializer.py

contrib/bash/bzr.simple

contrib/newinventory.py

contrib/pwclient.full

doc/bazaar-vcs.org.kid

doc/default.css

generate_docs.py

profile_imports.py

setup.py *

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/ostools.py

tools/win32/start_bzr.bat

Show diffs side-by-side

added added

removed removed

bzrlib/btree_index.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""B+Tree indices"""

import array

import bisect

from bisect import bisect_right

from copy import deepcopy

import math

import struct

import tempfile

import zlib

from bzrlib import (

chunk_writer,

debug,

errors,

fifo_cache,

index,

lru_cache,

osutils,

trace,

)

from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN

from bzrlib.transport import get_transport

_BTSIGNATURE = "B+Tree Graph Index 2\n"

_OPTION_ROW_LENGTHS = "row_lengths="

_LEAF_FLAG = "type=leaf\n"

_INTERNAL_FLAG = "type=internal\n"

_INTERNAL_OFFSET = "offset="

_RESERVED_HEADER_BYTES = 120

_PAGE_SIZE = 4096

# 4K per page: 4MB - 1000 entries

_NODE_CACHE_SIZE = 1000

class _BuilderRow(object):

"""The stored state accumulated while writing out a row in the index.

:ivar spool: A temporary file used to accumulate nodes for this row

in the tree.

:ivar nodes: The count of nodes emitted so far.

"""

def __init__(self):

"""Create a _BuilderRow."""

self.nodes = 0

self.spool = tempfile.TemporaryFile()

self.writer = None

def finish_node(self, pad=True):

byte_lines, _, padding = self.writer.finish()

if self.nodes == 0:

# padded note:

self.spool.write("\x00" * _RESERVED_HEADER_BYTES)

skipped_bytes = 0

if not pad and padding:

del byte_lines[-1]

skipped_bytes = padding

self.spool.writelines(byte_lines)

remainder = (self.spool.tell() + skipped_bytes) % _PAGE_SIZE

if remainder != 0:

raise AssertionError("incorrect node length: %d, %d"

% (self.spool.tell(), remainder))

self.nodes += 1

self.writer = None

class _InternalBuilderRow(_BuilderRow):
    """Row state used while writing out the internal rows of the tree."""

    def finish_node(self, pad=True):
        """Finish the current internal node; internal nodes must be padded.

        :param pad: Must be true.
        :raises AssertionError: If pad is false.
        """
        if pad:
            _BuilderRow.finish_node(self)
            return
        raise AssertionError("Must pad internal nodes only.")

class _LeafBuilderRow(_BuilderRow):
    """The stored state accumulated while writing out leaf rows."""

100

101

class BTreeBuilder(index.GraphIndexBuilder):

102

"""A Builder for B+Tree based Graph indices.

103

104

The resulting graph has the structure:

105

106

_SIGNATURE OPTIONS NODES

107

_SIGNATURE := 'B+Tree Graph Index 2' NEWLINE

108

OPTIONS := REF_LISTS KEY_ELEMENTS LENGTH ROW_LENGTHS

109

REF_LISTS := 'node_ref_lists=' DIGITS NEWLINE

110

KEY_ELEMENTS := 'key_elements=' DIGITS NEWLINE

111

LENGTH := 'len=' DIGITS NEWLINE

112

ROW_LENGTHS := 'row_lengths=' (DIGITS (COMMA DIGITS)*)? NEWLINE

113

NODES := NODE_COMPRESSED*

114

NODE_COMPRESSED:= COMPRESSED_BYTES{4096}

115

NODE_RAW := INTERNAL | LEAF

116

INTERNAL := INTERNAL_FLAG POINTERS

117

LEAF := LEAF_FLAG ROWS

118

KEY_ELEMENT := Not-whitespace-utf8

119

KEY := KEY_ELEMENT (NULL KEY_ELEMENT)*

120

ROWS := ROW*

121

ROW := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE

122

ABSENT := 'a'

123

REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}

124

REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?

125

REFERENCE := KEY

126

VALUE := no-newline-no-null-bytes

127

"""

128

129

def __init__(self, reference_lists=0, key_elements=1, spill_at=100000):
    """See GraphIndexBuilder.__init__.

    :param spill_at: Optional threshold: once the number of keys held in
        memory reaches this value, add_node spills them to disk.
    """
    index.GraphIndexBuilder.__init__(
        self, reference_lists=reference_lists, key_elements=key_elements)
    # The per-element lookup structure is only built on demand.
    self._nodes_by_key = None
    # A map of {key: (node_refs, value)}
    self._nodes = {}
    self._backing_indices = []
    self._optimize_for_size = False
    self._spill_at = spill_at

144

145

def add_node(self, key, value, references=()):
    """Add a node to the index.

    Once the number of in-memory keys reaches the spill_at threshold the
    in-memory nodes are spilled to disk.

    :param key: The key. Keys are non-empty tuples containing as many
        whitespace-free utf8 bytestrings as the key length defined for
        this index.
    :param value: The value to associate with the key. It may be any
        bytes as long as it contains neither NUL nor newline characters.
    :param references: An iterable of iterables of keys. Each is a
        reference to another key.
    :raises errors.BadIndexDuplicateKey: If key is already held in memory.
    """
    # The second item (absent references) is of no interest here.
    node_refs, _absent = self._check_key_ref_value(key, references, value)
    if key in self._nodes:
        raise errors.BadIndexDuplicateKey(key, self)
    self._nodes[key] = (node_refs, value)
    self._keys.add(key)
    if self._key_length > 1 and self._nodes_by_key is not None:
        # Keep an already-built prefix lookup structure up to date.
        self._update_nodes_by_key(key, value, node_refs)
    if len(self._keys) >= self._spill_at:
        self._spill_mem_keys_to_disk()

170

171

def _spill_mem_keys_to_disk(self):
    """Write the in-memory keys to disk to cap memory consumption.

    When combining is enabled and keys were already spilled, the new disk
    index is merged with existing ones so sorted order is preserved, using
    a powers-of-two scheme: the first spill writes the memory nodes to a
    single index; the second merges memory with that index into a 2x index
    and drops the first; the third writes a fresh single index next to the
    2x one; the fourth merges memory with both into a 4x index; and so on.
    """
    combining = self._combine_backing_indices
    if combining:
        new_backing_file, size, backing_pos = \
            self._spill_mem_keys_and_combine()
    else:
        new_backing_file, size = self._spill_mem_keys_without_combining()
    dir_path, base_name = osutils.split(new_backing_file.name)
    # Note: the transport is not strictly needed, because the index reads
    # through the _file object assigned below.
    new_backing = BTreeGraphIndex(get_transport(dir_path), base_name, size)
    # GC will clean up the file
    new_backing._file = new_backing_file
    if combining:
        if backing_pos == len(self._backing_indices):
            self._backing_indices.append(None)
        self._backing_indices[backing_pos] = new_backing
        # Every slot before backing_pos was merged into the new index.
        self._backing_indices[:backing_pos] = [None] * backing_pos
    else:
        self._backing_indices.append(new_backing)
    # Start over with empty in-memory state.
    self._nodes_by_key = None
    self._nodes = {}
    self._keys = set()

207

208

def _spill_mem_keys_without_combining(self):

209

return self._write_nodes(self._iter_mem_nodes(), allow_optimize=False)

210

211

def _spill_mem_keys_and_combine(self):

212

iterators_to_combine = [self._iter_mem_nodes()]

213

pos = -1

214

for pos, backing in enumerate(self._backing_indices):

215

if backing is None:

216

pos -= 1

217

break

218

iterators_to_combine.append(backing.iter_all_entries())

219

backing_pos = pos + 1

220

new_backing_file, size = \

221

self._write_nodes(self._iter_smallest(iterators_to_combine),

222

allow_optimize=False)

223

return new_backing_file, size, backing_pos

224

225

def add_nodes(self, nodes):
    """Add several nodes to the index.

    :param nodes: An iterable of entries to add. When the index has
        reference lists each entry is (key, value, node_refs); otherwise
        each entry is (key, value).
    """
    if not self.reference_lists:
        for (key, value) in nodes:
            self.add_node(key, value)
        return
    for (key, value, node_refs) in nodes:
        self.add_node(key, value, node_refs)

236

237

def _iter_mem_nodes(self):

238

"""Iterate over the nodes held in memory."""

239

nodes = self._nodes

240

if self.reference_lists:

241

for key in sorted(nodes):

242

references, value = nodes[key]

243

yield self, key, value, references

244

else:

245

for key in sorted(nodes):

246

references, value = nodes[key]

247

yield self, key, value

248

249

def _iter_smallest(self, iterators_to_combine):

250

if len(iterators_to_combine) == 1:

251

for value in iterators_to_combine[0]:

252

yield value

253

return

254

current_values = []

255

for iterator in iterators_to_combine:

256

try:

257

current_values.append(iterator.next())

258

except StopIteration:

259

current_values.append(None)

260

last = None

261

while True:

262

# Decorate candidates with the value to allow 2.4's min to be used.

263

candidates = [(item[1][1], item) for item

264

in enumerate(current_values) if item[1] is not None]

265

if not len(candidates):

266

return

267

selected = min(candidates)

268

# undecorate back to (pos, node)

269

selected = selected[1]

270

if last == selected[1][1]:

271

raise errors.BadIndexDuplicateKey(last, self)

272

last = selected[1][1]

273

# Yield, with self as the index

274

yield (self,) + selected[1][1:]

275

pos = selected[0]

276

try:

277

current_values[pos] = iterators_to_combine[pos].next()

278

except StopIteration:

279

current_values[pos] = None

280

281

def _add_key(self, string_key, line, rows, allow_optimize=True):
    """Add a key to the current chunk.

    If the line does not fit in the open leaf node, that node is finished,
    the key is recorded as a division point in the rows above (adding a new
    root row if no existing row can take it), and the method calls itself
    again to place the line in a fresh leaf.

    :param string_key: The key to add.
    :param line: The fully serialised key and value.
    :param rows: The list of builder rows. rows[-1] is the leaf row; the
        entries before it are internal rows, root first. A new root row may
        be inserted at index 0.
    :param allow_optimize: If set to False, prevent setting the optimize
        flag when writing out. This is used by the _spill_mem_keys_to_disk
        functionality.
    """
    # NOTE(review): allow_optimize is only consulted when creating writers
    # for existing internal rows; the leaf writer and a newly inserted root
    # row use self._optimize_for_size directly - confirm this is intended.
    if rows[-1].writer is None:
        # opening a new leaf chunk;
        for pos, internal_row in enumerate(rows[:-1]):
            # flesh out any internal nodes that are needed to
            # preserve the height of the tree
            if internal_row.writer is None:
                length = _PAGE_SIZE
                if internal_row.nodes == 0:
                    length -= _RESERVED_HEADER_BYTES # padded
                if allow_optimize:
                    optimize_for_size = self._optimize_for_size
                else:
                    optimize_for_size = False
                internal_row.writer = chunk_writer.ChunkWriter(length, 0,
                    optimize_for_size=optimize_for_size)
                internal_row.writer.write(_INTERNAL_FLAG)
                # Record how many nodes the row below already holds.
                internal_row.writer.write(_INTERNAL_OFFSET +
                    str(rows[pos + 1].nodes) + "\n")
        # add a new leaf
        length = _PAGE_SIZE
        if rows[-1].nodes == 0:
            length -= _RESERVED_HEADER_BYTES # padded
        rows[-1].writer = chunk_writer.ChunkWriter(length,
            optimize_for_size=self._optimize_for_size)
        rows[-1].writer.write(_LEAF_FLAG)
    # A true result from write() is treated as "did not fit".
    if rows[-1].writer.write(line):
        # this key did not fit in the node:
        rows[-1].finish_node()
        key_line = string_key + "\n"
        new_row = True
        for row in reversed(rows[:-1]):
            # Mark the start of the next node in the node above. If it
            # doesn't fit then propagate upwards until we find one that
            # it does fit into.
            if row.writer.write(key_line):
                row.finish_node()
            else:
                # We've found a node that can handle the pointer.
                new_row = False
                break
        # If we reached the current root without being able to mark the
        # division point, then we need a new root:
        if new_row:
            # We need a new row
            if 'index' in debug.debug_flags:
                trace.mutter('Inserting new global row.')
            new_row = _InternalBuilderRow()
            reserved_bytes = 0
            rows.insert(0, new_row)
            # This will be padded, hence the reduction by the reserved
            # header size.
            new_row.writer = chunk_writer.ChunkWriter(
                _PAGE_SIZE - _RESERVED_HEADER_BYTES,
                reserved_bytes,
                optimize_for_size=self._optimize_for_size)
            new_row.writer.write(_INTERNAL_FLAG)
            new_row.writer.write(_INTERNAL_OFFSET +
                str(rows[1].nodes - 1) + "\n")
            new_row.writer.write(key_line)
        # Retry: the line now goes into a newly opened leaf node.
        self._add_key(string_key, line, rows, allow_optimize=allow_optimize)

349

350

def _write_nodes(self, node_iterator, allow_optimize=True):
    """Write node_iterator out as a B+Tree.

    :param node_iterator: An iterator of sorted nodes. Each node should
        match the output given by iter_all_entries.
    :param allow_optimize: If set to False, prevent setting the optimize
        flag when writing out. This is used by the _spill_mem_keys_to_disk
        functionality.
    :return: A (file, size) tuple: a named temporary file containing a
        B+Tree for the nodes, positioned at its start, and the number of
        bytes written to it.
    :raises AssertionError: If the header does not fit in the reserved
        space, or an internal row copies an unexpected number of bytes.
    """
    # The index rows - rows[0] is the root, rows[1] is the layer under it
    # etc.
    rows = []
    # forward sorted by key. In future we may consider topological sorting,
    # at the cost of table scans for direct lookup, or a second index for
    # direct lookup
    key_count = 0
    # A stack with the number of nodes of each size. 0 is the root node
    # and must always be 1 (if there are any nodes in the tree).
    # NOTE(review): this attribute is reset here, but the lengths written
    # to the header below come from the local row_lengths list.
    self.row_lengths = []
    # Loop over all nodes adding them to the bottom row
    # (rows[-1]). When we finish a chunk in a row,
    # propagate the key that didn't fit (comes after the chunk) to the
    # row above, transitively.
    for node in node_iterator:
        if key_count == 0:
            # First key triggers the first row
            rows.append(_LeafBuilderRow())
        key_count += 1
        string_key, line = _btree_serializer._flatten_node(node,
                                self.reference_lists)
        self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
    # Close the node still open in every row, leaf row first; only the
    # leaf row is finished without padding.
    for row in reversed(rows):
        pad = (type(row) != _LeafBuilderRow)
        row.finish_node(pad=pad)
    result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
    lines = [_BTSIGNATURE]
    lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
    lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
    lines.append(_OPTION_LEN + str(key_count) + '\n')
    row_lengths = [row.nodes for row in rows]
    lines.append(_OPTION_ROW_LENGTHS + ','.join(map(str, row_lengths)) + '\n')
    result.writelines(lines)
    # Number of header bytes written so far.
    position = sum(map(len, lines))
    # NOTE(review): root_row is assigned but not read in this method.
    root_row = True
    if position > _RESERVED_HEADER_BYTES:
        raise AssertionError("Could not fit the header in the"
                             " reserved space: %d > %d"
                             % (position, _RESERVED_HEADER_BYTES))
    # write the rows out:
    for row in rows:
        reserved = _RESERVED_HEADER_BYTES # reserved space for first node
        row.spool.flush()
        row.spool.seek(0)
        # copy nodes to the finalised file.
        # Special case the first node as it may be prefixed
        node = row.spool.read(_PAGE_SIZE)
        # Drop the reserved prefix, then write NUL bytes for whatever part
        # of the reserved space the header did not occupy.
        result.write(node[reserved:])
        result.write("\x00" * (reserved - position))
        position = 0 # Only the root row actually has an offset
        copied_len = osutils.pumpfile(row.spool, result)
        if copied_len != (row.nodes - 1) * _PAGE_SIZE:
            # A mismatch is tolerated for the leaf row, whose last node is
            # finished without padding.
            if type(row) != _LeafBuilderRow:
                raise AssertionError("Incorrect amount of data copied"
                                     " expected: %d, got: %d"
                                     % ((row.nodes - 1) * _PAGE_SIZE,
                                        copied_len))
    result.flush()
    size = result.tell()
    result.seek(0)
    return result, size

422

423

def finish(self):
    """Finalise the index.

    :return: A file handle for a temporary file containing the nodes added
        to the index (the first item of the _write_nodes result).
    """
    written = self._write_nodes(self.iter_all_entries())
    return written[0]

430

431

def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of the in-memory nodes when no backing index is
        present; otherwise the merge produced by _iter_smallest over the
        in-memory nodes and every backing index.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    # Doing serial rather than ordered would be faster; but this shouldn't
    # be getting called routinely anyway.
    sources = [self._iter_mem_nodes()]
    sources.extend([backing.iter_all_entries()
                    for backing in self._backing_indices
                    if backing is not None])
    if len(sources) > 1:
        return self._iter_smallest(sources)
    return sources[0]

450

451

def iter_entries(self, keys):
    """Iterate over the requested keys that are present in the index.

    In-memory matches are yielded first, then matches from each backing
    index in turn; a key found once is not looked up again.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable of (index, key, value, reference_lists), or
        (index, key, value) when the index has no reference lists.
    """
    wanted = set(keys)
    in_memory = wanted.intersection(self._keys)
    with_refs = self.reference_lists
    for key in in_memory:
        node = self._nodes[key]
        if with_refs:
            yield self, key, node[1], node[0]
        else:
            yield self, key, node[1]
    if not self._backing_indices:
        # Nothing was ever spilled, so there is nowhere else to look.
        return
    # Only keys not satisfied from memory are asked of the backing indices.
    wanted -= in_memory
    for backing in self._backing_indices:
        if backing is None:
            continue
        if not wanted:
            return
        for node in backing.iter_entries(wanted):
            wanted.remove(node[1])
            yield (self,) + node[1:]

483

484

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Matching is done per key element, never inside an element's
    bytestring: with keys ('foo', 'bar') and ('foobar', 'gam'), a search
    for ('foo', None) returns only the former.

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each prefix is a tuple the length of a key, with the last N
        elements None rather than a bytestring. The first element cannot
        be None.
    :return: An iterable of entries whose first element is this builder;
        matches from the backing indices are yielded before in-memory
        matches.
    :raises errors.BadIndexKey: If a prefix starts with None or has the
        wrong length.
    """
    # XXX: Too much duplication with the GraphIndex class; consider finding
    # a good place to pull out the actual common logic.
    prefixes = set(keys)
    if not prefixes:
        return
    for backing in self._backing_indices:
        if backing is not None:
            for node in backing.iter_entries_prefix(prefixes):
                yield (self,) + node[1:]
    single_element = (self._key_length == 1)
    for prefix in prefixes:
        # sanity check
        if prefix[0] is None or len(prefix) != self._key_length:
            raise errors.BadIndexKey(prefix)
        if single_element:
            # With one-element keys a prefix is simply a full key.
            try:
                node = self._nodes[prefix]
            except KeyError:
                continue
            if self.reference_lists:
                yield self, prefix, node[1], node[0]
            else:
                yield self, prefix, node[1]
            continue
        # Walk down the nested dicts along the non-None elements.
        subtree = self._get_nodes_by_key()
        depth = 0
        try:
            while depth < len(prefix) and prefix[depth] is not None:
                subtree = subtree[prefix[depth]]
                depth += 1
        except KeyError:
            # a non-existent lookup.
            continue
        if depth == len(prefix):
            # Every element was given, so this is a single entry.
            yield (self,) + subtree
            continue
        # Remaining elements are wildcards: emit everything underneath.
        pending = [subtree]
        while pending:
            level = pending.pop()
            # can't be empty or would not exist
            first_value = level.iteritems().next()[1]
            if type(first_value) == dict:
                pending.extend(level.itervalues())
            else:
                for entry in level.itervalues():
                    yield (self,) + entry

559

560

def _get_nodes_by_key(self):

561

if self._nodes_by_key is None:

562

nodes_by_key = {}

563

if self.reference_lists:

564

for key, (references, value) in self._nodes.iteritems():

565

key_dict = nodes_by_key

566

for subkey in key[:-1]:

567

key_dict = key_dict.setdefault(subkey, {})

568

key_dict[key[-1]] = key, value, references

569

else:

570

for key, (references, value) in self._nodes.iteritems():

571

key_dict = nodes_by_key

572

for subkey in key[:-1]:

573

key_dict = key_dict.setdefault(subkey, {})

574

key_dict[key[-1]] = key, value

575

self._nodes_by_key = nodes_by_key

576

return self._nodes_by_key

577

578

def key_count(self):
    """Return an estimate of the number of keys in this index.

    The result is the number of in-memory keys plus the key_count() of
    every backing index that is present.
    """
    total = len(self._keys)
    for backing in self._backing_indices:
        if backing is not None:
            total += backing.key_count()
    return total

585

586

def validate(self):
    """In-memory indices have no known corruption at the moment.

    This method therefore performs no checks.
    """

588

589

590

class _LeafNode(object):
    """A leaf node for a serialised B+Tree index."""

    def __init__(self, bytes, key_length, ref_list_length):
        """Parse bytes to create a leaf node object.

        The parsed result is stored as a dict on self.keys.
        """
        # splitlines mangles the \r delimiters.. don't use it.
        parsed = _btree_serializer._parse_leaf_lines(
            bytes, key_length, ref_list_length)
        self.keys = dict(parsed)

598

599

600

class _InternalNode(object):

601

"""An internal node for a serialised B+Tree index."""

602

603

def __init__(self, bytes):

604

"""Parse bytes to create an internal node object."""

605

# splitlines mangles the \r delimiters.. don't use it.

606

self.keys = self._parse_lines(bytes.split('\n'))

607

608

def _parse_lines(self, lines):

609

nodes = []

610

self.offset = int(lines[1][7:])

611

for line in lines[2:]:

612

if line == '':

613

break

614

nodes.append(tuple(line.split('\0')))

615

return nodes

616

617

618

class BTreeGraphIndex(object):

619

"""Access to nodes via the standard GraphIndex interface for B+Tree's.

620

621

Individual nodes are held in a LRU cache. This holds the root node in

622

memory except when very large walks are done.

623

"""

624

625

def __init__(self, transport, name, size):
    """Create a B+Tree index object on the index name.

    :param transport: The transport to read data for the index from.
    :param name: The file name of the index on transport.
    :param size: Optional size of the index in bytes. This allows
        compatibility with the GraphIndex API, as well as ensuring that
        the initial read (to read the root node header) can be done
        without over-reading even on empty indices, and on small indices
        allows single-IO to read the entire index.
    """
    self._transport = transport
    self._name = name
    self._size = size
    self._file = None
    # Nothing has been read yet.
    self._root_node = None
    self._key_count = None
    self._row_lengths = None
    self._row_offsets = None # Start of each row, [-1] is the end
    self._recommended_pages = self._compute_recommended_pages()
    # The leaf value cache is disabled; a size of 100,000 leaf values
    # was the candidate.
    self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
    self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
    # We could limit this, but even a 300k record btree has only 3k leaf
    # nodes, and only 20 internal nodes. So a cache of 100 nodes means we
    # always cache everything anyway, no need to pay the overhead of LRU.
    self._internal_node_cache = fifo_cache.FIFOCache(100)

653

654

def __eq__(self, other):

655

"""Equal when self and other were created with the same parameters."""

656

return (

657

type(self) == type(other) and

658

self._transport == other._transport and

659

self._name == other._name and

660

self._size == other._size)

661

662

def __ne__(self, other):

663

return not self.__eq__(other)

664

665

def _get_and_cache_nodes(self, nodes):

666

"""Read nodes and cache them in the lru.

667

668

The nodes list supplied is sorted and then read from disk, each node

669

being inserted it into the _node_cache.

670

671

Note: Asking for more nodes than the _node_cache can contain will

672

result in some of the results being immediately discarded, to prevent

673

this an assertion is raised if more nodes are asked for than are

674

cachable.

675

676

:return: A dict of {node_pos: node}

677

"""

678

found = {}

679

start_of_leaves = None

680

for node_pos, node in self._read_nodes(sorted(nodes)):

681

if node_pos == 0: # Special case

682

self._root_node = node

683

else:

684

if start_of_leaves is None:

685

start_of_leaves = self._row_offsets[-2]

686

if node_pos < start_of_leaves:

687

self._internal_node_cache.add(node_pos, node)

688

else:

689

self._leaf_node_cache.add(node_pos, node)

690

found[node_pos] = node

691

return found

692

693

def _compute_recommended_pages(self):
    """Convert transport's recommended_page_size into btree pages.

    recommended_page_size is in bytes; the result is the number of
    _PAGE_SIZE pages needed to cover that length, rounded up.
    """
    recommended_read = self._transport.recommended_page_size()
    pages = math.ceil(recommended_read / float(_PAGE_SIZE))
    return int(pages)

703

704

def _compute_total_pages_in_index(self):

705

"""How many pages are in the index.

706

707

If we have read the header we will use the value stored there.

708

Otherwise it will be computed based on the length of the index.

709

"""

710

if self._size is None:

711

raise AssertionError('_compute_total_pages_in_index should not be'

712

' called when self._size is None')

713

if self._root_node is not None:

714

# This is the number of pages as defined by the header

715

return self._row_offsets[-1]

716

# This is the number of pages as defined by the size of the index. They

717

# should be indentical.

718

total_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))

719

return total_pages

720

721

def _expand_offsets(self, offsets):
    """Find extra pages to download.

    The idea is that we always want to make big-enough requests (like 64kB
    for http), so that we don't waste round trips. Given the pages already
    cached and the pages being requested, work out which other pages are
    worth reading at the same time.

    See also doc/developers/btree_index_prefetch.txt for more details.

    :param offsets: The offsets to be read
    :return: A list of offsets to download
    """
    verbose = 'index' in debug.debug_flags
    if verbose:
        trace.mutter('expanding: %s\toffsets: %s', self._name, offsets)

    request_size = len(offsets)
    if request_size >= self._recommended_pages:
        # Don't add more, we are already requesting more than enough
        if verbose:
            trace.mutter(' not expanding large request (%s >= %s)',
                         request_size, self._recommended_pages)
        return offsets
    if self._size is None:
        # Don't try anything, because we don't know where the file ends
        if verbose:
            trace.mutter(' not expanding without knowing index size')
        return offsets

    total_pages = self._compute_total_pages_in_index()
    cached_offsets = self._get_offsets_to_cached_pages()
    unread_count = total_pages - len(cached_offsets)
    if unread_count <= self._recommended_pages:
        # One recommended-size read covers the rest of the index, so read
        # whatever is left.
        if not cached_offsets:
            expanded = range(total_pages)
        else:
            expanded = [page for page in xrange(total_pages)
                        if page not in cached_offsets]
        if verbose:
            trace.mutter(' reading all unread pages: %s', expanded)
        return expanded

    if self._root_node is not None:
        tree_depth = len(self._row_lengths)
        if len(offsets) == 1 and len(cached_offsets) < tree_depth:
            # We haven't read enough to justify expansion. If we are only
            # going to read the root node and 1 leaf node, it isn't worth
            # expanding the request. Once at least 2 nodes have been read
            # we are probably doing a search, and start expanding.
            if verbose:
                trace.mutter(' not expanding on first reads')
            return offsets
        final_offsets = self._expand_to_neighbors(offsets, cached_offsets,
                                                  total_pages)
    else:
        # ATM on the first read of the root node of a large index, we don't
        # bother pre-reading any other pages, because the likelihood of
        # reading interesting pages is very low. See
        # doc/developers/btree_index_prefetch.txt for a discussion.
        final_offsets = offsets

    final_offsets = sorted(final_offsets)
    if verbose:
        trace.mutter('expanded: %s', final_offsets)
    return final_offsets

789

790

def _expand_to_neighbors(self, offsets, cached_offsets, total_pages):

791

"""Expand requests to neighbors until we have enough pages.

792

793

This is called from _expand_offsets after policy has determined that we

794

want to expand.

795

We only want to expand requests within a given layer. We cheat a little

796

bit and assume all requests will be in the same layer. This is true

797

given the current design, but if it changes this algorithm may perform

798

oddly.

799

800

:param offsets: requested offsets

801

:param cached_offsets: offsets for pages we currently have cached

802

:return: A set() of offsets after expansion

803

"""

804

final_offsets = set(offsets)

805

first = end = None

806

new_tips = set(final_offsets)

807

while len(final_offsets) < self._recommended_pages and new_tips:

808

next_tips = set()

809

for pos in new_tips:

810

if first is None:

811

first, end = self._find_layer_first_and_end(pos)

812

previous = pos - 1

813

if (previous > 0

814

and previous not in cached_offsets

815

and previous not in final_offsets

816

and previous >= first):

817

next_tips.add(previous)

818

after = pos + 1

819

if (after < total_pages

820

and after not in cached_offsets

821

and after not in final_offsets

822

and after < end):

823

next_tips.add(after)

824

# This would keep us from going bigger than

825

# recommended_pages by only expanding the first offsets.

826

# However, if we are making a 'wide' request, it is

827

# reasonable to expand all points equally.

828

# if len(final_offsets) > recommended_pages:

829

# break

830

final_offsets.update(next_tips)

831

new_tips = next_tips

832

return final_offsets

833

834

def external_references(self, ref_list_num):
    """Return the keys referenced in a ref list but absent from the index.

    :param ref_list_num: Which reference list of each entry to inspect.
    :return: A set of referenced keys that are not keys of any entry.
    :raises ValueError: If the index has no such reference list.
    """
    if self._root_node is None:
        self._get_root_node()
    if ref_list_num + 1 > self.node_ref_lists:
        raise ValueError('No ref list %d, index has %d ref lists'
                         % (ref_list_num, self.node_ref_lists))
    present = set()
    referenced = set()
    for entry in self.iter_all_entries():
        present.add(entry[1])
        referenced.update(entry[3][ref_list_num])
    return referenced.difference(present)

846

847

def _find_layer_first_and_end(self, offset):

848

"""Find the start/stop nodes for the layer corresponding to offset.

849

850

:return: (first, end)

851

first is the first node in this layer

852

end is the first node of the next layer

853

"""

854

first = end = 0

855

for roffset in self._row_offsets:

856

first = end

857

end = roffset

858

if offset < roffset:

859

break

860

return first, end

861

862

def _get_offsets_to_cached_pages(self):

863

"""Determine what nodes we already have cached."""

864

cached_offsets = set(self._internal_node_cache.keys())

865

cached_offsets.update(self._leaf_node_cache.keys())

866

if self._root_node is not None:

867

cached_offsets.add(0)

868

return cached_offsets

869

870

def _get_root_node(self):

871

if self._root_node is None:

872

# We may not have a root node yet

873

self._get_internal_nodes([0])

874

return self._root_node

875

876

def _get_nodes(self, cache, node_indexes):

877

found = {}

878

needed = []

879

for idx in node_indexes:

880

if idx == 0 and self._root_node is not None:

881

found[0] = self._root_node

882

continue

883

try:

884

found[idx] = cache[idx]

885

except KeyError:

886

needed.append(idx)

887

if not needed:

888

return found

889

needed = self._expand_offsets(needed)

890

found.update(self._get_and_cache_nodes(needed))

891

return found

892

893

def _get_internal_nodes(self, node_indexes):

894

"""Get a node, from cache or disk.

895

896

After getting it, the node will be cached.

897

"""

898

return self._get_nodes(self._internal_node_cache, node_indexes)

899

900

def _cache_leaf_values(self, nodes):

901

"""Cache directly from key => value, skipping the btree."""

902

if self._leaf_value_cache is not None:

903

for node in nodes.itervalues():

904

for key, value in node.keys.iteritems():

905

if key in self._leaf_value_cache:

906

# Don't add the rest of the keys, we've seen this node

907

# before.

908

break

909

self._leaf_value_cache[key] = value

910

911

def _get_leaf_nodes(self, node_indexes):

912

"""Get a bunch of nodes, from cache or disk."""

913

found = self._get_nodes(self._leaf_node_cache, node_indexes)

914

self._cache_leaf_values(found)

915

return found

916

917

def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value) or (index, key, value,
        reference_lists).
        The former tuple is used when there are no reference lists in the
        index, making the API compatible with simple key:value index types.
        There is no defined order for the result iteration - it will be in
        the most efficient order for the index.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if not self.key_count():
        return
    if self._row_offsets[-1] == 1:
        # There is only the root node, and we read that via key_count(),
        # so there is nothing left to fetch.
        nodes = [(0, self._root_node)]
    else:
        # The last row of the tree holds the leaves.
        start_of_leaves = self._row_offsets[-2]
        end_of_leaves = self._row_offsets[-1]
        nodes = self._read_nodes(range(start_of_leaves, end_of_leaves))
    # We iterate strictly in-order so that we can use this function
    # for spilling index builds to disk.
    # The test on node_ref_lists is kept outside the loops so that it is
    # not repeated for every entry.
    if self.node_ref_lists:
        for _, node in nodes:
            for key, (value, refs) in sorted(node.keys.items()):
                yield (self, key, value, refs)
    else:
        for _, node in nodes:
            for key, (value, refs) in sorted(node.keys.items()):
                yield (self, key, value)

959

960

@staticmethod

961

def _multi_bisect_right(in_keys, fixed_keys):

962

"""Find the positions where each 'in_key' would fit in fixed_keys.

963

964

This is equivalent to doing "bisect_right" on each in_key into

965

fixed_keys

966

967

:param in_keys: A sorted list of keys to match with fixed_keys

968

:param fixed_keys: A sorted list of keys to match against

969

:return: A list of (integer position, [key list]) tuples.

970

"""

971

if not in_keys:

972

return []

973

if not fixed_keys:

974

# no pointers in the fixed_keys list, which means everything must

975

# fall to the left.

976

return [(0, in_keys)]

977

978

# TODO: Iterating both lists will generally take M + N steps

979

# Bisecting each key will generally take M * log2 N steps.

980

# If we had an efficient way to compare, we could pick the method

981

# based on which has the fewer number of steps.

982

# There is also the argument that bisect_right is a compiled

983

# function, so there is even more to be gained.

984

# iter_steps = len(in_keys) + len(fixed_keys)

985

# bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)

986

if len(in_keys) == 1: # Bisect will always be faster for M = 1

987

return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]

988

# elif bisect_steps < iter_steps:

989

# offsets = {}

990

# for key in in_keys:

991

# offsets.setdefault(bisect_right(fixed_keys, key),

992

# []).append(key)

993

# return [(o, offsets[o]) for o in sorted(offsets)]

994

in_keys_iter = iter(in_keys)

995

fixed_keys_iter = enumerate(fixed_keys)

996

cur_in_key = in_keys_iter.next()

997

cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()

998

999

class InputDone(Exception): pass

1000

class FixedDone(Exception): pass

1001

1002

output = []

1003

cur_out = []

1004

1005

# TODO: Another possibility is that rather than iterating on each side,

1006

# we could use a combination of bisecting and iterating. For

1007

# example, while cur_in_key < fixed_key, bisect to find its

1008

# point, then iterate all matching keys, then bisect (restricted

1009

# to only the remainder) for the next one, etc.

1010

try:

1011

while True:

1012

if cur_in_key < cur_fixed_key:

1013

cur_keys = []

1014

cur_out = (cur_fixed_offset, cur_keys)

1015

output.append(cur_out)

1016

while cur_in_key < cur_fixed_key:

1017

cur_keys.append(cur_in_key)

1018

try:

1019

cur_in_key = in_keys_iter.next()

1020

except StopIteration:

1021

raise InputDone

1022

# At this point cur_in_key must be >= cur_fixed_key

1023

# step the cur_fixed_key until we pass the cur key, or walk off

1024

# the end

1025

while cur_in_key >= cur_fixed_key:

1026

try:

1027

cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()

1028

except StopIteration:

1029

raise FixedDone

1030

except InputDone:

1031

# We consumed all of the input, nothing more to do

1032

pass

1033

except FixedDone:

1034

# There was some input left, but we consumed all of fixed, so we

1035

# have to add one more for the tail

1036

cur_keys = [cur_in_key]

1037

cur_keys.extend(in_keys_iter)

1038

cur_out = (len(fixed_keys), cur_keys)

1039

output.append(cur_out)

1040

return output

1041

1042

def iter_entries(self, keys):
    """Iterate over keys within the index.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned.
    """
    # 6 seconds spent in miss_torture using the sorted() line.
    # Even with out of order disk IO it seems faster not to sort it when
    # large queries are being made.
    # However, now that we are doing multi-way bisecting, we need the keys
    # in sorted order anyway. We could change the multi-way code to not
    # require sorted order. (For example, it bisects for the first node,
    # does an in-order search until a key comes before the current point,
    # which it then bisects for, etc.)
    keys = frozenset(keys)
    if not keys:
        return

    if not self.key_count():
        return

    if self._leaf_value_cache is None:
        needed_keys = keys
    else:
        needed_keys = []
        for key in keys:
            value = self._leaf_value_cache.get(key, None)
            if value is not None:
                # Cache hit: answer directly, without walking the btree.
                value, refs = value
                if self.node_ref_lists:
                    yield (self, key, value, refs)
                else:
                    yield (self, key, value)
            else:
                needed_keys.append(key)

    # Only the keys that missed the cache are looked up in the tree; the
    # ones answered above must not be yielded a second time.
    if not needed_keys:
        return
    needed_keys = sorted(needed_keys)

    # Start at the root (node 0) with every key, then push the keys down
    # one row of internal nodes at a time.
    nodes_and_keys = [(0, needed_keys)]

    for next_row_start in self._row_offsets[1:-1]:
        node_indexes = [idx for idx, s_keys in nodes_and_keys]
        nodes = self._get_internal_nodes(node_indexes)

        next_nodes_and_keys = []
        for node_index, sub_keys in nodes_and_keys:
            node = nodes[node_index]
            positions = self._multi_bisect_right(sub_keys, node.keys)
            node_offset = next_row_start + node.offset
            next_nodes_and_keys.extend([(node_offset + pos, s_keys)
                                       for pos, s_keys in positions])
        nodes_and_keys = next_nodes_and_keys
    # We should now be at the _LeafNodes
    node_indexes = [idx for idx, s_keys in nodes_and_keys]

    # TODO: We may *not* want to always read all the nodes in one
    #       big go. Consider setting a max size on this.

    nodes = self._get_leaf_nodes(node_indexes)
    for node_index, sub_keys in nodes_and_keys:
        if not sub_keys:
            continue
        node = nodes[node_index]
        for next_sub_key in sub_keys:
            if next_sub_key in node.keys:
                value, refs = node.keys[next_sub_key]
                if self.node_ref_lists:
                    yield (self, next_sub_key, value, refs)
                else:
                    yield (self, next_sub_key, value)

1122

1123

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    WARNING: Note that this method currently causes a full index parse
    unconditionally (which is reasonably appropriate as it is a means for
    thunking many small indices into one larger one and still supplies
    iter_all_entries at the thunk layer).

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    :raises errors.BadIndexKey: If a prefix starts with None or does not
        have the same length as the keys of the index. The check is made
        per prefix while iterating, so entries matching earlier prefixes
        may already have been yielded.
    """
    keys = sorted(set(keys))
    if not keys:
        return
    # Load if needed to check key lengths
    if self._key_count is None:
        self._get_root_node()
    # TODO: only access nodes that can satisfy the prefixes we are looking
    # for. For now, to meet API usage (as this function is not used by
    # current bzrlib) just suck the entire index and iterate in memory.
    #
    # Two in-memory shapes are built from the full scan:
    #  - single-element keys: 'nodes', a flat dict of key => value (or
    #    key => (value, refs) when the index has reference lists);
    #  - longer keys: 'nodes_by_key', nested dicts with one level per key
    #    element, whose innermost values are the tuples to yield (minus
    #    the leading index).
    nodes = {}
    if self.node_ref_lists:
        if self._key_length == 1:
            for _1, key, value, refs in self.iter_all_entries():
                nodes[key] = value, refs
        else:
            nodes_by_key = {}
            for _1, key, value, refs in self.iter_all_entries():
                key_value = key, value, refs
                # For a key of (foo, bar, baz) create
                # _nodes_by_key[foo][bar][baz] = key_value
                key_dict = nodes_by_key
                for subkey in key[:-1]:
                    key_dict = key_dict.setdefault(subkey, {})
                key_dict[key[-1]] = key_value
    else:
        if self._key_length == 1:
            for _1, key, value in self.iter_all_entries():
                nodes[key] = value
        else:
            nodes_by_key = {}
            for _1, key, value in self.iter_all_entries():
                key_value = key, value
                # For a key of (foo, bar, baz) create
                # _nodes_by_key[foo][bar][baz] = key_value
                key_dict = nodes_by_key
                for subkey in key[:-1]:
                    key_dict = key_dict.setdefault(subkey, {})
                key_dict[key[-1]] = key_value
    if self._key_length == 1:
        # With single-element keys a prefix cannot contain None, so this
        # is a plain exact-match lookup.
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            try:
                if self.node_ref_lists:
                    value, node_refs = nodes[key]
                    yield self, key, value, node_refs
                else:
                    yield self, key, nodes[key]
            except KeyError:
                # the key is not in the index
                pass
        return
    for key in keys:
        # sanity check
        if key[0] is None:
            raise errors.BadIndexKey(key)
        if len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        # find what it refers to:
        key_dict = nodes_by_key
        elements = list(key)
        # find the subdict whose contents should be returned.
        try:
            # Descend one level per leading non-None element; whatever is
            # left in 'elements' afterwards is the wildcard tail.
            while len(elements) and elements[0] is not None:
                key_dict = key_dict[elements[0]]
                elements.pop(0)
        except KeyError:
            # a non-existent lookup.
            continue
        if len(elements):
            # Wildcard tail: walk every nested dict below this point
            # (depth first, using 'dicts' as an explicit stack).
            dicts = [key_dict]
            while dicts:
                key_dict = dicts.pop(-1)
                # can't be empty or would not exist
                # Peek at one value to tell an intermediate level (dict
                # values) from the innermost level (tuples to yield).
                item, value = key_dict.iteritems().next()
                if type(value) == dict:
                    # push keys
                    dicts.extend(key_dict.itervalues())
                else:
                    # yield keys
                    for value in key_dict.itervalues():
                        # each value is the key:value:node refs tuple
                        # ready to yield.
                        yield (self, ) + value
        else:
            # the last thing looked up was a terminal element
            yield (self, ) + key_dict

1234

1235

def key_count(self):
    """Return an estimate of the number of keys in this index.

    For BTreeGraphIndex the estimate is exact as it is contained in the
    header.  When the count is not known yet the root node is requested
    first, which is expected to fill in ``self._key_count``.
    """
    count = self._key_count
    if count is None:
        self._get_root_node()
        count = self._key_count
    return count

1244

1245

def _compute_row_offsets(self):

1246

"""Fill out the _row_offsets attribute based on _row_lengths."""

1247

offsets = []

1248

row_offset = 0

1249

for row in self._row_lengths:

1250

offsets.append(row_offset)

1251

row_offset += row

1252

offsets.append(row_offset)

1253

self._row_offsets = offsets

1254

1255

def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    On success this sets node_ref_lists, _key_length, _key_count and
    _row_lengths from the four option lines that follow the signature,
    and recomputes _row_offsets.

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data. (which may be of length 0).
    :raises errors.BadIndexFormatSignature: If the data does not start
        with the signature of this index type.
    :raises errors.BadIndexOptions: If an option line does not start with
        the expected prefix, or its value is not an integer.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if signature != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, BTreeGraphIndex)
    lines = bytes[len(expected_signature):].splitlines()

    def int_option(line, prefix):
        # Return the integer that follows 'prefix' on an option line.
        if not line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            return int(line[len(prefix):])
        except ValueError:
            raise errors.BadIndexOptions(self)

    self.node_ref_lists = int_option(lines[0], _OPTION_NODE_REFS)
    self._key_length = int_option(lines[1], _OPTION_KEY_ELEMENTS)
    self._key_count = int_option(lines[2], _OPTION_LEN)

    row_lengths_line = lines[3]
    if not row_lengths_line.startswith(_OPTION_ROW_LENGTHS):
        raise errors.BadIndexOptions(self)
    # Comma separated integers; empty fields are ignored.
    fields = row_lengths_line[len(_OPTION_ROW_LENGTHS):].split(',')
    try:
        self._row_lengths = [int(field) for field in fields if len(field)]
    except ValueError:
        raise errors.BadIndexOptions(self)
    self._compute_row_offsets()

    # calculate the bytes we have processed: the signature, the four
    # option lines, and 4 more for the line terminators that splitlines()
    # dropped (this assumes single-byte terminators).
    header_end = len(signature) + sum([len(line) for line in lines[0:4]]) + 4
    return header_end, bytes[header_end:]

1301

1302

def _read_nodes(self, nodes):
    """Read some nodes from disk and parse them.

    This is a generator. It performs a readv (or reads from self._file
    when that is set) to get the node data into memory, and parses each
    node, then yields it to the caller. The nodes are requested in the
    supplied order. If possible doing sort() on the list before requesting
    a read may improve performance.

    When node 0 is requested while the size of the file is still unknown,
    the whole file is fetched instead: self._size is set, the rest of
    'nodes' is not examined, and every page of the file is parsed and
    yielded.

    Nothing in this function stores the nodes in a cache; that is left to
    the caller.

    :param nodes: The nodes to read. 0 - first node, 1 - second node etc.
    :return: An iterator of (node index, node) tuples, where node is a
        _LeafNode or an _InternalNode.
    :raises AssertionError: If a requested node starts past the end of
        the file, or a page is neither a leaf nor an internal node.
    """
    # may be the byte string of the whole file
    bytes = None
    # list of (offset, length) regions of the file that should, eventually
    # be read in to data_ranges, either from 'bytes' or from the transport
    ranges = []
    for index in nodes:
        offset = index * _PAGE_SIZE
        size = _PAGE_SIZE
        if index == 0:
            # Root node - special case
            if self._size:
                size = min(_PAGE_SIZE, self._size)
            else:
                # The only case where we don't know the size, is for very
                # small indexes. So we read the whole thing
                bytes = self._transport.get_bytes(self._name)
                self._size = len(bytes)
                # the whole thing should be parsed out of 'bytes'
                ranges.append((0, len(bytes)))
                break
        else:
            if offset > self._size:
                raise AssertionError('tried to read past the end'
                                     ' of the file %s > %s'
                                     % (offset, self._size))
            # the last page of the file may be shorter than _PAGE_SIZE
            size = min(size, self._size - offset)
        ranges.append((offset, size))
    if not ranges:
        return
    elif bytes is not None:
        # already have the whole file
        data_ranges = [(start, bytes[start:start+_PAGE_SIZE])
                       for start in xrange(0, len(bytes), _PAGE_SIZE)]
    elif self._file is None:
        data_ranges = self._transport.readv(self._name, ranges)
    else:
        data_ranges = []
        for offset, size in ranges:
            self._file.seek(offset)
            data_ranges.append((offset, self._file.read(size)))
    for offset, data in data_ranges:
        if offset == 0:
            # extract the header
            # (offset becomes the end of the header and data the rest of
            # the first page)
            offset, data = self._parse_header_from_bytes(data)
            if len(data) == 0:
                continue
        # from here on 'bytes' is reused for the decompressed page
        bytes = zlib.decompress(data)
        if bytes.startswith(_LEAF_FLAG):
            node = _LeafNode(bytes, self._key_length, self.node_ref_lists)
        elif bytes.startswith(_INTERNAL_FLAG):
            node = _InternalNode(bytes)
        else:
            raise AssertionError("Unknown node type for %r" % bytes)
        # Integer division: for the first page offset is the header end,
        # which presumably is below _PAGE_SIZE and so still maps to 0.
        yield offset / _PAGE_SIZE, node

1367

1368

def _signature(self):
    """Return the signature that files of this index type start with.

    _parse_header_from_bytes compares the leading bytes of the file
    against this value.
    """
    return _BTSIGNATURE

1371

1372

def validate(self):
    """Validate that everything in the index can be accessed.

    This just reads and parses every node; any problem shows up as the
    exception raised by the reading or parsing code.
    """
    self._get_root_node()
    # The root has been read above, so reading resumes at the start of
    # the second row.  With a single row there should be nothing left to
    # read anyway.
    first_unread = 1
    if len(self._row_lengths) > 1:
        first_unread = self._row_offsets[1]
    past_last = self._row_offsets[-1]
    # _read_nodes is a generator: it has to be drained for the reads and
    # the parsing to take place.
    for _ in self._read_nodes(range(first_unread, past_last)):
        pass

1384

1385

1386

try:

1387

from bzrlib import _btree_serializer_c as _btree_serializer

1388

except ImportError:

1389

from bzrlib import _btree_serializer_py as _btree_serializer

Older »