~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/btree_index.py

Committer: Martin Pool
Date: 2005-05-10 06:07:16 UTC
Revision ID: mbp@sourcefrog.net-20050510060716-0f939ce3ddea5d15

- New command update-stat-cache for testing
- work-cache always stored with unix newlines and in ascii

files added:
build-api

bzrlib/mdiff.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/statcache.py

bzrlib/tests.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/new-inventory-sample.xml

notes/performance.txt

testbzr

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

files removed:
BRANCH.TODO

COPYING.txt

INSTALL

Makefile

bzr.ico

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/annotate.py

bzrlib/api.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

contrib/bash/bzr

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/build_release.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files renamed:
contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/xml_serializer.py => bzrlib/xml.py

contrib/bash/bzr.simple => contrib/bash/bzr

bzrlib/util/elementtree/ => elementtree/

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/osutils.py

bzrlib/revision.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

contrib/add-bzr-to-baz

contrib/zsh/_bzr

elementtree/ElementTree.py

setup.py *

Show diffs side-by-side

added added

removed removed

bzrlib/btree_index.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""B+Tree indices"""

import array

import bisect

from bisect import bisect_right

from copy import deepcopy

import math

import struct

import tempfile

import zlib

from bzrlib import (

chunk_writer,

debug,

errors,

index,

lru_cache,

osutils,

trace,

)

from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN

from bzrlib.transport import get_transport

_BTSIGNATURE = "B+Tree Graph Index 2\n"

_OPTION_ROW_LENGTHS = "row_lengths="

_LEAF_FLAG = "type=leaf\n"

_INTERNAL_FLAG = "type=internal\n"

_INTERNAL_OFFSET = "offset="

_RESERVED_HEADER_BYTES = 120

_PAGE_SIZE = 4096

# 4K per page: 4MB - 1000 entries

_NODE_CACHE_SIZE = 1000

class _BuilderRow(object):

"""The stored state accumulated while writing out a row in the index.

:ivar spool: A temporary file used to accumulate nodes for this row

in the tree.

:ivar nodes: The count of nodes emitted so far.

"""

def __init__(self):

"""Create a _BuilderRow."""

self.nodes = 0

self.spool = tempfile.TemporaryFile()

self.writer = None

def finish_node(self, pad=True):

byte_lines, _, padding = self.writer.finish()

if self.nodes == 0:

# padded note:

self.spool.write("\x00" * _RESERVED_HEADER_BYTES)

skipped_bytes = 0

if not pad and padding:

del byte_lines[-1]

skipped_bytes = padding

self.spool.writelines(byte_lines)

remainder = (self.spool.tell() + skipped_bytes) % _PAGE_SIZE

if remainder != 0:

raise AssertionError("incorrect node length: %d, %d"

% (self.spool.tell(), remainder))

self.nodes += 1

self.writer = None

class _InternalBuilderRow(_BuilderRow):
    """Row state used while writing the internal (non-leaf) rows."""

    def finish_node(self, pad=True):
        """Finish the current node; internal nodes must always be padded.

        :raises AssertionError: if pad is false.
        """
        if pad:
            _BuilderRow.finish_node(self, True)
        else:
            raise AssertionError("Must pad internal nodes only.")

class _LeafBuilderRow(_BuilderRow):
    """Row state used while writing the leaf row of the tree."""

100

class BTreeBuilder(index.GraphIndexBuilder):

101

"""A Builder for B+Tree based Graph indices.

102

103

The resulting graph has the structure:

104

105

_SIGNATURE OPTIONS NODES

106

_SIGNATURE := 'B+Tree Graph Index 2' NEWLINE

107

OPTIONS := REF_LISTS KEY_ELEMENTS LENGTH ROW_LENGTHS

108

REF_LISTS := 'node_ref_lists=' DIGITS NEWLINE

109

KEY_ELEMENTS := 'key_elements=' DIGITS NEWLINE

110

LENGTH := 'len=' DIGITS NEWLINE

111

ROW_LENGTHS := 'row_lengths=' (DIGITS (COMMA DIGITS)*)? NEWLINE

112

NODES := NODE_COMPRESSED*

113

NODE_COMPRESSED:= COMPRESSED_BYTES{4096}

114

NODE_RAW := INTERNAL | LEAF

115

INTERNAL := INTERNAL_FLAG POINTERS

116

LEAF := LEAF_FLAG ROWS

117

KEY_ELEMENT := Not-whitespace-utf8

118

KEY := KEY_ELEMENT (NULL KEY_ELEMENT)*

119

ROWS := ROW*

120

ROW := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE

121

ABSENT := 'a'

122

REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}

123

REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?

124

REFERENCE := KEY

125

VALUE := no-newline-no-null-bytes

126

"""

127

128

def __init__(self, reference_lists=0, key_elements=1, spill_at=100000):
    """See GraphIndexBuilder.__init__.

    :param spill_at: Optional parameter controlling the maximum number
        of nodes that BTreeBuilder will hold in memory before add_node
        spills them to disk.
    """
    index.GraphIndexBuilder.__init__(
        self, reference_lists=reference_lists, key_elements=key_elements)
    # In-memory nodes that have not been spilled: {key: (node_refs, value)}
    self._nodes = {}
    # Nested per-key-element view of _nodes; None means "not built yet".
    self._nodes_by_key = None
    # Indices spilled to disk; a slot is None once it has been merged into
    # a later, larger index.
    self._backing_indices = []
    self._spill_at = spill_at
    self._optimize_for_size = False

143

144

def add_node(self, key, value, references=()):
    """Add a node to the index.

    Once the number of in-memory keys reaches the spill_at threshold the
    in-memory nodes are spilled to disk.

    :param key: The key. keys are non-empty tuples containing
        as many whitespace-free utf8 bytestrings as the key length
        defined for this index.
    :param value: The value to associate with the key. It may be any
        bytes as long as it does not contain NUL or newline bytes.
    :param references: An iterable of iterables of keys. Each is a
        reference to another key.
    :raises errors.BadIndexDuplicateKey: if key is already held in memory.
    """
    # The second element (absent references) is of no interest here.
    node_refs, _absent = self._check_key_ref_value(key, references, value)
    if key in self._nodes:
        raise errors.BadIndexDuplicateKey(key, self)
    self._nodes[key] = (node_refs, value)
    self._keys.add(key)
    by_key_is_built = self._nodes_by_key is not None
    if by_key_is_built and self._key_length > 1:
        # Keep the already-built nested lookup in step with _nodes.
        self._update_nodes_by_key(key, value, node_refs)
    if len(self._keys) >= self._spill_at:
        self._spill_mem_keys_to_disk()

169

170

def _spill_mem_keys_to_disk(self):
    """Write the in memory keys down to disk to cap memory consumption.

    Existing disk indices are combined with the memory nodes so that sorted
    order is preserved, following a powers-of-two scheme: the first spill
    writes the memory nodes to one index; the second merges memory with
    that index into a 2x index and drops the first; the third writes a
    fresh 1x index next to the 2x one; the fourth merges memory with both
    into a 4x index; and so on.
    """
    # Merge memory with every backing index up to the first empty slot.
    merge_sources = [self._iter_mem_nodes()]
    backing_pos = 0
    for backing in self._backing_indices:
        if backing is None:
            break
        merge_sources.append(backing.iter_all_entries())
        backing_pos += 1
    new_backing_file, size = self._write_nodes(
        self._iter_smallest(merge_sources))
    dir_path, base_name = osutils.split(new_backing_file.name)
    # Note: The transport here isn't strictly needed, because we will use
    # direct access to the new_backing._file object
    new_backing = BTreeGraphIndex(get_transport(dir_path), base_name, size)
    # GC will clean up the file
    new_backing._file = new_backing_file
    if backing_pos == len(self._backing_indices):
        self._backing_indices.append(new_backing)
    else:
        self._backing_indices[backing_pos] = new_backing
    # Everything before the new index has been folded into it.
    self._backing_indices[:backing_pos] = [None] * backing_pos
    self._nodes_by_key = None
    self._nodes = {}
    self._keys = set()

208

209

def add_nodes(self, nodes):
    """Add nodes to the index.

    :param nodes: An iterable of entries to add. When the index has
        reference lists each entry is (key, value, node_refs); otherwise
        each entry is (key, value).
    """
    if self.reference_lists:
        for (key, value, node_refs) in nodes:
            self.add_node(key, value, node_refs)
    else:
        for (key, value) in nodes:
            self.add_node(key, value)

220

221

def _iter_mem_nodes(self):

222

"""Iterate over the nodes held in memory."""

223

nodes = self._nodes

224

if self.reference_lists:

225

for key in sorted(nodes):

226

references, value = nodes[key]

227

yield self, key, value, references

228

else:

229

for key in sorted(nodes):

230

references, value = nodes[key]

231

yield self, key, value

232

233

def _iter_smallest(self, iterators_to_combine):

234

if len(iterators_to_combine) == 1:

235

for value in iterators_to_combine[0]:

236

yield value

237

return

238

current_values = []

239

for iterator in iterators_to_combine:

240

try:

241

current_values.append(iterator.next())

242

except StopIteration:

243

current_values.append(None)

244

last = None

245

while True:

246

# Decorate candidates with the value to allow 2.4's min to be used.

247

candidates = [(item[1][1], item) for item

248

in enumerate(current_values) if item[1] is not None]

249

if not len(candidates):

250

return

251

selected = min(candidates)

252

# undecorate back to (pos, node)

253

selected = selected[1]

254

if last == selected[1][1]:

255

raise errors.BadIndexDuplicateKey(last, self)

256

last = selected[1][1]

257

# Yield, with self as the index

258

yield (self,) + selected[1][1:]

259

pos = selected[0]

260

try:

261

current_values[pos] = iterators_to_combine[pos].next()

262

except StopIteration:

263

current_values[pos] = None

264

265

def _add_key(self, string_key, line, rows):
    """Add a key to the current chunk.

    :param string_key: The key to add.
    :param line: The fully serialised key and value.
    :param rows: The list of builder rows, root first and leaf row last.
        A new root row is inserted at the front when the existing rows
        cannot take another pointer.
    """
    def page_capacity(row):
        # The first node of a row gives up the reserved header bytes.
        if row.nodes == 0:
            return _PAGE_SIZE - _RESERVED_HEADER_BYTES
        return _PAGE_SIZE

    leaf_row = rows[-1]
    if leaf_row.writer is None:
        # Opening a new leaf chunk: first flesh out any internal nodes that
        # are needed to preserve the height of the tree.
        for pos, internal_row in enumerate(rows[:-1]):
            if internal_row.writer is not None:
                continue
            internal_row.writer = chunk_writer.ChunkWriter(
                page_capacity(internal_row), 0,
                optimize_for_size=self._optimize_for_size)
            internal_row.writer.write(_INTERNAL_FLAG)
            internal_row.writer.write(_INTERNAL_OFFSET +
                str(rows[pos + 1].nodes) + "\n")
        # then add the new leaf itself
        leaf_row.writer = chunk_writer.ChunkWriter(
            page_capacity(leaf_row),
            optimize_for_size=self._optimize_for_size)
        leaf_row.writer.write(_LEAF_FLAG)
    if not leaf_row.writer.write(line):
        # The key fitted in the current leaf; nothing more to do.
        return
    # This key did not fit in the node: close it and record the key as the
    # start of the next node in the row above. If it does not fit there
    # either, propagate upwards until a row accepts it.
    leaf_row.finish_node()
    key_line = string_key + "\n"
    needs_new_root = True
    for row in reversed(rows[:-1]):
        if not row.writer.write(key_line):
            # We've found a node that can handle the pointer.
            needs_new_root = False
            break
        row.finish_node()
    if needs_new_root:
        # We reached the current root without being able to mark the
        # division point, so a new root row is needed.
        if 'index' in debug.debug_flags:
            trace.mutter('Inserting new global row.')
        root_row = _InternalBuilderRow()
        rows.insert(0, root_row)
        # This node opens a row, so it loses the reserved header bytes.
        root_row.writer = chunk_writer.ChunkWriter(
            _PAGE_SIZE - _RESERVED_HEADER_BYTES,
            0,
            optimize_for_size=self._optimize_for_size)
        root_row.writer.write(_INTERNAL_FLAG)
        root_row.writer.write(_INTERNAL_OFFSET +
            str(rows[1].nodes - 1) + "\n")
        root_row.writer.write(key_line)
    # Retry the key now that a fresh leaf node will be opened.
    self._add_key(string_key, line, rows)

326

327

def _write_nodes(self, node_iterator):
    """Write node_iterator out as a B+Tree.

    :param node_iterator: An iterator of sorted nodes. Each node should
        match the output given by iter_all_entries.
    :return: A (file, size) tuple: a file handle, rewound to the start, for
        a named temporary file containing a B+Tree for the nodes, and the
        number of bytes written to it.
    :raises AssertionError: if the header does not fit in the reserved
        space, or an internal row copies an unexpected amount of data.
    """
    # The index rows - rows[0] is the root, rows[1] is the layer under it
    # etc.
    rows = []
    # forward sorted by key. In future we may consider topological sorting,
    # at the cost of table scans for direct lookup, or a second index for
    # direct lookup
    key_count = 0
    # Reset on every write; nothing in this method fills it in.
    self.row_lengths = []
    # Loop over all nodes adding them to the bottom row (rows[-1]). When a
    # chunk in a row is finished, the key that didn't fit (it comes after
    # the chunk) is propagated to the row above, transitively.
    for node in node_iterator:
        if key_count == 0:
            # First key triggers the first row
            rows.append(_LeafBuilderRow())
        key_count += 1
        string_key, line = _btree_serializer._flatten_node(node,
            self.reference_lists)
        self._add_key(string_key, line, rows)
    # Close the open node of every row, leaf row first; only internal
    # nodes are padded.
    for row in reversed(rows):
        pad = (type(row) != _LeafBuilderRow)
        row.finish_node(pad=pad)
    lines = [_BTSIGNATURE]
    lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
    lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
    lines.append(_OPTION_LEN + str(key_count) + '\n')
    row_lengths = [row.nodes for row in rows]
    lines.append(_OPTION_ROW_LENGTHS + ','.join(map(str, row_lengths)) + '\n')
    position = sum(map(len, lines))
    # Validate the header before creating the output file so that a
    # failure does not leave a temporary file behind.
    if position > _RESERVED_HEADER_BYTES:
        raise AssertionError("Could not fit the header in the"
                             " reserved space: %d > %d"
                             % (position, _RESERVED_HEADER_BYTES))
    result = tempfile.NamedTemporaryFile()
    result.writelines(lines)
    # reserved space at the start of the first node of each row
    reserved = _RESERVED_HEADER_BYTES
    # write the rows out:
    for row in rows:
        row.spool.flush()
        row.spool.seek(0)
        # copy nodes to the finalised file.
        # Special case the first node as it may be prefixed
        node = row.spool.read(_PAGE_SIZE)
        result.write(node[reserved:])
        result.write("\x00" * (reserved - position))
        position = 0 # Only the root row actually has an offset
        copied_len = osutils.pumpfile(row.spool, result)
        if copied_len != (row.nodes - 1) * _PAGE_SIZE:
            # The leaf row may be short because its last node is unpadded.
            if type(row) != _LeafBuilderRow:
                raise AssertionError("Incorrect amount of data copied"
                    " expected: %d, got: %d"
                    % ((row.nodes - 1) * _PAGE_SIZE,
                       copied_len))
    result.flush()
    size = result.tell()
    result.seek(0)
    return result, size

396

397

def finish(self):
    """Finalise the index.

    :return: A file handle for a temporary file containing the nodes added
        to the index.
    """
    written = self._write_nodes(self.iter_all_entries())
    # _write_nodes also reports a size, which is not needed here.
    return written[0]

404

405

def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value) entries, or of
        (index, key, value, reference_lists) entries when the index has
        reference lists. Entries are produced in sorted key order: the
        in-memory nodes are sorted and then merged with the entries of any
        backing indices.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    # Doing serial rather than ordered would be faster; but this shouldn't
    # be getting called routinely anyway.
    iterators = [self._iter_mem_nodes()]
    for backing in self._backing_indices:
        if backing is not None:
            iterators.append(backing.iter_all_entries())
    if len(iterators) == 1:
        # Nothing has been spilled: the memory nodes are the whole index.
        return iterators[0]
    return self._iter_smallest(iterators)

424

425

def iter_entries(self, keys):
    """Iterate over keys within the index.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable of (index, key, value, reference_lists). There is
        no defined order for the result iteration - in-memory matches come
        first, followed by matches from each backing index in turn.
    """
    wanted = set(keys)
    in_memory = wanted.intersection(self._keys)
    with_refs = self.reference_lists
    for key in in_memory:
        references, value = self._nodes[key]
        if with_refs:
            yield self, key, value, references
        else:
            yield self, key, value
    if not self._backing_indices:
        # Nothing was ever spilled, so there is nowhere else to look.
        return
    # Only keys not found in memory are looked up in backing indices.
    wanted.difference_update(in_memory)
    for backing in self._backing_indices:
        if backing is None:
            continue
        if not wanted:
            return
        for node in backing.iter_entries(wanted):
            wanted.remove(node[1])
            yield (self,) + node[1:]

457

458

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    :raises errors.BadIndexKey: if a prefix starts with None or has the
        wrong number of elements.
    """
    # XXX: Too much duplication with the GraphIndex class; consider finding
    # a good place to pull out the actual common logic.
    prefixes = set(keys)
    if not prefixes:
        return
    # Matches from spilled indices are reported first.
    for backing in self._backing_indices:
        if backing is None:
            continue
        for node in backing.iter_entries_prefix(prefixes):
            yield (self,) + node[1:]

    def check(prefix):
        # sanity check
        if prefix[0] is None:
            raise errors.BadIndexKey(prefix)
        if len(prefix) != self._key_length:
            raise errors.BadIndexKey(prefix)

    if self._key_length == 1:
        # Single element keys: a prefix is simply an exact key.
        for key in prefixes:
            check(key)
            try:
                node = self._nodes[key]
            except KeyError:
                continue
            if self.reference_lists:
                yield self, key, node[1], node[0]
            else:
                yield self, key, node[1]
        return
    for key in prefixes:
        check(key)
        # Walk down the nested lookup for each leading non-None element.
        subtree = self._get_nodes_by_key()
        depth = 0
        try:
            while depth < len(key) and key[depth] is not None:
                subtree = subtree[key[depth]]
                depth += 1
        except KeyError:
            # a non-existent lookup.
            continue
        if depth == len(key):
            # Fully specified key: subtree is the stored entry itself.
            yield (self, ) + subtree
            continue
        pending = [subtree]
        while pending:
            current = pending.pop(-1)
            # can't be empty or would not exist
            sample = current.itervalues().next()
            if type(sample) == dict:
                # one level above further dicts: descend into all of them
                pending.extend(current.itervalues())
            else:
                # lowest level: the values are the stored entries
                for entry in current.itervalues():
                    yield (self, ) + entry

533

534

def _get_nodes_by_key(self):

535

if self._nodes_by_key is None:

536

nodes_by_key = {}

537

if self.reference_lists:

538

for key, (references, value) in self._nodes.iteritems():

539

key_dict = nodes_by_key

540

for subkey in key[:-1]:

541

key_dict = key_dict.setdefault(subkey, {})

542

key_dict[key[-1]] = key, value, references

543

else:

544

for key, (references, value) in self._nodes.iteritems():

545

key_dict = nodes_by_key

546

for subkey in key[:-1]:

547

key_dict = key_dict.setdefault(subkey, {})

548

key_dict[key[-1]] = key, value

549

self._nodes_by_key = nodes_by_key

550

return self._nodes_by_key

551

552

def key_count(self):
    """Return an estimate of the number of keys in this index.

    This is the number of keys held in memory plus the key counts
    reported by every backing index.
    """
    total = len(self._keys)
    for backing in self._backing_indices:
        if backing is not None:
            total += backing.key_count()
    return total

559

560

def validate(self):
    """Do nothing: in-memory indices have no known corruption modes."""

562

563

564

class _LeafNode(object):
    """A leaf node for a serialised B+Tree index."""

    def __init__(self, bytes, key_length, ref_list_length):
        """Parse bytes to create a leaf node object.

        The parsed (key, entry) pairs are stored as a dict in self.keys.
        """
        # Parsing is left to the serializer; splitlines mangles the \r
        # delimiters, so it must not be used.
        parsed = _btree_serializer._parse_leaf_lines(
            bytes, key_length, ref_list_length)
        self.keys = dict(parsed)

572

573

574

class _InternalNode(object):

575

"""An internal node for a serialised B+Tree index."""

576

577

def __init__(self, bytes):

578

"""Parse bytes to create an internal node object."""

579

# splitlines mangles the \r delimiters.. don't use it.

580

self.keys = self._parse_lines(bytes.split('\n'))

581

582

def _parse_lines(self, lines):

583

nodes = []

584

self.offset = int(lines[1][7:])

585

for line in lines[2:]:

586

if line == '':

587

break

588

nodes.append(tuple(line.split('\0')))

589

return nodes

590

591

592

class BTreeGraphIndex(object):

593

"""Access to nodes via the standard GraphIndex interface for B+Tree's.

594

595

Individual nodes are held in a LRU cache. This holds the root node in

596

memory except when very large walks are done.

597

"""

598

599

def __init__(self, transport, name, size):
    """Create a B+Tree index object on the index name.

    :param transport: The transport to read data for the index from.
    :param name: The file name of the index on transport.
    :param size: Optional size of the index in bytes. This allows
        compatibility with the GraphIndex API, as well as ensuring that
        the initial read (to read the root node header) can be done
        without over-reading even on empty indices, and on small indices
        allows single-IO to read the entire index.
    """
    self._transport = transport
    self._name = name
    self._size = size
    self._file = None
    # Header derived state, unknown until the root node has been read.
    self._key_count = None
    self._row_lengths = None
    self._row_offsets = None # Start of each row, [-1] is the end
    self._root_node = None
    # The key => value cache is currently disabled; when enabled the
    # intended default max size is 100,000 leaf values.
    self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
    self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
    self._internal_node_cache = lru_cache.LRUCache()
    # Needs self._transport, which is already set above.
    self._recommended_pages = self._compute_recommended_pages()

623

624

def __eq__(self, other):

625

"""Equal when self and other were created with the same parameters."""

626

return (

627

type(self) == type(other) and

628

self._transport == other._transport and

629

self._name == other._name and

630

self._size == other._size)

631

632

def __ne__(self, other):

633

return not self.__eq__(other)

634

635

def _get_and_cache_nodes(self, nodes):

636

"""Read nodes and cache them in the lru.

637

638

The nodes list supplied is sorted and then read from disk, each node

639

being inserted it into the _node_cache.

640

641

Note: Asking for more nodes than the _node_cache can contain will

642

result in some of the results being immediately discarded, to prevent

643

this an assertion is raised if more nodes are asked for than are

644

cachable.

645

646

:return: A dict of {node_pos: node}

647

"""

648

found = {}

649

start_of_leaves = None

650

for node_pos, node in self._read_nodes(sorted(nodes)):

651

if node_pos == 0: # Special case

652

self._root_node = node

653

else:

654

if start_of_leaves is None:

655

start_of_leaves = self._row_offsets[-2]

656

if node_pos < start_of_leaves:

657

self._internal_node_cache.add(node_pos, node)

658

else:

659

self._leaf_node_cache.add(node_pos, node)

660

found[node_pos] = node

661

return found

662

663

def _compute_recommended_pages(self):
    """Convert transport's recommended_page_size into btree pages.

    recommended_page_size is in bytes; the result is the number of
    _PAGE_SIZE pages needed to cover that many bytes, rounded up.
    """
    recommended_read = self._transport.recommended_page_size()
    pages = math.ceil(recommended_read / float(_PAGE_SIZE))
    return int(pages)

673

674

def _compute_total_pages_in_index(self):

675

"""How many pages are in the index.

676

677

If we have read the header we will use the value stored there.

678

Otherwise it will be computed based on the length of the index.

679

"""

680

if self._size is None:

681

raise AssertionError('_compute_total_pages_in_index should not be'

682

' called when self._size is None')

683

if self._root_node is not None:

684

# This is the number of pages as defined by the header

685

return self._row_offsets[-1]

686

# This is the number of pages as defined by the size of the index. They

687

# should be indentical.

688

total_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))

689

return total_pages

690

691

def _expand_offsets(self, offsets):
    """Find extra pages to download.

    The idea is that we always want to make big-enough requests (like 64kB
    for http), so that we don't waste round trips. So given the entries
    that we already have cached and the new pages being downloaded figure
    out what other pages we might want to read.

    See also doc/developers/btree_index_prefetch.txt for more details.

    :param offsets: The offsets to be read
    :return: A list of offsets to download
    """
    verbose = 'index' in debug.debug_flags
    if verbose:
        trace.mutter('expanding: %s\toffsets: %s', self._name, offsets)

    if len(offsets) >= self._recommended_pages:
        # Don't add more, we are already requesting more than enough
        if verbose:
            trace.mutter(' not expanding large request (%s >= %s)',
                         len(offsets), self._recommended_pages)
        return offsets
    if self._size is None:
        # Don't try anything, because we don't know where the file ends
        if verbose:
            trace.mutter(' not expanding without knowing index size')
        return offsets
    total_pages = self._compute_total_pages_in_index()
    cached_offsets = self._get_offsets_to_cached_pages()
    if total_pages - len(cached_offsets) <= self._recommended_pages:
        # Reading recommended_pages would read the rest of the index, so
        # just read whatever is left.
        expanded = [page for page in xrange(total_pages)
                    if page not in cached_offsets]
        if verbose:
            trace.mutter(' reading all unread pages: %s', expanded)
        return expanded

    if self._root_node is None:
        # ATM on the first read of the root node of a large index, we don't
        # bother pre-reading any other pages. This is because the
        # likelihood of actually reading interesting pages is very low.
        # See doc/developers/btree_index_prefetch.txt for a discussion, and
        # a possible implementation when we are guessing that the second
        # layer index is small
        final_offsets = offsets
    else:
        tree_depth = len(self._row_lengths)
        if len(cached_offsets) < tree_depth and len(offsets) == 1:
            # We haven't read enough to justify expansion. If we are only
            # going to read the root node and 1 leaf node, it isn't worth
            # expanding the request. Once at least 2 nodes have been read
            # we are probably doing a search, and start expanding.
            if verbose:
                trace.mutter(' not expanding on first reads')
            return offsets
        final_offsets = self._expand_to_neighbors(offsets, cached_offsets,
                                                  total_pages)

    final_offsets = sorted(final_offsets)
    if verbose:
        trace.mutter('expanded: %s', final_offsets)
    return final_offsets

759

760

def _expand_to_neighbors(self, offsets, cached_offsets, total_pages):

761

"""Expand requests to neighbors until we have enough pages.

762

763

This is called from _expand_offsets after policy has determined that we

764

want to expand.

765

We only want to expand requests within a given layer. We cheat a little

766

bit and assume all requests will be in the same layer. This is true

767

given the current design, but if it changes this algorithm may perform

768

oddly.

769

770

:param offsets: requested offsets

771

:param cached_offsets: offsets for pages we currently have cached

772

:return: A set() of offsets after expansion

773

"""

774

final_offsets = set(offsets)

775

first = end = None

776

new_tips = set(final_offsets)

777

while len(final_offsets) < self._recommended_pages and new_tips:

778

next_tips = set()

779

for pos in new_tips:

780

if first is None:

781

first, end = self._find_layer_first_and_end(pos)

782

previous = pos - 1

783

if (previous > 0

784

and previous not in cached_offsets

785

and previous not in final_offsets

786

and previous >= first):

787

next_tips.add(previous)

788

after = pos + 1

789

if (after < total_pages

790

and after not in cached_offsets

791

and after not in final_offsets

792

and after < end):

793

next_tips.add(after)

794

# This would keep us from going bigger than

795

# recommended_pages by only expanding the first offsets.

796

# However, if we are making a 'wide' request, it is

797

# reasonable to expand all points equally.

798

# if len(final_offsets) > recommended_pages:

799

# break

800

final_offsets.update(next_tips)

801

new_tips = next_tips

802

return final_offsets

803

804

def _find_layer_first_and_end(self, offset):

805

"""Find the start/stop nodes for the layer corresponding to offset.

806

807

:return: (first, end)

808

first is the first node in this layer

809

end is the first node of the next layer

810

"""

811

first = end = 0

812

for roffset in self._row_offsets:

813

first = end

814

end = roffset

815

if offset < roffset:

816

break

817

return first, end

818

819

def _get_offsets_to_cached_pages(self):

820

"""Determine what nodes we already have cached."""

821

cached_offsets = set(self._internal_node_cache.keys())

822

cached_offsets.update(self._leaf_node_cache.keys())

823

if self._root_node is not None:

824

cached_offsets.add(0)

825

return cached_offsets

826

827

def _get_root_node(self):

828

if self._root_node is None:

829

# We may not have a root node yet

830

self._get_internal_nodes([0])

831

return self._root_node

832

833

def _get_nodes(self, cache, node_indexes):

834

found = {}

835

needed = []

836

for idx in node_indexes:

837

if idx == 0 and self._root_node is not None:

838

found[0] = self._root_node

839

continue

840

try:

841

found[idx] = cache[idx]

842

except KeyError:

843

needed.append(idx)

844

if not needed:

845

return found

846

needed = self._expand_offsets(needed)

847

found.update(self._get_and_cache_nodes(needed))

848

return found

849

850

def _get_internal_nodes(self, node_indexes):

851

"""Get a node, from cache or disk.

852

853

After getting it, the node will be cached.

854

"""

855

return self._get_nodes(self._internal_node_cache, node_indexes)

856

857

def _cache_leaf_values(self, nodes):

858

"""Cache directly from key => value, skipping the btree."""

859

if self._leaf_value_cache is not None:

860

for node in nodes.itervalues():

861

for key, value in node.keys.iteritems():

862

if key in self._leaf_value_cache:

863

# Don't add the rest of the keys, we've seen this node

864

# before.

865

break

866

self._leaf_value_cache[key] = value

867

868

def _get_leaf_nodes(self, node_indexes):

869

"""Get a bunch of nodes, from cache or disk."""

870

found = self._get_nodes(self._leaf_node_cache, node_indexes)

871

self._cache_leaf_values(found)

872

return found

873

874

def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value) or (index, key, value,
        reference_lists). The former tuple is used when there are no
        reference lists in the index, making the API compatible with
        simple key:value index types. Entries are produced node by node,
        with the keys of each node in sorted order.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if not self.key_count():
        return
    if self._row_offsets[-1] == 1:
        # There is only the root node, and we read that via key_count()
        nodes = [(0, self._root_node)]
    else:
        # The last row of the btree holds the leaf nodes.
        start_of_leaves = self._row_offsets[-2]
        end_of_leaves = self._row_offsets[-1]
        nodes = self._read_nodes(range(start_of_leaves, end_of_leaves))
    # We iterate strictly in-order so that we can use this function
    # for spilling index builds to disk.
    with_refs = self.node_ref_lists
    for _, node in nodes:
        for key, (value, refs) in sorted(node.keys.items()):
            if with_refs:
                yield (self, key, value, refs)
            else:
                yield (self, key, value)

916

917

@staticmethod
def _multi_bisect_right(in_keys, fixed_keys):
    """Find the positions where each 'in_key' would fit in fixed_keys.

    This is equivalent to doing "bisect_right" on each in_key into
    fixed_keys, with the keys that land on the same position grouped
    together.

    :param in_keys: A sorted list of keys to match with fixed_keys
    :param fixed_keys: A sorted list of keys to match against
    :return: A list of (integer position, [key list]) tuples, in
        increasing position order. Positions that receive no key are
        left out.
    """
    if not in_keys:
        return []
    if not fixed_keys:
        # no pointers in the fixed_keys list, which means everything must
        # fall to the left.
        return [(0, in_keys)]
    if len(in_keys) == 1:
        # Bisect will always be faster for M = 1
        return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]

    # TODO: Walking both lists takes about M + N steps, bisecting each key
    #       about M * log2 N steps. With a cheap way to compare the two we
    #       could pick whichever is smaller, or mix the approaches: bisect
    #       for the first key, walk the keys that share its position, then
    #       bisect the remainder for the next one, etc.
    groups = []
    total = len(in_keys)
    consumed = 0
    for position, fixed_key in enumerate(fixed_keys):
        if in_keys[consumed] < fixed_key:
            # Everything up to (not including) fixed_key belongs here.
            batch = []
            groups.append((position, batch))
            while in_keys[consumed] < fixed_key:
                batch.append(in_keys[consumed])
                consumed += 1
                if consumed == total:
                    # We consumed all of the input, nothing more to do
                    return groups
        # Otherwise the pending key is >= fixed_key: step to the next
        # fixed key until we pass it, or walk off the end.
    # There was some input left, but we consumed all of fixed, so we
    # have to add one more group for the tail.
    groups.append((len(fixed_keys), list(in_keys[consumed:])))
    return groups

998

999

def iter_entries(self, keys):
    """Iterate over keys within the index.

    Keys found in the leaf value cache (when it is enabled) are yielded
    straight from it; only the remaining keys are looked up by walking the
    btree from the root row down to the leaf row.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned.
    """
    keys = frozenset(keys)
    if not keys:
        return

    if not self.key_count():
        return

    if self._leaf_value_cache is None:
        needed_keys = keys
    else:
        needed_keys = []
        for key in keys:
            cached = self._leaf_value_cache.get(key, None)
            if cached is None:
                # Not in the cache, so it has to be looked up in the btree.
                needed_keys.append(key)
                continue
            # Cache hit: answer it here and do not look it up again.
            value, refs = cached
            if self.node_ref_lists:
                yield (self, key, value, refs)
            else:
                yield (self, key, value)

    if not needed_keys:
        return
    # 6 seconds spent in miss_torture using the sorted() line.
    # Even with out of order disk IO it seems faster not to sort it when
    # large queries are being made.
    # However, the multi-way bisecting below needs the keys in sorted
    # order.
    needed_keys = sorted(needed_keys)

    # (node index, keys to look for below that node), starting at the root.
    nodes_and_keys = [(0, needed_keys)]

    for next_row_start in self._row_offsets[1:-1]:
        node_indexes = [idx for idx, s_keys in nodes_and_keys]
        nodes = self._get_internal_nodes(node_indexes)

        next_nodes_and_keys = []
        for node_index, sub_keys in nodes_and_keys:
            node = nodes[node_index]
            positions = self._multi_bisect_right(sub_keys, node.keys)
            node_offset = next_row_start + node.offset
            next_nodes_and_keys.extend([(node_offset + pos, s_keys)
                                        for pos, s_keys in positions])
        nodes_and_keys = next_nodes_and_keys
    # We should now be at the _LeafNodes
    node_indexes = [idx for idx, s_keys in nodes_and_keys]

    # TODO: We may *not* want to always read all the nodes in one
    #       big go. Consider setting a max size on this.

    nodes = self._get_leaf_nodes(node_indexes)
    for node_index, sub_keys in nodes_and_keys:
        if not sub_keys:
            continue
        node = nodes[node_index]
        for next_sub_key in sub_keys:
            if next_sub_key in node.keys:
                value, refs = node.keys[next_sub_key]
                if self.node_ref_lists:
                    yield (self, next_sub_key, value, refs)
                else:
                    yield (self, next_sub_key, value)

1079

1080

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    WARNING: Note that this method currently causes a full index parse
    unconditionally (which is reasonably appropriate as it is a means for
    thunking many small indices into one larger one and still supplies
    iter_all_entries at the thunk layer).

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    :raises BadIndexKey: if a prefix starts with None or does not have
        the index's key length. This is only checked after the whole
        index has been read.
    """
    keys = sorted(set(keys))
    if not keys:
        return
    # Load if needed to check key lengths
    if self._key_count is None:
        self._get_root_node()
    # TODO: only access nodes that can satisfy the prefixes we are looking
    # for. For now, to meet API usage (as this function is not used by
    # current bzrlib) just suck the entire index and iterate in memory.
    single_element = self._key_length == 1
    with_refs = self.node_ref_lists
    # key => value (or (value, refs)); only filled for one-element keys.
    nodes = {}
    # Nested dicts, one level per key element; only filled for longer keys.
    nodes_by_key = {}

    def remember(key, payload, record):
        if single_element:
            nodes[key] = payload
            return
        # For a key of (foo, bar, baz) create
        # nodes_by_key[foo][bar][baz] = record
        subtree = nodes_by_key
        for subkey in key[:-1]:
            subtree = subtree.setdefault(subkey, {})
        subtree[key[-1]] = record

    if with_refs:
        for _1, key, value, refs in self.iter_all_entries():
            remember(key, (value, refs), (key, value, refs))
    else:
        for _1, key, value in self.iter_all_entries():
            remember(key, value, (key, value))

    for key in keys:
        # sanity check
        if key[0] is None or len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        if single_element:
            # With one-element keys a prefix is just an exact lookup.
            try:
                if with_refs:
                    value, node_refs = nodes[key]
                    yield self, key, value, node_refs
                else:
                    yield self, key, nodes[key]
            except KeyError:
                pass
            continue
        # find the subdict whose contents should be returned.
        subtree = nodes_by_key
        remaining = list(key)
        try:
            while remaining and remaining[0] is not None:
                subtree = subtree[remaining[0]]
                remaining.pop(0)
        except KeyError:
            # a non-existant lookup.
            continue
        if not remaining:
            # the last thing looked up was a terminal element, so subtree
            # is the stored (key, value[, refs]) record itself.
            yield (self, ) + subtree
            continue
        # Walk everything below the matched prefix. Every level holds
        # either only dicts or only records, because all keys have the
        # same length.
        pending = [subtree]
        while pending:
            current = pending.pop(-1)
            for child in current.values():
                if isinstance(child, dict):
                    # push keys
                    pending.append(child)
                else:
                    # each value is the key:value:node refs tuple
                    # ready to yield.
                    yield (self, ) + child

1191

1192

def key_count(self):
    """Return an estimate of the number of keys in this index.

    For BTreeGraphIndex the estimate is exact as it is contained in the
    header.
    """
    if self._key_count is not None:
        return self._key_count
    # Not known yet: reading the root node is expected to parse the
    # header, which records the count.
    self._get_root_node()
    return self._key_count

1201

1202

def _compute_row_offsets(self):
    """Fill out the _row_offsets attribute based on _row_lengths.

    _row_offsets[i] is the index of the first node of row i, and one final
    entry holds the total number of nodes.
    """
    running_total = 0
    offsets = [running_total]
    for row_length in self._row_lengths:
        running_total += row_length
        offsets.append(running_total)
    self._row_offsets = offsets

1211

1212

def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    The header is the signature followed by four option lines, from which
    node_ref_lists, _key_length, _key_count and _row_lengths are set, in
    that order; _row_offsets is then computed from _row_lengths.

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data. (which may be of length 0).
    :raises BadIndexFormatSignature: if the data does not start with this
        index type's signature.
    :raises BadIndexOptions: if an option line has the wrong prefix or a
        value that is not an integer.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if signature != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, BTreeGraphIndex)
    lines = bytes[len(expected_signature):].splitlines()

    def option_text(line, prefix):
        # Strip `prefix` off an option line, insisting that it is there.
        if not line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        return line[len(prefix):]

    def option_int(line, prefix):
        # The integer value of an option line.
        text = option_text(line, prefix)
        try:
            return int(text)
        except ValueError:
            raise errors.BadIndexOptions(self)

    self.node_ref_lists = option_int(lines[0], _OPTION_NODE_REFS)
    self._key_length = option_int(lines[1], _OPTION_KEY_ELEMENTS)
    self._key_count = option_int(lines[2], _OPTION_LEN)
    row_text = option_text(lines[3], _OPTION_ROW_LENGTHS)
    try:
        # Comma separated lengths; empty fields are ignored.
        self._row_lengths = [int(length) for length in row_text.split(',')
                             if len(length)]
    except ValueError:
        raise errors.BadIndexOptions(self)
    self._compute_row_offsets()

    # calculate the bytes we have processed: the signature, the four
    # option lines, and one line terminator for each of them.
    header_end = len(signature) + sum([len(line) for line in lines[:4]]) + 4
    return header_end, bytes[header_end:]

1258

1259

def _read_nodes(self, nodes):
    """Read some nodes from disk, yielding each one as it is parsed.

    This is a generator, so nothing is read until it is iterated. It
    gets the node data into memory (a readv on the transport, reads on
    self._file, or a single get_bytes of the whole file when the size is
    not known), then decompresses and parses each node and yields it to
    the caller. The nodes are requested in the supplied order. If possible
    doing sort() on the list before requesting a read may improve
    performance.

    Reading node 0 also parses the index header. This method does not
    itself put the nodes in any cache.

    :param nodes: The nodes to read. 0 - first node, 1 - second node etc.
    :return: An iterator of (node_index, node) tuples, where node is a
        _LeafNode or an _InternalNode.
    :raises AssertionError: if a node other than 0 starts beyond the end of
        the file, or if a node's data is of an unknown type.
    """
    # may be the byte string of the whole file
    whole_file = None
    # list of (offset, length) regions of the file that should, eventually
    # be read in to data_ranges, either from 'whole_file' or from the
    # transport
    ranges = []
    for index in nodes:
        offset = index * _PAGE_SIZE
        size = _PAGE_SIZE
        if index == 0:
            # Root node - special case
            if self._size:
                size = min(_PAGE_SIZE, self._size)
            else:
                # The only case where we don't know the size, is for very
                # small indexes. So we read the whole thing
                whole_file = self._transport.get_bytes(self._name)
                self._size = len(whole_file)
                # the whole thing should be parsed out of 'whole_file'
                ranges.append((0, len(whole_file)))
                break
        else:
            if offset > self._size:
                raise AssertionError('tried to read past the end'
                                     ' of the file %s > %s'
                                     % (offset, self._size))
            size = min(size, self._size - offset)
        ranges.append((offset, size))
    if not ranges:
        return
    elif whole_file is not None:
        # already have the whole file, so cut it into pages
        data_ranges = [(start, whole_file[start:start+_PAGE_SIZE])
                       for start in range(0, len(whole_file), _PAGE_SIZE)]
    elif self._file is None:
        data_ranges = self._transport.readv(self._name, ranges)
    else:
        data_ranges = []
        for offset, size in ranges:
            self._file.seek(offset)
            data_ranges.append((offset, self._file.read(size)))
    for offset, data in data_ranges:
        if offset == 0:
            # extract the header; offset becomes the end of the header,
            # which still floors to node 0 below.
            offset, data = self._parse_header_from_bytes(data)
            if len(data) == 0:
                continue
        node_bytes = zlib.decompress(data)
        if node_bytes.startswith(_LEAF_FLAG):
            node = _LeafNode(node_bytes, self._key_length,
                             self.node_ref_lists)
        elif node_bytes.startswith(_INTERNAL_FLAG):
            node = _InternalNode(node_bytes)
        else:
            raise AssertionError("Unknown node type for %r" % node_bytes)
        # Floor division: the node index must stay an integer.
        yield offset // _PAGE_SIZE, node

1324

1325

def _signature(self):
    """Return the signature that files of this index type start with."""
    return _BTSIGNATURE

1328

1329

def validate(self):
    """Validate that everything in the index can be accessed.

    This just reads and parses every node: the root node first, then all
    the nodes from the start of the second row to the end of the index.
    """
    self._get_root_node()
    # With a single row we shouldn't be reading anything anyway.
    first_node = 1
    if len(self._row_lengths) > 1:
        first_node = self._row_offsets[1]
    end_node = self._row_offsets[-1]
    # _read_nodes is consumed purely for the side effect of reading and
    # parsing; the nodes themselves are discarded.
    for _unused in self._read_nodes(range(first_node, end_node)):
        pass

1341

1342

1343

try:

1344

from bzrlib import _btree_serializer_c as _btree_serializer

1345

except ImportError:

1346

from bzrlib import _btree_serializer_py as _btree_serializer

Older »