~bzr-pqm/bzr/bzr.dev

Committer: John Arbash Meinel
Date: 2009-06-04 17:12:29 UTC
mto: This revision was merged to the branch mainline in revision 4410.
Revision ID: john@arbash-meinel.com-20090604171229-kbgfatt63y3u3uh1

Some small tweaks to decoding strings (avoid passing over the length 2x)

Down to 1.1s (from 1.4s) for decoding all of bzr.dev.
Also, favor decoding strings and then lists in _decode_object, since that is the
order of frequency in which those types occur inside Revisions.

files added:
BRANCH.TODO

COPYING.txt

bzr.ico

bzrlib/_bencode_pyx.h

bzrlib/_bencode_pyx.pyx

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/api.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_tags.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bencode.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/bundle

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/cmd_version_info.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/inventory_delta.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/store/revision

bzrlib/store/versioned

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/bzrdir_implementations/test_push.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__rio.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bencode.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chk_serializer.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_file_with_stat.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_content_filters.py

bzrlib/tests/workingtree_implementations/test_eol_conversion.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/transform.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/log.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/urlutils.py

bzrlib/util/_bencode_py.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/views.py

bzrlib/weave_commands.py

bzrlib/win32utils.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2.txt

doc/developers/gc.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

doc/news-template.txt

generate_docs.py

man1

profile_imports.py

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/check-newsbugs.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/riodemo.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/build_release.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files removed:
NEWS.developers

build-api

bzrlib/clone.py

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/revfile.py

bzrlib/store/compressed_text.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

testbzr

files renamed:
bzrlib/changeset.py => bzrlib/bundle/__init__.py

bzrlib/graph.py => bzrlib/deprecated_graph.py

bzrlib/store/weave.py => bzrlib/store/versioned/__init__.py

bzrlib/selftest/ => bzrlib/tests/

bzrlib/selftest/testannotate.py => bzrlib/tests/blackbox/test_annotate.py

bzrlib/selftest/test_revision_info.py => bzrlib/tests/blackbox/test_revision_info.py

bzrlib/selftest/teststatus.py => bzrlib/tests/blackbox/test_status.py

bzrlib/selftest/blackbox.py => bzrlib/tests/blackbox/test_too_much.py

bzrlib/selftest/versioning.py => bzrlib/tests/blackbox/test_versioning.py

bzrlib/selftest/testbranch.py => bzrlib/tests/branch_implementations/test_branch.py

bzrlib/selftest/test_parent.py => bzrlib/tests/branch_implementations/test_parent.py

bzrlib/selftest/HTTPTestUtil.py => bzrlib/tests/http_utils.py

bzrlib/selftest/testrevprops.py => bzrlib/tests/per_repository/test_revision.py

bzrlib/selftest/testconfig.py => bzrlib/tests/test_config.py

bzrlib/selftest/testgraph.py => bzrlib/tests/test_deprecated_graph.py

bzrlib/selftest/testdiff.py => bzrlib/tests/test_diff.py

bzrlib/selftest/testfetch.py => bzrlib/tests/test_fetch.py

bzrlib/selftest/testgpg.py => bzrlib/tests/test_gpg.py

bzrlib/selftest/testhashcache.py => bzrlib/tests/test_hashcache.py

bzrlib/selftest/testhttp.py => bzrlib/tests/test_http.py

bzrlib/selftest/testidentitymap.py => bzrlib/tests/test_identitymap.py

bzrlib/selftest/testinv.py => bzrlib/tests/test_inv.py

bzrlib/selftest/testlog.py => bzrlib/tests/test_log.py

bzrlib/selftest/testmerge.py => bzrlib/tests/test_merge.py

bzrlib/selftest/testmerge3.py => bzrlib/tests/test_merge3.py

bzrlib/selftest/testnonascii.py => bzrlib/tests/test_nonascii.py

bzrlib/selftest/testoptions.py => bzrlib/tests/test_options.py

bzrlib/selftest/testplugins.py => bzrlib/tests/test_plugins.py

bzrlib/selftest/testrevision.py => bzrlib/tests/test_revision.py

bzrlib/selftest/testrevisionnamespaces.py => bzrlib/tests/test_revisionspec.py

bzrlib/selftest/testsampler.py => bzrlib/tests/test_sampler.py

bzrlib/selftest/teststore.py => bzrlib/tests/test_store.py

bzrlib/selftest/testtestament.py => bzrlib/tests/test_testament.py

bzrlib/selftest/testtransactions.py => bzrlib/tests/test_transactions.py

bzrlib/selftest/testtransport.py => bzrlib/tests/test_transport.py

bzrlib/selftest/whitebox.py => bzrlib/tests/test_whitebox.py

bzrlib/selftest/testworkingtree.py => bzrlib/tests/test_workingtree.py

bzrlib/transport/http.py => bzrlib/transport/http/__init__.py

bzrlib/ui.py => bzrlib/ui/__init__.py

bzrlib/xml5.py => bzrlib/xml8.py

bzrlib/xml.py => bzrlib/xml_serializer.py

HACKING => doc/en/developer-guide/HACKING.txt

tutorial.txt => doc/en/tutorials/tutorial.txt

bzr-man.py => tools/doc_generate/autodoc_man.py

files modified:
.bzrignore

.rsyncexclude

INSTALL

Makefile

NEWS

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/testament.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transport/__init__.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/upgrade.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

contrib/bash/bzr.simple

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/zsh/_bzr

setup.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/chk_map.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Persistent maps from tuple_of_strings->string using CHK stores.

Overview and current status:

The CHKMap class implements a dict from tuple_of_strings->string by using a trie

with internal nodes of 8-bit fan out; The key tuples are mapped to strings by

joining them by \x00, and \x00 padding shorter keys out to the length of the

longest key. Leaf nodes are packed as densely as possible, and internal nodes

are all an additional 8-bits wide leading to a sparse upper tree.

Updates to a CHKMap are done preferentially via the apply_delta method, to

allow optimisation of the update operation; but individual map/unmap calls are

possible and supported. All changes via map/unmap are buffered in memory until

the _save method is called to force serialisation of the tree. apply_delta

performs a _save implicitly.

TODO:

-----

Densely packed upper nodes.

"""

import heapq

import time

from bzrlib import lazy_import

lazy_import.lazy_import(globals(), """

from bzrlib import versionedfile

""")

from bzrlib import (

errors,

lru_cache,

osutils,

registry,

trace,

)

# Size limit handed to the module-level page cache below: approx 4MB.
# If each line is 50 bytes, and you have 255 internal pages, with 255-way fan
# out, it takes 3.1MB to cache the layer.
_PAGE_CACHE_SIZE = 4*1024*1024

# Module-level cache of serialised node bytes, indexed by node key (see
# CHKMap._read_bytes).
# We are caching bytes so len(value) is perfectly accurate
_page_cache = lru_cache.LRUSizeCache(_PAGE_CACHE_SIZE)

# If a ChildNode falls below this many bytes, we check for a remap
_INTERESTING_NEW_SIZE = 50

# If a ChildNode shrinks by more than this amount, we check for a remap
_INTERESTING_SHRINKAGE_LIMIT = 20

# If we delete more than this many nodes applying a delta, we check for a remap
# (compared against the deletion count in CHKMap.apply_delta).
_INTERESTING_DELETES_LIMIT = 5

def _search_key_plain(key):
    """Build the search string for a key tuple from the raw key bytes.

    The elements of the tuple are joined with a NUL byte between them.
    """
    separator = '\x00'
    return separator.join(key)

# Registry of search-key functions, looked up by name. The plain
# (NUL-joined) mapping is registered under the name 'plain'.
search_key_registry = registry.Registry()
search_key_registry.register('plain', _search_key_plain)

class CHKMap(object):
    """A persistent map from string to string backed by a CHK store.

    The root is held in ``_root_node`` either as a key tuple (not yet
    loaded) or as a node object; methods that need the object call
    ``_ensure_root`` first.
    """

    def __init__(self, store, root_key, search_key_func=None):
        """Create a CHKMap object.

        :param store: The store the CHKMap is stored in.
        :param root_key: The root key of the map. None to create an empty
            CHKMap.
        :param search_key_func: A function mapping a key => bytes. These bytes
            are then used by the internal nodes to split up leaf nodes into
            multiple pages.
        """
        self._store = store
        if search_key_func is None:
            search_key_func = _search_key_plain
        self._search_key_func = search_key_func
        if root_key is None:
            self._root_node = LeafNode(search_key_func=search_key_func)
        else:
            self._root_node = self._node_key(root_key)

    def apply_delta(self, delta):
        """Apply a delta to the map.

        :param delta: An iterable of old_key, new_key, new_value tuples.
            If new_key is not None, then new_key->new_value is inserted
            into the map; if old_key is not None, then the old mapping
            of old_key is removed.
        :return: The key of the root node, as returned by _save().
        """
        # The delta is walked twice (removals first, then insertions), so take
        # a snapshot: a one-shot iterator would be exhausted by the removal
        # pass and every insertion would be silently dropped.
        delta = list(delta)
        delete_count = 0
        for old, new, value in delta:
            if old is not None and old != new:
                # Remap checks are deferred until all changes are applied.
                self.unmap(old, check_remap=False)
                delete_count += 1
        for old, new, value in delta:
            if new is not None:
                self.map(new, value)
        if delete_count > _INTERESTING_DELETES_LIMIT:
            trace.mutter("checking remap as %d deletions", delete_count)
            self._check_remap()
        return self._save()

    def _ensure_root(self):
        """Ensure that the root node is an object not a key."""
        if type(self._root_node) == tuple:
            # Demand-load the root
            self._root_node = self._get_node(self._root_node)

    def _get_node(self, node):
        """Get a node.

        Note that this does not update the _items dict in objects containing a
        reference to this node. As such it does not prevent subsequent IO being
        performed.

        :param node: A tuple key or node object.
        :return: A node object.
        """
        if type(node) == tuple:
            bytes = self._read_bytes(node)
            return _deserialise(bytes, node,
                search_key_func=self._search_key_func)
        else:
            return node

    def _read_bytes(self, key):
        """Return the serialised bytes stored under key.

        The module-level page cache is consulted first; on a miss the bytes
        are fetched from the store as a fulltext and added to the cache.

        :param key: The key of the node to read.
        """
        try:
            return _page_cache[key]
        except KeyError:
            stream = self._store.get_record_stream([key], 'unordered', True)
            bytes = stream.next().get_bytes_as('fulltext')
            _page_cache[key] = bytes
            return bytes

    def _dump_tree(self, include_keys=False):
        """Return the tree in a string representation."""
        self._ensure_root()
        res = self._dump_tree_node(self._root_node, prefix='', indent='',
                                   include_keys=include_keys)
        res.append('') # Give a trailing '\n'
        return '\n'.join(res)

    def _dump_tree_node(self, node, prefix, indent, include_keys=True):
        """For this node and all children, generate a string representation.

        :param node: The node object to describe.
        :param prefix: The prefix under which node is referenced.
        :param indent: Leading whitespace for this node's header line.
        :param include_keys: If True, append the first element of the node's
            key (or 'None') to the header line.
        :return: A list of lines.
        """
        result = []
        if not include_keys:
            key_str = ''
        else:
            node_key = node.key()
            if node_key is not None:
                key_str = ' %s' % (node_key[0],)
            else:
                key_str = ' None'
        result.append('%s%r %s%s' % (indent, prefix, node.__class__.__name__,
                                     key_str))
        if type(node) is InternalNode:
            # Trigger all child nodes to get loaded
            list(node._iter_nodes(self._store))
            for prefix, sub in sorted(node._items.iteritems()):
                result.extend(self._dump_tree_node(sub, prefix, indent + ' ',
                                                   include_keys=include_keys))
        else:
            for key, value in sorted(node._items.iteritems()):
                # Don't use prefix nor indent here to line up when used in
                # tests in conjunction with assertEqualDiff
                result.append(' %r %r' % (key, value))
        return result

    @classmethod
    def from_dict(klass, store, initial_value, maximum_size=0, key_width=1,
        search_key_func=None):
        """Create a CHKMap in store with initial_value as the content.

        :param store: The store to record initial_value in, a VersionedFiles
            object with 1-tuple keys supporting CHK key generation.
        :param initial_value: A dict to store in store. Its keys and values
            must be bytestrings.
        :param maximum_size: The maximum_size rule to apply to nodes. This
            determines the size at which no new data is added to a single node.
        :param key_width: The number of elements in each key_tuple being stored
            in this map.
        :param search_key_func: A function mapping a key => bytes. These bytes
            are then used by the internal nodes to split up leaf nodes into
            multiple pages.
        :return: The root chk of the resulting CHKMap.
        """
        result = CHKMap(store, None, search_key_func=search_key_func)
        result._root_node.set_maximum_size(maximum_size)
        result._root_node._key_width = key_width
        delta = []
        for key, value in initial_value.items():
            delta.append((None, key, value))
        return result.apply_delta(delta)

    def iter_changes(self, basis):
        """Iterate over the changes between basis and self.

        :return: An iterator of tuples: (key, old_value, new_value). Old_value
            is None for keys only in self; new_value is None for keys only in
            basis.
        """
        # Overview:
        # Read both trees in lexographic, highest-first order.
        # Any identical nodes we skip
        # Any unique prefixes we output immediately.
        # values in a leaf node are treated as single-value nodes in the tree
        # which allows them to be not-special-cased. We know to output them
        # because their value is a string, not a key(tuple) or node.
        #
        # corner cases to beware of when considering this function:
        # *) common references are at different heights.
        #    consider two trees:
        #    {'a': LeafNode={'aaa':'foo', 'aab':'bar'}, 'b': LeafNode={'b'}}
        #    {'a': InternalNode={'aa':LeafNode={'aaa':'foo', 'aab':'bar'},
        #                        'ab':LeafNode={'ab':'bar'}}
        #     'b': LeafNode={'b'}}
        #    the node with aaa/aab will only be encountered in the second tree
        #    after reading the 'a' subtree, but it is encountered in the first
        #    tree immediately. Variations on this may have read internal nodes
        #    like this. we want to cut the entire pending subtree when we
        #    realise we have a common node. For this we use a list of keys -
        #    the path to a node - and check the entire path is clean as we
        #    process each item.
        if self._node_key(self._root_node) == self._node_key(basis._root_node):
            return
        self._ensure_root()
        basis._ensure_root()
        self_node = self._root_node
        basis_node = basis._root_node
        # A heap, each element is prefix, node(tuple/NodeObject/string),
        # key_path (a list of tuples, tail-sharing down the tree.)
        self_pending = []
        basis_pending = []
        def process_node(node, path, a_map, pending):
            # take a node and expand it
            node = a_map._get_node(node)
            if type(node) == LeafNode:
                path = (node._key, path)
                for key, value in node._items.items():
                    # For a LeafNode, the key is a serialized_key, rather than
                    # a search_key, but the heap is using search_keys
                    search_key = node._search_key_func(key)
                    heapq.heappush(pending, (search_key, key, value, path))
            else:
                # type(node) == InternalNode
                path = (node._key, path)
                for prefix, child in node._items.items():
                    heapq.heappush(pending, (prefix, None, child, path))
        def process_common_internal_nodes(self_node, basis_node):
            # Only queue the children that differ between the two nodes.
            self_items = set(self_node._items.items())
            basis_items = set(basis_node._items.items())
            path = (self_node._key, None)
            for prefix, child in self_items - basis_items:
                heapq.heappush(self_pending, (prefix, None, child, path))
            path = (basis_node._key, None)
            for prefix, child in basis_items - self_items:
                heapq.heappush(basis_pending, (prefix, None, child, path))
        def process_common_leaf_nodes(self_node, basis_node):
            # Only queue the key/value pairs that differ between the leaves.
            self_items = set(self_node._items.items())
            basis_items = set(basis_node._items.items())
            path = (self_node._key, None)
            for key, value in self_items - basis_items:
                prefix = self._search_key_func(key)
                heapq.heappush(self_pending, (prefix, key, value, path))
            path = (basis_node._key, None)
            for key, value in basis_items - self_items:
                prefix = basis._search_key_func(key)
                heapq.heappush(basis_pending, (prefix, key, value, path))
        def process_common_prefix_nodes(self_node, self_path,
                                        basis_node, basis_path):
            # Would it be more efficient if we could request both at the same
            # time?
            self_node = self._get_node(self_node)
            basis_node = basis._get_node(basis_node)
            if (type(self_node) == InternalNode
                and type(basis_node) == InternalNode):
                # Matching internal nodes
                process_common_internal_nodes(self_node, basis_node)
            elif (type(self_node) == LeafNode
                  and type(basis_node) == LeafNode):
                process_common_leaf_nodes(self_node, basis_node)
            else:
                process_node(self_node, self_path, self, self_pending)
                process_node(basis_node, basis_path, basis, basis_pending)
        process_common_prefix_nodes(self_node, None, basis_node, None)
        # Nothing in this method adds to excluded_keys; check_excluded only
        # reads it.
        excluded_keys = set()
        def check_excluded(key_path):
            # Note that this is N^2, it depends on us trimming trees
            # aggressively to not become slow.
            # A better implementation would probably have a reverse map
            # back to the children of a node, and jump straight to it when
            # a common node is detected, the proceed to remove the already
            # pending children. bzrlib.graph has a searcher module with a
            # similar problem.
            while key_path is not None:
                key, key_path = key_path
                if key in excluded_keys:
                    return True
            return False

        while self_pending or basis_pending:
            if not self_pending:
                # self is exhausted: output remainder of basis
                for prefix, key, node, path in basis_pending:
                    if check_excluded(path):
                        continue
                    node = basis._get_node(node)
                    if key is not None:
                        # a value
                        yield (key, node, None)
                    else:
                        # subtree - fastpath the entire thing.
                        for key, value in node.iteritems(basis._store):
                            yield (key, value, None)
                return
            elif not basis_pending:
                # basis is exhausted: output remainder of self.
                for prefix, key, node, path in self_pending:
                    if check_excluded(path):
                        continue
                    node = self._get_node(node)
                    if key is not None:
                        # a value
                        yield (key, None, node)
                    else:
                        # subtree - fastpath the entire thing.
                        for key, value in node.iteritems(self._store):
                            yield (key, None, value)
                return
            else:
                # XXX: future optimisation - yield the smaller items
                # immediately rather than pushing everything on/off the
                # heaps. Applies to both internal nodes and leafnodes.
                if self_pending[0][0] < basis_pending[0][0]:
                    # expand self
                    prefix, key, node, path = heapq.heappop(self_pending)
                    if check_excluded(path):
                        continue
                    if key is not None:
                        # a value
                        yield (key, None, node)
                    else:
                        process_node(node, path, self, self_pending)
                        continue
                elif self_pending[0][0] > basis_pending[0][0]:
                    # expand basis
                    prefix, key, node, path = heapq.heappop(basis_pending)
                    if check_excluded(path):
                        continue
                    if key is not None:
                        # a value
                        yield (key, node, None)
                    else:
                        process_node(node, path, basis, basis_pending)
                        continue
                else:
                    # common prefix: possibly expand both
                    if self_pending[0][1] is None:
                        # process next self
                        read_self = True
                    else:
                        read_self = False
                    if basis_pending[0][1] is None:
                        # process next basis
                        read_basis = True
                    else:
                        read_basis = False
                    if not read_self and not read_basis:
                        # compare a common value
                        self_details = heapq.heappop(self_pending)
                        basis_details = heapq.heappop(basis_pending)
                        if self_details[2] != basis_details[2]:
                            yield (self_details[1],
                                basis_details[2], self_details[2])
                        continue
                    # At least one side wasn't a simple value
                    if (self._node_key(self_pending[0][2]) ==
                        self._node_key(basis_pending[0][2])):
                        # Identical pointers, skip (and don't bother adding to
                        # excluded, it won't turn up again.
                        heapq.heappop(self_pending)
                        heapq.heappop(basis_pending)
                        continue
                    # Now we need to expand this node before we can continue
                    if read_self and read_basis:
                        # Both sides start with the same prefix, so process
                        # them in parallel
                        self_prefix, _, self_node, self_path = heapq.heappop(
                            self_pending)
                        basis_prefix, _, basis_node, basis_path = heapq.heappop(
                            basis_pending)
                        if self_prefix != basis_prefix:
                            raise AssertionError(
                                '%r != %r' % (self_prefix, basis_prefix))
                        process_common_prefix_nodes(
                            self_node, self_path,
                            basis_node, basis_path)
                        continue
                    if read_self:
                        prefix, key, node, path = heapq.heappop(self_pending)
                        if check_excluded(path):
                            continue
                        process_node(node, path, self, self_pending)
                    if read_basis:
                        prefix, key, node, path = heapq.heappop(basis_pending)
                        if check_excluded(path):
                            continue
                        process_node(node, path, basis, basis_pending)

    def iteritems(self, key_filter=None):
        """Iterate over the entire CHKMap's contents.

        :param key_filter: Passed through unchanged to the root node's
            iteritems.
        """
        self._ensure_root()
        return self._root_node.iteritems(self._store, key_filter=key_filter)

    def key(self):
        """Return the key for this map."""
        if type(self._root_node) is tuple:
            return self._root_node
        else:
            return self._root_node._key

    def __len__(self):
        """Return len() of the root node, loading it first if needed."""
        self._ensure_root()
        return len(self._root_node)

    def map(self, key, value):
        """Map a key tuple to value."""
        # Need a root object.
        self._ensure_root()
        prefix, node_details = self._root_node.map(self._store, key, value)
        if len(node_details) == 1:
            # No split: the single returned node is the root.
            self._root_node = node_details[0][1]
        else:
            # The root was split: build a new InternalNode over the pieces,
            # copying size limit and key width from the first piece.
            self._root_node = InternalNode(prefix,
                                search_key_func=self._search_key_func)
            self._root_node.set_maximum_size(node_details[0][1].maximum_size)
            self._root_node._key_width = node_details[0][1]._key_width
            for split, node in node_details:
                self._root_node.add_node(split, node)

    def _node_key(self, node):
        """Get the key for a node whether it's a tuple or node."""
        if type(node) == tuple:
            return node
        else:
            return node._key

    def unmap(self, key, check_remap=True):
        """remove key from the map.

        :param key: The key tuple to remove.
        :param check_remap: Only forwarded when the root is an InternalNode.
        """
        self._ensure_root()
        if type(self._root_node) is InternalNode:
            unmapped = self._root_node.unmap(self._store, key,
                check_remap=check_remap)
        else:
            unmapped = self._root_node.unmap(self._store, key)
        # The node's unmap returns the node to use as the new root.
        self._root_node = unmapped

    def _check_remap(self):
        """Check if nodes can be collapsed."""
        self._ensure_root()
        if type(self._root_node) is InternalNode:
            self._root_node._check_remap(self._store)

    def _save(self):
        """Save the map completely.

        :return: The key of the root node.
        """
        if type(self._root_node) == tuple:
            # Already saved.
            return self._root_node
        # The last key produced by serialise() is returned as the root key.
        keys = list(self._root_node.serialise(self._store))
        return keys[-1]

499

500

501

class Node(object):
    """Shared state and helpers for the node classes of a CHK map.

    :ivar _raw_size: The total size of the serialized key:value data, before
        adding the header bytes, and without prefix compression.
    """

    def __init__(self, key_width=1):
        """Set up an empty node.

        :param key_width: The width of keys for this node.
        """
        # The pointers/values this node has - meaning defined by child classes.
        self._items = {}
        # Current number of elements
        self._len = 0
        # current size in bytes
        self._raw_size = 0
        self._maximum_size = 0
        self._key_width = key_width
        self._key = None
        # The common search prefix
        self._search_prefix = None

    def __repr__(self):
        """Return a one-line summary; the item listing is shortened."""
        summary = str(sorted(self._items))
        if len(summary) > 20:
            summary = summary[:16] + '...]'
        fields = (self.__class__.__name__, self._key, self._len,
                  self._raw_size, self._maximum_size, self._search_prefix,
                  summary)
        return '%s(key:%s len:%s size:%s max:%s prefix:%s items:%s)' % fields

    def key(self):
        """Return the key recorded for this node (None if none is set)."""
        return self._key

    def __len__(self):
        """Return the recorded element count."""
        return self._len

    @property
    def maximum_size(self):
        """What is the upper limit for adding references to a node."""
        return self._maximum_size

    def set_maximum_size(self, new_size):
        """Set the size threshold for nodes.

        :param new_size: The size at which no data is added to a node. 0 for
            unlimited.
        """
        self._maximum_size = new_size

    @classmethod
    def common_prefix(cls, prefix, key):
        """Given 2 strings, return the longest prefix common to both.

        :param prefix: This has been the common prefix for other keys, so it is
            more likely to be the common prefix in this case as well.
        :param key: Another string to compare to
        """
        # Fast path: the existing prefix still fits.
        if key.startswith(prefix):
            return prefix
        # Otherwise count how many leading characters agree.
        matched = 0
        for left, right in zip(prefix, key):
            if left != right:
                break
            matched += 1
        return prefix[:matched]

    @classmethod
    def common_prefix_for_keys(cls, keys):
        """Given a list of keys, find their common prefix.

        :param keys: An iterable of strings.
        :return: The longest common prefix of all keys.
        """
        shared = None
        for candidate in keys:
            if shared is None:
                # First key seen: it is the prefix so far.
                shared = candidate
            else:
                shared = cls.common_prefix(shared, candidate)
                if not shared:
                    # if the prefix is the empty string, then we know it
                    # won't change further
                    return ''
        return shared

589

590

591

# Singleton indicating we have not computed _search_prefix yet

592

_unknown = object()

593

594

class LeafNode(Node):

595

"""A node containing actual key:value pairs.

596

597

:ivar _items: A dict of key->value items. The key is in tuple form.

598

:ivar _size: The number of bytes that would be used by serializing all of

599

the key/value pairs.

600

"""

601

602

def __init__(self, search_key_func=None):
    """Set up an empty leaf.

    :param search_key_func: A function mapping a key => search key; when
        None the plain search-key function is used.
    """
    Node.__init__(self)
    # All of the keys in this leaf node share this common prefix
    self._common_serialised_prefix = None
    # Keys are serialised by joining their elements with NUL bytes.
    self._serialise_key = '\x00'.join
    if search_key_func is None:
        search_key_func = _search_key_plain
    self._search_key_func = search_key_func

611

612

def __repr__(self):
    """Return a one-line summary including the key width."""
    summary = str(sorted(self._items))
    if len(summary) > 20:
        # Keep the listing short.
        summary = summary[:16] + '...]'
    fields = (self.__class__.__name__, self._key, self._len, self._raw_size,
              self._maximum_size, self._search_prefix, self._key_width,
              summary)
    template = \
        '%s(key:%s len:%s size:%s max:%s prefix:%s keywidth:%s items:%s)'
    return template % fields

620

621

def _current_size(self):
    """Answer the current serialised size of this node.

    Unlike self._raw_size, the result also counts the bytes used for the
    header.
    """
    shared = self._common_serialised_prefix
    if shared is None:
        prefix_len = 0
        bytes_for_items = 0
    else:
        # The common prefix is stored once, and is then left out of every
        # entry line.
        prefix_len = len(shared)
        bytes_for_items = self._raw_size - (prefix_len * self._len)
    header_len = 9 # 'chkleaf:\n'
    # Each header field is followed by one extra byte.
    header_len += len(str(self._maximum_size)) + 1
    header_len += len(str(self._key_width)) + 1
    header_len += len(str(self._len)) + 1
    header_len += prefix_len + 1
    return header_len + bytes_for_items

642

643

@classmethod

644

def deserialise(klass, bytes, key, search_key_func=None):
    """Build a LeafNode from its serialised form.

    :param bytes: The bytes of the node.
    :param key: The key that the serialised node has.
    :param search_key_func: Forwarded unchanged to the deserialiser.
    """
    leaf = _deserialise_leaf_node(bytes, key,
                                  search_key_func=search_key_func)
    return leaf

652

653

def iteritems(self, store, key_filter=None):
    """Iterate over items in the node.

    :param store: Not used by this implementation.
    :param key_filter: A filter to apply to the node. It should be a
        list/set/dict or similar repeatedly iterable container.
    """
    if key_filter is None:
        for entry in self._items.iteritems():
            yield entry
        return
    # Full-width filter keys are looked up directly; shorter ones are
    # grouped by length and matched as prefixes afterwards.
    # XXX: perhaps defaultdict? Profiling<rinse and repeat>
    prefixes_by_length = {}
    for wanted in key_filter:
        if len(wanted) == self._key_width:
            # This filter is meant to match exactly one key, yield it
            # if we have it.
            try:
                yield wanted, self._items[wanted]
            except KeyError:
                # This key is not present in this map, continue
                pass
        else:
            # Short items, we need to match based on a prefix
            prefixes_by_length.setdefault(len(wanted), set()).add(wanted)
    if not prefixes_by_length:
        return
    length_groups = prefixes_by_length.items()
    for entry in self._items.iteritems():
        entry_key = entry[0]
        for length, candidates in length_groups:
            if entry_key[:length] in candidates:
                yield entry
                break

687

688

def _key_value_len(self, key, value):
    """Return the size charged against _raw_size for one key/value pair."""
    # TODO: Should probably be done without actually joining the key, but
    #       then that can be done via the C extension
    key_len = len(self._serialise_key(key)) + 1
    line_count_len = len(str(value.count('\n'))) + 1
    value_len = len(value) + 1
    return key_len + line_count_len + value_len

694

695

def _search_key(self, key):
    """Return the search key for key, using this node's search_key_func."""
    to_search_key = self._search_key_func
    return to_search_key(key)

697

698

def _map_no_split(self, key, value):
    """Map a key to a value.

    This assumes either the key does not already exist, or you have already
    removed its size and length from self.

    :return: True if adding this node should cause us to split.
    """
    self._items[key] = value
    self._raw_size += self._key_value_len(key, value)
    self._len += 1

    # Narrow the shared serialised prefix to cover the new key too.
    serialised_key = self._serialise_key(key)
    current_serialised = self._common_serialised_prefix
    if current_serialised is None:
        self._common_serialised_prefix = serialised_key
    else:
        self._common_serialised_prefix = self.common_prefix(
            current_serialised, serialised_key)

    # Do the same for the search prefix, computing it first if needed.
    search_key = self._search_key(key)
    if self._search_prefix is _unknown:
        self._compute_search_prefix()
    if self._search_prefix is None:
        self._search_prefix = search_key
    else:
        self._search_prefix = self.common_prefix(
            self._search_prefix, search_key)

    over_limit = (self._len > 1
                  and self._maximum_size
                  and self._current_size() > self._maximum_size)
    if not over_limit:
        return False
    # Check to see if all of the search_keys for this node are
    # identical. We allow the node to grow under that circumstance
    # (we could track this as common state, but it is infrequent)
    if search_key != self._search_prefix:
        return True
    return not self._are_search_keys_identical()

733

734

def _split(self, store):
    """Distribute the items of this overflowed node over new LeafNodes.

    The result is handed back up the stack so that the next layer can
    create a new InternalNode referencing the new nodes.

    :return: (common search prefix, [(node search prefix, node)])
    """
    if self._search_prefix is _unknown:
        raise AssertionError('Search prefix must be known')
    common_prefix = self._search_prefix
    split_at = len(common_prefix) + 1
    nodes_by_prefix = {}
    for key, value in self._items.iteritems():
        # Search keys shorter than split_at are padded out with NULs.
        # TODO: Generally only 1 key can be exactly the right length,
        #       which means we can only have 1 key in the node pointed
        #       at by the 'prefix\0' key. We might want to consider
        #       folding it into the containing InternalNode rather than
        #       having a fixed length-1 node.
        #       Note this is probably not true for hash keys, as they
        #       may get a '\00' node anywhere, but won't have keys of
        #       different lengths.
        prefix = self._search_key(key)[:split_at].ljust(split_at, '\x00')
        try:
            node = nodes_by_prefix[prefix]
        except KeyError:
            node = LeafNode(search_key_func=self._search_key_func)
            node.set_maximum_size(self._maximum_size)
            node._key_width = self._key_width
            nodes_by_prefix[prefix] = node
        node.map(store, key, value)
    return common_prefix, nodes_by_prefix.items()

769

770

def map(self, store, key, value):
    """Map key to value.

    :return: The result of self._split(store) when the node has to split,
        otherwise (search prefix, [("", self)]).
    """
    items = self._items
    if key in items:
        # Replacing an entry: take the old one out of the accounting first.
        self._raw_size -= self._key_value_len(key, items[key])
        self._len -= 1
    self._key = None
    needs_split = self._map_no_split(key, value)
    if needs_split:
        return self._split(store)
    if self._search_prefix is _unknown:
        raise AssertionError('%r must be known' % self._search_prefix)
    return self._search_prefix, [("", self)]

782

783

def serialise(self, store):
    """Write this LeafNode to store and remember the resulting key.

    :param store: A VersionedFiles honouring the CHK extensions.
    :return: An iterable of the keys inserted by this operation.
    """
    common_prefix = self._common_serialised_prefix
    lines = [
        "chkleaf:\n",
        "%d\n" % self._maximum_size,
        "%d\n" % self._key_width,
        "%d\n" % self._len,
    ]
    if common_prefix is None:
        lines.append('\n')
        if len(self._items) != 0:
            raise AssertionError('If _common_serialised_prefix is None'
                                 ' we should have no items')
    else:
        lines.append('%s\n' % (common_prefix,))
        prefix_len = len(common_prefix)
    for key, value in sorted(self._items.items()):
        # Always add a final newline
        value_lines = osutils.chunks_to_lines([value + '\n'])
        entry = "%s\x00%s\n" % (self._serialise_key(key), len(value_lines))
        if not entry.startswith(common_prefix):
            raise AssertionError('We thought the common prefix was %r'
                                 ' but entry %r does not have it in common'
                                 % (common_prefix, entry))
        # The common prefix was written once above, so drop it here.
        lines.append(entry[prefix_len:])
        lines.extend(value_lines)
    sha1, _, _ = store.add_lines((None,), (), lines)
    self._key = ("sha1:" + sha1,)
    serialised_bytes = ''.join(lines)
    if len(serialised_bytes) != self._current_size():
        raise AssertionError('Invalid _current_size')
    _page_cache.add(self._key, serialised_bytes)
    return [self._key]

819

820

def refs(self):
    """Return the references to other CHK's held by this node.

    This implementation always answers with a new empty list.
    """
    no_refs = []
    return no_refs

823

824

def _compute_search_prefix(self):

825

"""Determine the common search prefix for all keys in this node.

826

827

:return: A bytestring of the longest search key prefix that is

828

unique within this node.

829

"""

830

search_keys = [self._search_key_func(key) for key in self._items]

831

self._search_prefix = self.common_prefix_for_keys(search_keys)

832

return self._search_prefix

833

834

def _are_search_keys_identical(self):

835

"""Check to see if the search keys for all entries are the same.

836

837

When using a hash as the search_key it is possible for non-identical

838

keys to collide. If that happens enough, we may try overflow a

839

LeafNode, but as all are collisions, we must not split.

840

"""

841

common_search_key = None

842

for key in self._items:

843

search_key = self._search_key(key)

844

if common_search_key is None:

845

common_search_key = search_key

846

elif search_key != common_search_key:

847

return False

848

return True

849

850

def _compute_serialised_prefix(self):

851

"""Determine the common prefix for serialised keys in this node.

852

853

:return: A bytestring of the longest serialised key prefix that is

854

unique within this node.

855

"""

856

serialised_keys = [self._serialise_key(key) for key in self._items]

857

self._common_serialised_prefix = self.common_prefix_for_keys(

858

serialised_keys)

859

return self._common_serialised_prefix

860

861

def unmap(self, store, key):
    """Remove key from this node.

    :param store: Not used by this implementation.
    :param key: The key to remove.
    :return: self
    :raises KeyError: If key is not held by this node.
    """
    try:
        removed_size = self._key_value_len(key, self._items[key])
    except KeyError:
        trace.mutter("key %s not found in %r", key, self._items)
        raise
    self._raw_size -= removed_size
    self._len -= 1
    del self._items[key]
    self._key = None
    # Both prefixes may have changed, so rebuild them from the items left.
    self._compute_search_prefix()
    self._compute_serialised_prefix()
    return self

875

876

877

class InternalNode(Node):
    """A node that contains references to other nodes.

    An InternalNode is responsible for mapping search key prefixes to child
    nodes.

    :ivar _items: serialised_key => node dictionary. node may be a tuple,
        LeafNode or InternalNode.
    """

    def __init__(self, prefix='', search_key_func=None):
        """Create an InternalNode without children.

        :param prefix: The search prefix shared by everything below this
            node.
        :param search_key_func: Function mapping a key to its search key.
            _search_key_plain is used when this is None.
        """
        Node.__init__(self)
        # The size of an internalnode with default values and no children.
        # How many octets key prefixes within this node are.
        self._node_width = 0
        self._search_prefix = prefix
        if search_key_func is None:
            self._search_key_func = _search_key_plain
        else:
            self._search_key_func = search_key_func

    def add_node(self, prefix, node):
        """Add a child node with prefix prefix, and node node.

        :param prefix: The search key prefix for node. It must start with
            this node's search prefix and be exactly one character longer.
        :param node: The node being added.
        """
        if self._search_prefix is None:
            raise AssertionError("_search_prefix should not be None")
        if not prefix.startswith(self._search_prefix):
            raise AssertionError("prefixes mismatch: %s must start with %s"
                % (prefix,self._search_prefix))
        if len(prefix) != len(self._search_prefix) + 1:
            raise AssertionError("prefix wrong length: len(%s) is not %d" %
                (prefix, len(self._search_prefix) + 1))
        self._len += len(node)
        if not len(self._items):
            # The first child fixes the width of the prefixes in this node.
            self._node_width = len(prefix)
        if self._node_width != len(self._search_prefix) + 1:
            raise AssertionError("node width mismatch: %d is not %d" %
                (self._node_width, len(self._search_prefix) + 1))
        self._items[prefix] = node
        self._key = None

    def _current_size(self):
        """Answer the current serialised size of this node."""
        return (self._raw_size + len(str(self._len)) + len(str(self._key_width)) +
            len(str(self._maximum_size)))

    @classmethod
    def deserialise(klass, bytes, key, search_key_func=None):
        """Deserialise bytes to an InternalNode, with key key.

        :param bytes: The bytes of the node.
        :param key: The key that the serialised node has.
        :param search_key_func: Passed on to _deserialise_internal_node.
        :return: An InternalNode instance.
        """
        return _deserialise_internal_node(bytes, key,
            search_key_func=search_key_func)

    def iteritems(self, store, key_filter=None):
        """Iterate over the items of every child node matching key_filter.

        :param store: A store to use for accessing content.
        :param key_filter: Passed to _iter_nodes to select the children; the
            per-child filter it yields is passed to the child's iteritems.
        """
        for node, node_filter in self._iter_nodes(store, key_filter=key_filter):
            for item in node.iteritems(store, key_filter=node_filter):
                yield item

    def _iter_nodes(self, store, key_filter=None, batch_size=None):
        """Iterate over node objects which match key_filter.

        :param store: A store to use for accessing content.
        :param key_filter: A key filter to filter nodes. Only nodes that might
            contain a key in key_filter will be returned.
        :param batch_size: If not None, then we will return the nodes that had
            to be read using get_record_stream in batches, rather than reading
            them all at once.
        :return: An iterable of nodes. This function does not have to be fully
            consumed. (There will be no pending I/O when items are being returned.)
        """
        # Map from chk key ('sha1:...',) to (prefix, key_filter)
        # prefix is the key in self._items to use, key_filter is the key_filter
        # entries that would match this node
        keys = {}
        if key_filter is None:
            for prefix, node in self._items.iteritems():
                if type(node) is tuple:
                    # Only a reference so far; it is loaded further down.
                    keys[node] = (prefix, None)
                else:
                    yield node, None
        else:
            # XXX defaultdict ?
            prefix_to_keys = {}
            length_filters = {}
            for key in key_filter:
                search_key = self._search_prefix_filter(key)
                length_filter = length_filters.setdefault(
                                    len(search_key), set())
                length_filter.add(search_key)
                prefix_to_keys.setdefault(search_key, []).append(key)
            length_filters = length_filters.items()
            for prefix, node in self._items.iteritems():
                node_key_filter = []
                for length, length_filter in length_filters:
                    sub_prefix = prefix[:length]
                    if sub_prefix in length_filter:
                        node_key_filter.extend(prefix_to_keys[sub_prefix])
                if node_key_filter: # this key matched something, yield it
                    if type(node) is tuple:
                        keys[node] = (prefix, node_key_filter)
                    else:
                        yield node, node_key_filter
        if keys:
            # Look in the page cache for some more bytes
            found_keys = set()
            for key in keys:
                try:
                    node_bytes = _page_cache[key]
                except KeyError:
                    continue
                else:
                    node = _deserialise(node_bytes, key,
                        search_key_func=self._search_key_func)
                    prefix, node_key_filter = keys[key]
                    self._items[prefix] = node
                    found_keys.add(key)
                    yield node, node_key_filter
            # keys cannot shrink while it is being iterated, so the entries
            # that were satisfied from the cache are removed afterwards.
            for key in found_keys:
                del keys[key]
        if keys:
            # demand load some pages.
            if batch_size is None:
                # Read all the keys in
                batch_size = len(keys)
            key_order = list(keys)
            for batch_start in range(0, len(key_order), batch_size):
                batch = key_order[batch_start:batch_start + batch_size]
                # We have to fully consume the stream so there is no pending
                # I/O, so we buffer the nodes for now.
                stream = store.get_record_stream(batch, 'unordered', True)
                node_and_filters = []
                for record in stream:
                    node_bytes = record.get_bytes_as('fulltext')
                    node = _deserialise(node_bytes, record.key,
                        search_key_func=self._search_key_func)
                    prefix, node_key_filter = keys[record.key]
                    node_and_filters.append((node, node_key_filter))
                    self._items[prefix] = node
                    _page_cache.add(record.key, node_bytes)
                for info in node_and_filters:
                    yield info

    def map(self, store, key, value):
        """Map key to value.

        :param store: A store to use for reading child nodes.
        :return: (search prefix, [('', node)]) where node is the node that
            should now stand in place of this one.
        """
        if not len(self._items):
            raise AssertionError("can't map in an empty InternalNode.")
        search_key = self._search_key(key)
        if self._node_width != len(self._search_prefix) + 1:
            raise AssertionError("node width mismatch: %d is not %d" %
                (self._node_width, len(self._search_prefix) + 1))
        if not search_key.startswith(self._search_prefix):
            # This key doesn't fit in this index, so we need to split at the
            # point where it would fit, insert self into that internal node,
            # and then map this key into that node.
            new_prefix = self.common_prefix(self._search_prefix,
                                            search_key)
            new_parent = InternalNode(new_prefix,
                search_key_func=self._search_key_func)
            new_parent.set_maximum_size(self._maximum_size)
            new_parent._key_width = self._key_width
            new_parent.add_node(self._search_prefix[:len(new_prefix)+1],
                                self)
            return new_parent.map(store, key, value)
        children = [node for node, _
                          in self._iter_nodes(store, key_filter=[key])]
        if children:
            child = children[0]
        else:
            # new child needed:
            child = self._new_child(search_key, LeafNode)
        old_len = len(child)
        if type(child) is LeafNode:
            old_size = child._current_size()
        else:
            old_size = None
        prefix, node_details = child.map(store, key, value)
        if len(node_details) == 1:
            # child may have shrunk, or might be a new node
            child = node_details[0][1]
            self._len = self._len - old_len + len(child)
            self._items[search_key] = child
            self._key = None
            new_node = self
            if type(child) is LeafNode:
                if old_size is None:
                    # The old node was an InternalNode which means it has now
                    # collapsed, so we need to check if it will chain to a
                    # collapse at this level.
                    trace.mutter("checking remap as InternalNode -> LeafNode")
                    new_node = self._check_remap(store)
                else:
                    # If the LeafNode has shrunk in size, we may want to run
                    # a remap check. Checking for a remap is expensive though
                    # and the frequency of a successful remap is very low.
                    # Shrinkage by small amounts is common, so we only do the
                    # remap check if the new_size is low or the shrinkage
                    # amount is over a configurable limit.
                    new_size = child._current_size()
                    shrinkage = old_size - new_size
                    if (shrinkage > 0 and new_size < _INTERESTING_NEW_SIZE
                        or shrinkage > _INTERESTING_SHRINKAGE_LIMIT):
                        trace.mutter(
                            "checking remap as size shrunk by %d to be %d",
                            shrinkage, new_size)
                        new_node = self._check_remap(store)
            if new_node._search_prefix is None:
                raise AssertionError("_search_prefix should not be None")
            return new_node._search_prefix, [('', new_node)]
        # child has overflown - create a new intermediate node.
        # XXX: This is where we might want to try and expand our depth
        # to refer to more bytes of every child (which would give us
        # multiple pointers to child nodes, but less intermediate nodes)
        child = self._new_child(search_key, InternalNode)
        child._search_prefix = prefix
        for split, node in node_details:
            child.add_node(split, node)
        self._len = self._len - old_len + len(child)
        self._key = None
        return self._search_prefix, [("", self)]

    def _new_child(self, search_key, klass):
        """Create a new child node of type klass.

        The child takes over this node's maximum size, key width and search
        key function, and is stored in self._items under search_key.

        :return: The new child.
        """
        child = klass()
        child.set_maximum_size(self._maximum_size)
        child._key_width = self._key_width
        child._search_key_func = self._search_key_func
        self._items[search_key] = child
        return child

    def serialise(self, store):
        """Serialise the node to store.

        Children that are held as node objects and have no key are
        serialised before this node is.

        :param store: A VersionedFiles honouring the CHK extensions.
        :return: An iterable of the keys inserted by this operation.
        """
        for node in self._items.itervalues():
            if type(node) is tuple:
                # Never deserialised.
                continue
            if node._key is not None:
                # Never altered
                continue
            for key in node.serialise(store):
                yield key
        lines = ["chknode:\n"]
        lines.append("%d\n" % self._maximum_size)
        lines.append("%d\n" % self._key_width)
        lines.append("%d\n" % self._len)
        if self._search_prefix is None:
            raise AssertionError("_search_prefix should not be None")
        lines.append('%s\n' % (self._search_prefix,))
        prefix_len = len(self._search_prefix)
        for prefix, node in sorted(self._items.items()):
            if type(node) is tuple:
                key = node[0]
            else:
                key = node._key[0]
            serialised = "%s\x00%s\n" % (prefix, key)
            if not serialised.startswith(self._search_prefix):
                raise AssertionError("prefixes mismatch: %s must start with %s"
                    % (serialised, self._search_prefix))
            # The search prefix was written once above, so drop it here.
            lines.append(serialised[prefix_len:])
        sha1, _, _ = store.add_lines((None,), (), lines)
        self._key = ("sha1:" + sha1,)
        _page_cache.add(self._key, ''.join(lines))
        yield self._key

    def _search_key(self, key):
        """Return the serialised key for key in this node."""
        # search keys are fixed width. All will be self._node_width wide, so we
        # pad as necessary.
        return (self._search_key_func(key) + '\x00'*self._node_width)[:self._node_width]

    def _search_prefix_filter(self, key):
        """Serialise key for use as a prefix filter in iteritems."""
        return self._search_key_func(key)[:self._node_width]

    def _split(self, offset):
        """Split this node into smaller nodes starting at offset.

        :param offset: The offset to start the new child nodes at.
        :return: An iterable of (prefix, node) tuples. prefix is a byte
            prefix for reaching node.
        """
        if offset >= self._node_width:
            for node in self._items.values():
                for result in node._split(offset):
                    yield result
            return
        # NOTE(review): nothing is produced for a smaller offset; this
        # branch looks unfinished.
        for key, node in self._items.items():
            pass

    def refs(self):
        """Return the references to other CHK's held by this node."""
        if self._key is None:
            raise AssertionError("unserialised nodes have no refs.")
        refs = []
        for value in self._items.itervalues():
            if type(value) is tuple:
                refs.append(value)
            else:
                refs.append(value.key())
        return refs

    def _compute_search_prefix(self, extra_key=None):
        """Return the unique key prefix for this node.

        :param extra_key: Not used by this implementation.
        :return: The value of common_prefix_for_keys() applied to the
            prefixes in self._items, which is also stored in
            self._search_prefix.
        """
        self._search_prefix = self.common_prefix_for_keys(self._items)
        return self._search_prefix

    def unmap(self, store, key, check_remap=True):
        """Remove key from this node and its children.

        :param store: A store to use for reading child nodes.
        :param key: The key to remove.
        :param check_remap: When True, check whether the remaining children
            fit into a single LeafNode.
        :return: The node that should now stand in place of this one.
        :raises KeyError: If key is not present.
        """
        if not len(self._items):
            raise AssertionError("can't unmap in an empty InternalNode.")
        children = [node for node, _
                          in self._iter_nodes(store, key_filter=[key])]
        if children:
            child = children[0]
        else:
            raise KeyError(key)
        # Only touch our own bookkeeping once the child has really removed
        # the key: if it raises (e.g. KeyError for an absent key) this
        # node's length must stay as it was.
        unmapped = child.unmap(store, key)
        self._len -= 1
        self._key = None
        search_key = self._search_key(key)
        if len(unmapped) == 0:
            # All child nodes are gone, remove the child:
            del self._items[search_key]
            unmapped = None
        else:
            # Stash the returned node
            self._items[search_key] = unmapped
        if len(self._items) == 1:
            # this node is no longer needed:
            return self._items.values()[0]
        if type(unmapped) is InternalNode:
            return self
        if check_remap:
            return self._check_remap(store)
        else:
            return self

    def _check_remap(self, store):
        """Check if all keys contained by children fit in a single LeafNode.

        :param store: A store to use for reading more nodes
        :return: Either self, or a new LeafNode which should replace self.
        """
        # Logic for how we determine when we need to rebuild
        # 1) Implicitly unmap() is removing a key which means that the child
        #    nodes are going to be shrinking by some extent.
        # 2) If all children are LeafNodes, it is possible that they could be
        #    combined into a single LeafNode, which can then completely replace
        #    this internal node with a single LeafNode
        # 3) If *one* child is an InternalNode, we assume it has already done
        #    all the work to determine that its children cannot collapse, and
        #    we can then assume that those nodes *plus* the current nodes don't
        #    have a chance of collapsing either.
        #    So a very cheap check is to just say if 'unmapped' is an
        #    InternalNode, we don't have to check further.

        # TODO: Another alternative is to check the total size of all known
        #       LeafNodes. If there is some formula we can use to determine the
        #       final size without actually having to read in any more
        #       children, it would be nice to have. However, we have to be
        #       careful with stuff like nodes that pull out the common prefix
        #       of each key, as adding a new key can change the common prefix
        #       and cause size changes greater than the length of one key.
        #       So for now, we just add everything to a new Leaf until it
        #       splits, as we know that will give the right answer
        new_leaf = LeafNode(search_key_func=self._search_key_func)
        new_leaf.set_maximum_size(self._maximum_size)
        new_leaf._key_width = self._key_width
        # A batch_size of 16 was chosen because:
        #   a) In testing, a 4k page held 14 times. So if we have more than 16
        #      leaf nodes we are unlikely to hold them in a single new leaf
        #      node. This still allows for 1 round trip
        #   b) With 16-way fan out, we can still do a single round trip
        #   c) With 255-way fan out, we don't want to read all 255 and destroy
        #      the page cache, just to determine that we really don't need it.
        for node, _ in self._iter_nodes(store, batch_size=16):
            if type(node) is InternalNode:
                # Without looking at any leaf nodes, we are sure
                return self
            for key, value in node._items.iteritems():
                if new_leaf._map_no_split(key, value):
                    return self
        trace.mutter("remap generated a new LeafNode")
        return new_leaf

1275

1276

1277

def _deserialise(bytes, key, search_key_func):

1278

"""Helper for repositorydetails - convert bytes to a node."""

1279

if bytes.startswith("chkleaf:\n"):

1280

node = LeafNode.deserialise(bytes, key, search_key_func=search_key_func)

1281

elif bytes.startswith("chknode:\n"):

1282

node = InternalNode.deserialise(bytes, key,

1283

search_key_func=search_key_func)

1284

else:

1285

raise AssertionError("Unknown node type.")

1286

return node

1287

1288

1289

def _find_children_info(store, interesting_keys, uninteresting_keys, pb):

1290

"""Read the associated records, and determine what is interesting."""

1291

uninteresting_keys = set(uninteresting_keys)

1292

chks_to_read = uninteresting_keys.union(interesting_keys)

1293

next_uninteresting = set()

1294

next_interesting = set()

1295

uninteresting_items = set()

1296

interesting_items = set()

1297

interesting_to_yield = []

1298

for record in store.get_record_stream(chks_to_read, 'unordered', True):

1299

# records_read.add(record.key())

1300

if pb is not None:

1301

pb.tick()

1302

bytes = record.get_bytes_as('fulltext')

1303

# We don't care about search_key_func for this code, because we only

1304

# care about external references.

1305

node = _deserialise(bytes, record.key, search_key_func=None)

1306

if record.key in uninteresting_keys:

1307

if type(node) is InternalNode:

1308

next_uninteresting.update(node.refs())

1309

else:

1310

# We know we are at a LeafNode, so we can pass None for the

1311

# store

1312

uninteresting_items.update(node.iteritems(None))

1313

else:

1314

interesting_to_yield.append(record.key)

1315

if type(node) is InternalNode:

1316

next_interesting.update(node.refs())

1317

else:

1318

interesting_items.update(node.iteritems(None))

1319

return (next_uninteresting, uninteresting_items,

1320

next_interesting, interesting_to_yield, interesting_items)

1321

1322

1323

def _find_all_uninteresting(store, interesting_root_keys,

1324

uninteresting_root_keys, pb):

1325

"""Determine the full set of uninteresting keys."""

1326

# What about duplicates between interesting_root_keys and

1327

# uninteresting_root_keys?

1328

if not uninteresting_root_keys:

1329

# Shortcut case. We know there is nothing uninteresting to filter out

1330

# So we just let the rest of the algorithm do the work

1331

# We know there is nothing uninteresting, and we didn't have to read

1332

# any interesting records yet.

1333

return (set(), set(), set(interesting_root_keys), [], set())

1334

all_uninteresting_chks = set(uninteresting_root_keys)

1335

all_uninteresting_items = set()

1336

1337

# First step, find the direct children of both the interesting and

1338

# uninteresting set

1339

(uninteresting_keys, uninteresting_items,

1340

interesting_keys, interesting_to_yield,

1341

interesting_items) = _find_children_info(store, interesting_root_keys,

1342

uninteresting_root_keys,

1343

pb=pb)

1344

all_uninteresting_chks.update(uninteresting_keys)

1345

all_uninteresting_items.update(uninteresting_items)

1346

del uninteresting_items

1347

# Note: Exact matches between interesting and uninteresting do not need

1348

# to be search further. Non-exact matches need to be searched in case

1349

# there is a future exact-match

1350

uninteresting_keys.difference_update(interesting_keys)

1351

1352

# Second, find the full set of uninteresting bits reachable by the

1353

# uninteresting roots

1354

chks_to_read = uninteresting_keys

1355

while chks_to_read:

1356

next_chks = set()

1357

for record in store.get_record_stream(chks_to_read, 'unordered', False):

1358

# TODO: Handle 'absent'

1359

if pb is not None:

1360

pb.tick()

1361

bytes = record.get_bytes_as('fulltext')

1362

# We don't care about search_key_func for this code, because we

1363

# only care about external references.

1364

node = _deserialise(bytes, record.key, search_key_func=None)

1365

if type(node) is InternalNode:

1366

# uninteresting_prefix_chks.update(node._items.iteritems())

1367

chks = node._items.values()

1368

# TODO: We remove the entries that are already in

1369

# uninteresting_chks ?

1370

next_chks.update(chks)

1371

all_uninteresting_chks.update(chks)

1372

else:

1373

all_uninteresting_items.update(node._items.iteritems())

1374

chks_to_read = next_chks

1375

return (all_uninteresting_chks, all_uninteresting_items,

1376

interesting_keys, interesting_to_yield, interesting_items)

1377

1378

1379

def iter_interesting_nodes(store, interesting_root_keys,
                           uninteresting_root_keys, pb=None):
    """Given root keys, find interesting nodes.

    Evaluate nodes referenced by interesting_root_keys. Ones that are also
    referenced from uninteresting_root_keys are not considered interesting.

    :param store: The store the records are read from.
    :param interesting_root_keys: keys which should be part of the
        "interesting" nodes (which will be yielded)
    :param uninteresting_root_keys: keys which should be filtered out of the
        result set.
    :param pb: A progress bar to report to, or None.
    :return: Yield
        (interesting record, {interesting key:values})
    """
    # TODO: consider that it may be more memory efficient to use the 20-byte
    #       sha1 string, rather than tuples of hexidecimal sha1 strings.
    # TODO: Try to factor out a lot of the get_record_stream() calls into a
    #       helper function similar to _read_bytes. This function should be
    #       able to use nodes from the _page_cache as well as actually
    #       requesting bytes from the store.

    uninteresting_info = _find_all_uninteresting(
        store, interesting_root_keys, uninteresting_root_keys, pb)
    (all_uninteresting_chks, all_uninteresting_items, interesting_keys,
     interesting_to_yield, interesting_items) = uninteresting_info

    # Now that we know everything uninteresting, we can yield information from
    # our first request
    interesting_items.difference_update(all_uninteresting_items)
    interesting_to_yield = set(interesting_to_yield) - all_uninteresting_chks
    if interesting_items:
        yield None, interesting_items
    if interesting_to_yield:
        # We request these records again, rather than buffering the root
        # records, most likely they are still in the _group_cache anyway.
        root_stream = store.get_record_stream(interesting_to_yield,
                                              'unordered', False)
        for record in root_stream:
            yield record, []
    all_uninteresting_chks.update(interesting_to_yield)
    interesting_keys.difference_update(all_uninteresting_chks)

    pending_chks = interesting_keys
    counter = 0
    while pending_chks:
        found_chks = set()
        stream = store.get_record_stream(pending_chks, 'unordered', False)
        for record in stream:
            counter += 1
            if pb is not None:
                pb.update('find chk pages', counter)
            # TODO: Handle 'absent'?
            node_bytes = record.get_bytes_as('fulltext')
            # We don't care about search_key_func for this code, because we
            # only care about external references.
            node = _deserialise(node_bytes, record.key, search_key_func=None)
            if type(node) is InternalNode:
                # all_uninteresting_chks grows large, as it lists all nodes we
                # don't want to process (including already seen interesting
                # nodes).
                # small.difference_update(large) scales O(large), but
                # small.difference(large) scales O(small).
                # Also, we know we just _deserialised this node, so we can
                # access the dict directly.
                chks = set(node._items.itervalues()).difference(
                    all_uninteresting_chks)
                # Is set() and .difference_update better than:
                # chks = [chk for chk in node.refs()
                #              if chk not in all_uninteresting_chks]
                found_chks.update(chks)
                # These are now uninteresting everywhere else
                all_uninteresting_chks.update(chks)
                node_items = []
            else:
                node_items = [item for item in node._items.iteritems()
                              if item not in all_uninteresting_items]
                # TODO: Do we need to filter out items that we have already
                #       seen on other pages? We don't really want to buffer the
                #       whole thing, but it does mean that callers need to
                #       understand they may get duplicate values.
                # all_uninteresting_items.update(interesting_items)
            yield record, node_items
        pending_chks = found_chks

1459

1460

1461

try:

1462

from bzrlib._chk_map_pyx import (

1463

_search_key_16,

1464

_search_key_255,

1465

_deserialise_leaf_node,

1466

_deserialise_internal_node,

1467

)

1468

except ImportError:

1469

from bzrlib._chk_map_py import (

1470

_search_key_16,

1471

_search_key_255,

1472

_deserialise_leaf_node,

1473

_deserialise_internal_node,

1474

)

1475

search_key_registry.register('hash-16-way', _search_key_16)

1476

search_key_registry.register('hash-255-way', _search_key_255)

Older »