~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/index.py

Committer: Canonical.com Patch Queue Manager
Date: 2007-03-17 01:53:05 UTC
mfrom: (2360.1.8 iter_changes_unicode_unknowns)
Revision ID: pqm@pqm.ubuntu.com-20070317015305-7b7562331da9f786

(John Arbash Meinel) Update dirstate._iter_changes to return unicode for all paths (bug #92608)

files added:
NEWS.developers

build-api

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_escaped_store.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/xml6.py

doc/README.1st

files removed:
bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_walkdirs_win32.h

bzrlib/_walkdirs_win32.pyx

bzrlib/api.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_pack.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/bundle/serializer/v4.py

bzrlib/counted_lock.py

bzrlib/directory_service.py

bzrlib/email_message.py

bzrlib/graph.py

bzrlib/help_topics

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/lru_cache.py

bzrlib/mail_client.py

bzrlib/multiparent.py

bzrlib/pack.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/push.py

bzrlib/reconfigure.py

bzrlib/remote.py

bzrlib/repofmt/pack_repo.py

bzrlib/rules.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_add_fallback_repository.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/repository_implementations/test_get_parent_map.py

bzrlib/tests/repository_implementations/test_has_revisions.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_is_write_locked.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/transport/brokenrename.py

bzrlib/transport/nosmart.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version_info_formats/format_custom.py

bzrlib/xml5.py

bzrlib/xml6.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

doc/developers

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

man1

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/package_mf.py

tools/win32/run_script.py

tools/win32/survey.txt

files renamed:
doc/developers/HACKING.txt => HACKING

bzrlib/deprecated_graph.py => bzrlib/graph.py

bzrlib/help_topics/__init__.py => bzrlib/help_topics.py

bzrlib/_patiencediff_py.py => bzrlib/patiencediff.py

bzrlib/plugins/launchpad/lp_directory.py => bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/test_lp_directory.py => bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/tests/http_utils.py => bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/http_server.py => bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_send.py => bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/test_deprecated_graph.py => bzrlib/tests/test_graph.py

bzrlib/tests/test_revisionspec.py => bzrlib/tests/test_revisionnamespaces.py

bzrlib/transport/remote.py => bzrlib/transport/smart.py

bzrlib/xml8.py => bzrlib/xml5.py

doc/en/tutorials/centralized_workflow.txt => doc/centralized_workflow.txt

bzrlib/help_topics/en/configuration.txt => doc/configuration.txt

doc/en/user-guide/http_smart_server.txt => doc/http_smart_server.txt

doc/en/user-guide/index.txt => doc/index.txt

doc/en/user-guide/plugins.txt => doc/plugins.txt

doc/en/user-guide/server.txt => doc/server.txt

doc/en/user-guide/setting_up_email.txt => doc/setting_up_email.txt

doc/en/user-guide/specifying_revisions.txt => doc/specifying_revisions.txt

doc/en/tutorials/tutorial.txt => doc/tutorial.txt

doc/en/user-guide/using_aliases.txt => doc/using_aliases.txt

doc/en/user-guide/version_info.txt => doc/version_info.txt

files modified:
.bzrignore

INSTALL

Makefile

NEWS

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/bencode.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml_serializer.py

contrib/bash/bzr.simple

doc/bazaar-vcs.org.kid

doc/default.css

generate_docs.py

setup.py *

tools/capture_tree.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/index.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Indexing facilities."""

__all__ = [

'CombinedGraphIndex',

'GraphIndex',

'GraphIndexBuilder',

'GraphIndexPrefixAdapter',

'InMemoryGraphIndex',

]

from bisect import bisect_right

from cStringIO import StringIO

import re

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import trace

from bzrlib.bisect_multi import bisect_multi_bytes

from bzrlib.revision import NULL_REVISION

from bzrlib.trace import mutter

""")

from bzrlib import (

debug,

errors,

symbol_versioning,

)

# NOTE(review): not referenced in the visible part of this file; presumably
# the (offset, length) range read to fetch an index header - confirm at the
# point of use.
_HEADER_READV = (0, 200)

# Prefix of the option line recording the number of elements in each key.
_OPTION_KEY_ELEMENTS = "key_elements="

# Prefix of the option line recording the number of keys in the index.
_OPTION_LEN = "len="

# Prefix of the option line recording the number of reference lists per node.
_OPTION_NODE_REFS = "node_ref_lists="

# First line emitted when an index is serialised.
_SIGNATURE = "Bazaar Graph Index 1\n"

# Matches any character that is not allowed inside a key element.
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')

# Matches any character that is not allowed inside a node value.
_newline_null_re = re.compile('[\n\0]')

class GraphIndexBuilder(object):

"""A builder that can build a GraphIndex.

The resulting graph has the structure:

_SIGNATURE OPTIONS NODES NEWLINE

_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE

OPTIONS := 'node_ref_lists=' DIGITS NEWLINE

NODES := NODE*

NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE

KEY := Not-whitespace-utf8

ABSENT := 'a'

REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}

REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?

REFERENCE := DIGITS ; digits is the byte offset in the index of the

; referenced key.

VALUE := no-newline-no-null-bytes

"""

def __init__(self, reference_lists=0, key_elements=1):

"""Create a GraphIndex builder.

:param reference_lists: The number of node references lists for each

entry.

:param key_elements: The number of bytestrings in each key.

"""

self.reference_lists = reference_lists

self._keys = set()

self._nodes = {}

self._nodes_by_key = {}

self._key_length = key_elements

def _check_key(self, key):
    """Validate key, raising BadIndexKey when this index cannot hold it.

    A key is accepted only when it is exactly a tuple, has as many
    elements as this index's key length, and every element is non-empty
    and free of whitespace/NUL characters.

    :param key: The candidate key.
    :raises BadIndexKey: Carrying the whole key when its type or length
        is wrong, or the offending element when an element is invalid.
    """
    # Structural checks first: the type test must run before len().
    if type(key) != tuple or len(key) != self._key_length:
        raise errors.BadIndexKey(key)
    for part in key:
        if part and _whitespace_re.search(part) is None:
            continue
        raise errors.BadIndexKey(part)

def add_node(self, key, value, references=()):
    """Add a node to the index.

    All validation happens before the builder is modified, so a call
    that raises leaves the builder exactly as it was.

    :param key: The key. keys are non-empty tuples containing
        as many whitespace-free utf8 bytestrings as the key length
        defined for this index.
    :param references: A sized collection of iterables of keys, one
        iterable per reference list of this index. Each key is a
        reference to another key. The inner iterables are consumed
        exactly once, so one-shot iterators are safe to pass.
    :param value: The value to associate with the key. It may be any
        bytes as long as it does not contain NUL or newline characters.
    :raises BadIndexKey: If key, or any referenced key, is not a valid
        key for this index.
    :raises BadIndexValue: If value contains a forbidden character, or
        the number of reference lists does not match this index.
    :raises BadIndexDuplicateKey: If key has already been added as a
        real (non-absent) node.
    """
    self._check_key(key)
    if _newline_null_re.search(value) is not None:
        raise errors.BadIndexValue(value)
    if len(references) != self.reference_lists:
        raise errors.BadIndexValue(references)
    # Materialise every reference list once, up front. Iterating a list
    # and then calling tuple() on it again would silently record an empty
    # tuple for one-shot iterables such as generators.
    node_refs = tuple(
        tuple(reference_list) for reference_list in references)
    for reference_list in node_refs:
        for reference in reference_list:
            self._check_key(reference)
    if key in self._nodes and self._nodes[key][0] == '':
        raise errors.BadIndexDuplicateKey(key, self)
    # Everything is valid: only now touch the builder's state.
    # Referenced keys that are not known yet get an 'absent' placeholder
    # so they still have an entry in the node table.
    for reference_list in node_refs:
        for reference in reference_list:
            if reference not in self._nodes:
                self._nodes[reference] = ('a', (), '')
    self._nodes[key] = ('', node_refs, value)
    self._keys.add(key)
    if self._key_length > 1:
        key_dict = self._nodes_by_key
        if self.reference_lists:
            key_value = key, value, node_refs
        else:
            key_value = key, value
        # possibly should do this on-demand, but it seems likely it is
        # always wanted
        # For a key of (foo, bar, baz) create
        # _nodes_by_key[foo][bar][baz] = key_value
        for subkey in key[:-1]:
            key_dict = key_dict.setdefault(subkey, {})
        key_dict[key[-1]] = key_value

137

138

def finish(self):
    """Serialise every node added so far into the index file format.

    References are written as zero-padded decimal byte offsets of the
    referenced row. Because the width of those offsets affects the offsets
    themselves, the work is done in passes when reference lists are in use:
    first accumulate, per row, the bytes that are not references and the
    count of references seen so far; then pick the smallest digit width
    that can express the resulting file size and derive each row's address;
    finally write the rows out.

    :return: A StringIO containing the complete serialised index.
    :raises errors.BzrError: If reference lists are in use and the
        serialised length differs from the length predicted up front.
    """
    header = [
        _SIGNATURE,
        _OPTION_NODE_REFS + str(self.reference_lists) + '\n',
        _OPTION_KEY_ELEMENTS + str(self._key_length) + '\n',
        _OPTION_LEN + str(len(self._keys)) + '\n',
        ]
    output = list(header)
    # Rows are emitted in forward key order.
    ordered_nodes = sorted(self._nodes.items())
    # Stays None when there is no pre-pass, i.e. the size is not predicted.
    expected_bytes = None
    if self.reference_lists:
        running_bytes = sum([len(chunk) for chunk in header])
        running_refs = 0
        # \x00 separators between the elements of a multi-element key.
        key_separators = 0
        if self._key_length > 1:
            key_separators = self._key_length - 1
        row_offsets = []
        for key, (absent, references, value) in ordered_nodes:
            # Remember the totals *before* this row: they locate its start.
            row_offsets.append((key, running_bytes, running_refs))
            # key bytes + key separators + value + 3 nulls + 1 newline
            running_bytes += (sum([len(part) for part in key])
                + key_separators + len(value) + 3 + 1)
            if absent:
                # one byte for the absent marker
                running_bytes += 1
            else:
                # tabs between reference lists
                running_bytes += self.reference_lists - 1
                for ref_list in references:
                    ref_count = len(ref_list)
                    running_refs += ref_count
                    if ref_count:
                        # \r separators between references in one list
                        running_bytes += ref_count - 1
        # Grow the offset width until it can express the total size.
        digits = 1
        while True:
            possible_total_bytes = running_bytes + running_refs * digits
            if not (10 ** digits < possible_total_bytes):
                break
            digits += 1
        expected_bytes = possible_total_bytes + 1 # terminating newline
        key_addresses = dict(
            (key, byte_count + ref_count * digits)
            for key, byte_count, ref_count in row_offsets)
        format_string = '%%0%sd' % digits
    for key, (absent, references, value) in ordered_nodes:
        ref_columns = []
        for ref_list in references:
            ref_columns.append('\r'.join(
                [format_string % key_addresses[ref] for ref in ref_list]))
        output.append("%s\x00%s\x00%s\x00%s\n" % (
            '\x00'.join(key), absent, '\t'.join(ref_columns), value))
    output.append('\n')
    result = StringIO(''.join(output))
    actual_bytes = len(result.getvalue())
    if expected_bytes and actual_bytes != expected_bytes:
        raise errors.BzrError('Failed index creation. Internal error:'
            ' mismatched output length and expected length: %d %d' %
            (actual_bytes, expected_bytes))
    return result

223

224

225

class GraphIndex(object):

226

"""An index for data with embedded graphs.

227

228

The index maps keys to a list of key reference lists, and a value.

229

Each node has the same number of key reference lists. Each key reference

230

list can be empty or an arbitrary length. The value is an opaque NULL

231

terminated string without any newlines. The storage of the index is

232

hidden in the interface: keys and key references are always tuples of

233

bytestrings, never the internal representation (e.g. dictionary offsets).

234

235

It is presumed that the index will not be mutated - it is static data.

236

237

Successive iter_all_entries calls will read the entire index each time.

238

Additionally, iter_entries calls will read the index linearly until the

239

desired keys are found. XXX: This must be fixed before the index is

240

suitable for production use. :XXX

241

"""

242

243

def __init__(self, transport, name, size):

244

"""Open an index called name on transport.

245

246

:param transport: A bzrlib.transport.Transport.

247

:param name: A path to provide to transport API calls.

248

:param size: The size of the index in bytes. This is used for bisection

249

logic to perform partial index reads. While the size could be

250

obtained by statting the file this introduced an additional round

251

trip as well as requiring stat'able transports, both of which are

252

avoided by having it supplied. If size is None, then bisection

253

support will be disabled and accessing the index will just stream

254

all the data.

255

"""

256

self._transport = transport

257

self._name = name

258

# Becomes a dict of key:(value, reference-list-byte-locations) used by

259

# the bisection interface to store parsed but not resolved keys.

260

self._bisect_nodes = None

261

# Becomes a dict of key:(value, reference-list-keys) which are ready to

262

# be returned directly to callers.

263

self._nodes = None

264

# a sorted list of slice-addresses for the parsed bytes of the file.

265

# e.g. (0,1) would mean that byte 0 is parsed.

266

self._parsed_byte_map = []

267

# a sorted list of keys matching each slice address for parsed bytes

268

# e.g. (None, 'foo@bar') would mean that the first byte contained no

269

# key, and the end byte of the slice is the of the data for 'foo@bar'

270

self._parsed_key_map = []

271

self._key_count = None

272

self._keys_by_offset = None

273

self._nodes_by_key = None

274

self._size = size

275

276

def __eq__(self, other):

277

"""Equal when self and other were created with the same parameters."""

278

return (

279

type(self) == type(other) and

280

self._transport == other._transport and

281

self._name == other._name and

282

self._size == other._size)

283

284

def __ne__(self, other):

285

return not self.__eq__(other)

286

287

def __repr__(self):

288

return "%s(%r)" % (self.__class__.__name__,

289

self._transport.abspath(self._name))

290

291

def _buffer_all(self):

292

"""Buffer all the index data.

293

294

Mutates self._nodes and self.keys_by_offset.

295

"""

296

if 'index' in debug.debug_flags:

297

mutter('Reading entire index %s', self._transport.abspath(self._name))

298

stream = self._transport.get(self._name)

299

self._read_prefix(stream)

300

self._expected_elements = 3 + self._key_length

301

line_count = 0

302

# raw data keyed by offset

303

self._keys_by_offset = {}

304

# ready-to-return key:value or key:value, node_ref_lists

305

self._nodes = {}

306

self._nodes_by_key = {}

307

trailers = 0

308

pos = stream.tell()

309

lines = stream.read().split('\n')

310

del lines[-1]

311

_, _, _, trailers = self._parse_lines(lines, pos)

312

for key, absent, references, value in self._keys_by_offset.itervalues():

313

if absent:

314

continue

315

# resolve references:

316

if self.node_ref_lists:

317

node_value = (value, self._resolve_references(references))

318

else:

319

node_value = value

320

self._nodes[key] = node_value

321

if self._key_length > 1:

322

subkey = list(reversed(key[:-1]))

323

key_dict = self._nodes_by_key

324

if self.node_ref_lists:

325

key_value = key, node_value[0], node_value[1]

326

else:

327

key_value = key, node_value

328

# possibly should do this on-demand, but it seems likely it is

329

# always wanted

330

# For a key of (foo, bar, baz) create

331

# _nodes_by_key[foo][bar][baz] = key_value

332

for subkey in key[:-1]:

333

key_dict = key_dict.setdefault(subkey, {})

334

key_dict[key[-1]] = key_value

335

# cache the keys for quick set intersections

336

self._keys = set(self._nodes)

337

if trailers != 1:

338

# there must be one line - the empty trailer line.

339

raise errors.BadIndexData(self)

340

341

def iter_all_entries(self):

342

"""Iterate over all keys within the index.

343

344

:return: An iterable of (index, key, value) or (index, key, value, reference_lists).

345

The former tuple is used when there are no reference lists in the

346

index, making the API compatible with simple key:value index types.

347

There is no defined order for the result iteration - it will be in

348

the most efficient order for the index.

349

"""

350

if 'evil' in debug.debug_flags:

351

trace.mutter_callsite(3,

352

"iter_all_entries scales with size of history.")

353

if self._nodes is None:

354

self._buffer_all()

355

if self.node_ref_lists:

356

for key, (value, node_ref_lists) in self._nodes.iteritems():

357

yield self, key, value, node_ref_lists

358

else:

359

for key, value in self._nodes.iteritems():

360

yield self, key, value

361

362

def _read_prefix(self, stream):

363

signature = stream.read(len(self._signature()))

364

if not signature == self._signature():

365

raise errors.BadIndexFormatSignature(self._name, GraphIndex)

366

options_line = stream.readline()

367

if not options_line.startswith(_OPTION_NODE_REFS):

368

raise errors.BadIndexOptions(self)

369

try:

370

self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])

371

except ValueError:

372

raise errors.BadIndexOptions(self)

373

options_line = stream.readline()

374

if not options_line.startswith(_OPTION_KEY_ELEMENTS):

375

raise errors.BadIndexOptions(self)

376

try:

377

self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])

378

except ValueError:

379

raise errors.BadIndexOptions(self)

380

options_line = stream.readline()

381

if not options_line.startswith(_OPTION_LEN):

382

raise errors.BadIndexOptions(self)

383

try:

384

self._key_count = int(options_line[len(_OPTION_LEN):-1])

385

except ValueError:

386

raise errors.BadIndexOptions(self)

387

388

def _resolve_references(self, references):

389

"""Return the resolved key references for references.

390

391

References are resolved by looking up the location of the key in the

392

_keys_by_offset map and substituting the key name, preserving ordering.

393

394

:param references: An iterable of iterables of key locations. e.g.

395

[[123, 456], [123]]

396

:return: A tuple of tuples of keys.

397

"""

398

node_refs = []

399

for ref_list in references:

400

node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))

401

return tuple(node_refs)

402

403

def _find_index(self, range_map, key):

404

"""Helper for the _parsed_*_index calls.

405

406

Given a range map - [(start, end), ...], finds the index of the range

407

in the map for key if it is in the map, and if it is not there, the

408

immediately preceeding range in the map.

409

"""

410

result = bisect_right(range_map, key) - 1

411

if result + 1 < len(range_map):

412

# check the border condition, it may be in result + 1

413

if range_map[result + 1][0] == key[0]:

414

return result + 1

415

return result

416

417

def _parsed_byte_index(self, offset):

418

"""Return the index of the entry immediately before offset.

419

420

e.g. if the parsed map has regions 0,10 and 11,12 parsed, meaning that

421

there is one unparsed byte (the 11th, addressed as[10]). then:

422

asking for 0 will return 0

423

asking for 10 will return 0

424

asking for 11 will return 1

425

asking for 12 will return 1

426

"""

427

key = (offset, 0)

428

return self._find_index(self._parsed_byte_map, key)

429

430

def _parsed_key_index(self, key):

431

"""Return the index of the entry immediately before key.

432

433

e.g. if the parsed map has regions (None, 'a') and ('b','c') parsed,

434

meaning that keys from None to 'a' inclusive, and 'b' to 'c' inclusive

435

have been parsed, then:

436

asking for '' will return 0

437

asking for 'a' will return 0

438

asking for 'b' will return 1

439

asking for 'e' will return 1

440

"""

441

search_key = (key, None)

442

return self._find_index(self._parsed_key_map, search_key)

443

444

def _is_parsed(self, offset):

445

"""Returns True if offset has been parsed."""

446

index = self._parsed_byte_index(offset)

447

if index == len(self._parsed_byte_map):

448

return offset < self._parsed_byte_map[index - 1][1]

449

start, end = self._parsed_byte_map[index]

450

return offset >= start and offset < end

451

452

def _iter_entries_from_total_buffer(self, keys):

453

"""Iterate over keys when the entire index is parsed."""

454

keys = keys.intersection(self._keys)

455

if self.node_ref_lists:

456

for key in keys:

457

value, node_refs = self._nodes[key]

458

yield self, key, value, node_refs

459

else:

460

for key in keys:

461

yield self, key, self._nodes[key]

462

463

def iter_entries(self, keys):

464

"""Iterate over keys within the index.

465

466

:param keys: An iterable providing the keys to be retrieved.

467

:return: An iterable as per iter_all_entries, but restricted to the

468

keys supplied. No additional keys will be returned, and every

469

key supplied that is in the index will be returned.

470

"""

471

# PERFORMANCE TODO: parse and bisect all remaining data at some

472

# threshold of total-index processing/get calling layers that expect to

473

# read the entire index to use the iter_all_entries method instead.

474

keys = set(keys)

475

if not keys:

476

return []

477

if self._size is None and self._nodes is None:

478

self._buffer_all()

479

if self._nodes is not None:

480

return self._iter_entries_from_total_buffer(keys)

481

else:

482

return (result[1] for result in bisect_multi_bytes(

483

self._lookup_keys_via_location, self._size, keys))

484

485

def iter_entries_prefix(self, keys):

486

"""Iterate over keys within the index using prefix matching.

487

488

Prefix matching is applied within the tuple of a key, not to within

489

the bytestring of each key element. e.g. if you have the keys ('foo',

490

'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then

491

only the former key is returned.

492

493

WARNING: Note that this method currently causes a full index parse

494

unconditionally (which is reasonably appropriate as it is a means for

495

thunking many small indices into one larger one and still supplies

496

iter_all_entries at the thunk layer).

497

498

:param keys: An iterable providing the key prefixes to be retrieved.

499

Each key prefix takes the form of a tuple the length of a key, but

500

with the last N elements 'None' rather than a regular bytestring.

501

The first element cannot be 'None'.

502

:return: An iterable as per iter_all_entries, but restricted to the

503

keys with a matching prefix to those supplied. No additional keys

504

will be returned, and every match that is in the index will be

505

returned.

506

"""

507

keys = set(keys)

508

if not keys:

509

return

510

# load data - also finds key lengths

511

if self._nodes is None:

512

self._buffer_all()

513

if self._key_length == 1:

514

for key in keys:

515

# sanity check

516

if key[0] is None:

517

raise errors.BadIndexKey(key)

518

if len(key) != self._key_length:

519

raise errors.BadIndexKey(key)

520

if self.node_ref_lists:

521

value, node_refs = self._nodes[key]

522

yield self, key, value, node_refs

523

else:

524

yield self, key, self._nodes[key]

525

return

526

for key in keys:

527

# sanity check

528

if key[0] is None:

529

raise errors.BadIndexKey(key)

530

if len(key) != self._key_length:

531

raise errors.BadIndexKey(key)

532

# find what it refers to:

533

key_dict = self._nodes_by_key

534

elements = list(key)

535

# find the subdict whose contents should be returned.

536

try:

537

while len(elements) and elements[0] is not None:

538

key_dict = key_dict[elements[0]]

539

elements.pop(0)

540

except KeyError:

541

# a non-existant lookup.

542

continue

543

if len(elements):

544

dicts = [key_dict]

545

while dicts:

546

key_dict = dicts.pop(-1)

547

# can't be empty or would not exist

548

item, value = key_dict.iteritems().next()

549

if type(value) == dict:

550

# push keys

551

dicts.extend(key_dict.itervalues())

552

else:

553

# yield keys

554

for value in key_dict.itervalues():

555

# each value is the key:value:node refs tuple

556

# ready to yield.

557

yield (self, ) + value

558

else:

559

# the last thing looked up was a terminal element

560

yield (self, ) + key_dict

561

562

def key_count(self):

563

"""Return an estimate of the number of keys in this index.

564

565

For GraphIndex the estimate is exact.

566

"""

567

if self._key_count is None:

568

self._read_and_parse([_HEADER_READV])

569

return self._key_count

570

571

def _lookup_keys_via_location(self, location_keys):

572

"""Public interface for implementing bisection.

573

574

If _buffer_all has been called, then all the data for the index is in

575

memory, and this method should not be called, as it uses a separate

576

cache because it cannot pre-resolve all indices, which buffer_all does

577

for performance.

578

579

:param location_keys: A list of location(byte offset), key tuples.

580

:return: A list of (location_key, result) tuples as expected by

581

bzrlib.bisect_multi.bisect_multi_bytes.

582

"""

583

# Possible improvements:

584

# - only bisect lookup each key once

585

# - sort the keys first, and use that to reduce the bisection window

586

# -----

587

# this progresses in three parts:

588

# read data

589

# parse it

590

# attempt to answer the question from the now in memory data.

591

# build the readv request

592

# for each location, ask for 800 bytes - much more than rows we've seen

593

# anywhere.

594

readv_ranges = []

595

for location, key in location_keys:

596

# can we answer from cache?

597

if self._bisect_nodes and key in self._bisect_nodes:

598

# We have the key parsed.

599

continue

600

index = self._parsed_key_index(key)

601

if (len(self._parsed_key_map) and

602

self._parsed_key_map[index][0] <= key and

603

(self._parsed_key_map[index][1] >= key or

604

# end of the file has been parsed

605

self._parsed_byte_map[index][1] == self._size)):

606

# the key has been parsed, so no lookup is needed even if its

607

# not present.

608

continue

609

# - if we have examined this part of the file already - yes

610

index = self._parsed_byte_index(location)

611

if (len(self._parsed_byte_map) and

612

self._parsed_byte_map[index][0] <= location and

613

self._parsed_byte_map[index][1] > location):

614

# the byte region has been parsed, so no read is needed.

615

continue

616

length = 800

617

if location + length > self._size:

618

length = self._size - location

619

# todo, trim out parsed locations.

620

if length > 0:

621

readv_ranges.append((location, length))

622

# read the header if needed

623

if self._bisect_nodes is None:

624

readv_ranges.append(_HEADER_READV)

625

self._read_and_parse(readv_ranges)

626

# generate results:

627

# - figure out <, >, missing, present

628

# - result present references so we can return them.

629

result = []

630

# keys that we cannot answer until we resolve references

631

pending_references = []

632

pending_locations = set()

633

for location, key in location_keys:

634

# can we answer from cache?

635

if key in self._bisect_nodes:

636

# the key has been parsed, so no lookup is needed

637

if self.node_ref_lists:

638

# the references may not have been all parsed.

639

value, refs = self._bisect_nodes[key]

640

wanted_locations = []

641

for ref_list in refs:

642

for ref in ref_list:

643

if ref not in self._keys_by_offset:

644

wanted_locations.append(ref)

645

if wanted_locations:

646

pending_locations.update(wanted_locations)

647

pending_references.append((location, key))

648

continue

649

result.append(((location, key), (self, key,

650

value, self._resolve_references(refs))))

651

else:

652

result.append(((location, key),

653

(self, key, self._bisect_nodes[key])))

654

continue

655

else:

656

# has the region the key should be in, been parsed?

657

index = self._parsed_key_index(key)

658

if (self._parsed_key_map[index][0] <= key and

659

(self._parsed_key_map[index][1] >= key or

660

# end of the file has been parsed

661

self._parsed_byte_map[index][1] == self._size)):

662

result.append(((location, key), False))

663

continue

664

# no, is the key above or below the probed location:

665

# get the range of the probed & parsed location

666

index = self._parsed_byte_index(location)

667

# if the key is below the start of the range, its below

668

if key < self._parsed_key_map[index][0]:

669

direction = -1

670

else:

671

direction = +1

672

result.append(((location, key), direction))

673

readv_ranges = []

674

# lookup data to resolve references

675

for location in pending_locations:

676

length = 800

677

if location + length > self._size:

678

length = self._size - location

679

# TODO: trim out parsed locations (e.g. if the 800 is into the

680

# parsed region trim it, and dont use the adjust_for_latency

681

# facility)

682

if length > 0:

683

readv_ranges.append((location, length))

684

self._read_and_parse(readv_ranges)

685

for location, key in pending_references:

686

# answer key references we had to look-up-late.

687

index = self._parsed_key_index(key)

688

value, refs = self._bisect_nodes[key]

689

result.append(((location, key), (self, key,

690

value, self._resolve_references(refs))))

691

return result

692

693

def _parse_header_from_bytes(self, bytes):

694

"""Parse the header from a region of bytes.

695

696

:param bytes: The data to parse.

697

:return: An offset, data tuple such as readv yields, for the unparsed

698

data. (which may length 0).

699

"""

700

signature = bytes[0:len(self._signature())]

701

if not signature == self._signature():

702

raise errors.BadIndexFormatSignature(self._name, GraphIndex)

703

lines = bytes[len(self._signature()):].splitlines()

704

options_line = lines[0]

705

if not options_line.startswith(_OPTION_NODE_REFS):

706

raise errors.BadIndexOptions(self)

707

try:

708

self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])

709

except ValueError:

710

raise errors.BadIndexOptions(self)

711

options_line = lines[1]

712

if not options_line.startswith(_OPTION_KEY_ELEMENTS):

713

raise errors.BadIndexOptions(self)

714

try:

715

self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])

716

except ValueError:

717

raise errors.BadIndexOptions(self)

718

options_line = lines[2]

719

if not options_line.startswith(_OPTION_LEN):

720

raise errors.BadIndexOptions(self)

721

try:

722

self._key_count = int(options_line[len(_OPTION_LEN):])

723

except ValueError:

724

raise errors.BadIndexOptions(self)

725

# calculate the bytes we have processed

726

header_end = (len(signature) + len(lines[0]) + len(lines[1]) +

727

len(lines[2]) + 3)

728

self._parsed_bytes(0, None, header_end, None)

729

# setup parsing state

730

self._expected_elements = 3 + self._key_length

731

# raw data keyed by offset

732

self._keys_by_offset = {}

733

# keys with the value and node references

734

self._bisect_nodes = {}

735

return header_end, bytes[header_end:]

736

737

def _parse_region(self, offset, data):

738

"""Parse node data returned from a readv operation.

739

740

:param offset: The byte offset the data starts at.

741

:param data: The data to parse.

742

"""

743

# trim the data.

744

# end first:

745

end = offset + len(data)

746

high_parsed = offset

747

while True:

748

# Trivial test - if the current index's end is within the

749

# low-matching parsed range, we're done.

750

index = self._parsed_byte_index(high_parsed)

751

if end < self._parsed_byte_map[index][1]:

752

return

753

# print "[%d:%d]" % (offset, end), \

754

# self._parsed_byte_map[index:index + 2]

755

high_parsed, last_segment = self._parse_segment(

756

offset, data, end, index)

757

if last_segment:

758

return

759

760

def _parse_segment(self, offset, data, end, index):

761

"""Parse one segment of data.

762

763

:param offset: Where 'data' begins in the file.

764

:param data: Some data to parse a segment of.

765

:param end: Where data ends

766

:param index: The current index into the parsed bytes map.

767

:return: True if the parsed segment is the last possible one in the

768

range of data.

769

:return: high_parsed_byte, last_segment.

770

high_parsed_byte is the location of the highest parsed byte in this

771

segment, last_segment is True if the parsed segment is the last

772

possible one in the data block.

773

"""

774

# default is to use all data

775

trim_end = None

776

# accomodate overlap with data before this.

777

if offset < self._parsed_byte_map[index][1]:

778

# overlaps the lower parsed region

779

# skip the parsed data

780

trim_start = self._parsed_byte_map[index][1] - offset

781

# don't trim the start for \n

782

start_adjacent = True

783

elif offset == self._parsed_byte_map[index][1]:

784

# abuts the lower parsed region

785

# use all data

786

trim_start = None

787

# do not trim anything

788

start_adjacent = True

789

else:

790

# does not overlap the lower parsed region

791

# use all data

792

trim_start = None

793

# but trim the leading \n

794

start_adjacent = False

795

if end == self._size:

796

# lines up to the end of all data:

797

# use it all

798

trim_end = None

799

# do not strip to the last \n

800

end_adjacent = True

801

last_segment = True

802

elif index + 1 == len(self._parsed_byte_map):

803

# at the end of the parsed data

804

# use it all

805

trim_end = None

806

# but strip to the last \n

807

end_adjacent = False

808

last_segment = True

809

elif end == self._parsed_byte_map[index + 1][0]:

810

# buts up against the next parsed region

811

# use it all

812

trim_end = None

813

# do not strip to the last \n

814

end_adjacent = True

815

last_segment = True

816

elif end > self._parsed_byte_map[index + 1][0]:

817

# overlaps into the next parsed region

818

# only consider the unparsed data

819

trim_end = self._parsed_byte_map[index + 1][0] - offset

820

# do not strip to the last \n as we know its an entire record

821

end_adjacent = True

822

last_segment = end < self._parsed_byte_map[index + 1][1]

823

else:

824

# does not overlap into the next region

825

# use it all

826

trim_end = None

827

# but strip to the last \n

828

end_adjacent = False

829

last_segment = True

830

# now find bytes to discard if needed

831

if not start_adjacent:

832

# work around python bug in rfind

833

if trim_start is None:

834

trim_start = data.find('\n') + 1

835

else:

836

trim_start = data.find('\n', trim_start) + 1

837

if not (trim_start != 0):

838

raise AssertionError('no \n was present')

839

# print 'removing start', offset, trim_start, repr(data[:trim_start])

840

if not end_adjacent:

841

# work around python bug in rfind

842

if trim_end is None:

843

trim_end = data.rfind('\n') + 1

844

else:

845

trim_end = data.rfind('\n', None, trim_end) + 1

846

if not (trim_end != 0):

847

raise AssertionError('no \n was present')

848

# print 'removing end', offset, trim_end, repr(data[trim_end:])

849

# adjust offset and data to the parseable data.

850

trimmed_data = data[trim_start:trim_end]

851

if not (trimmed_data):

852

raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'

853

% (trim_start, trim_end, offset, offset + len(data)))

854

if trim_start:

855

offset += trim_start

856

# print "parsing", repr(trimmed_data)

857

# splitlines mangles the \r delimiters.. don't use it.

858

lines = trimmed_data.split('\n')

859

del lines[-1]

860

pos = offset

861

first_key, last_key, nodes, _ = self._parse_lines(lines, pos)

862

for key, value in nodes:

863

self._bisect_nodes[key] = value

864

self._parsed_bytes(offset, first_key,

865

offset + len(trimmed_data), last_key)

866

return offset + len(trimmed_data), last_segment

867

868

def _parse_lines(self, lines, pos):

869

key = None

870

first_key = None

871

trailers = 0

872

nodes = []

873

for line in lines:

874

if line == '':

875

# must be at the end

876

if self._size:

877

if not (self._size == pos + 1):

878

raise AssertionError("%s %s" % (self._size, pos))

879

trailers += 1

880

continue

881

elements = line.split('\0')

882

if len(elements) != self._expected_elements:

883

raise errors.BadIndexData(self)

884

# keys are tuples. Each element is a string that may occur many

885

# times, so we intern them to save space. AB, RC, 200807

886

key = tuple(intern(element) for element in elements[:self._key_length])

887

if first_key is None:

888

first_key = key

889

absent, references, value = elements[-3:]

890

ref_lists = []

891

for ref_string in references.split('\t'):

892

ref_lists.append(tuple([

893

int(ref) for ref in ref_string.split('\r') if ref

894

]))

895

ref_lists = tuple(ref_lists)

896

self._keys_by_offset[pos] = (key, absent, ref_lists, value)

897

pos += len(line) + 1 # +1 for the \n

898

if absent:

899

continue

900

if self.node_ref_lists:

901

node_value = (value, ref_lists)

902

else:

903

node_value = value

904

nodes.append((key, node_value))

905

# print "parsed ", key

906

return first_key, key, nodes, trailers

907

908

def _parsed_bytes(self, start, start_key, end, end_key):

909

"""Mark the bytes from start to end as parsed.

910

911

Calling self._parsed_bytes(1,2) will mark one byte (the one at offset

912

1) as parsed.

913

914

:param start: The start of the parsed region.

915

:param end: The end of the parsed region.

916

"""

917

index = self._parsed_byte_index(start)

918

new_value = (start, end)

919

new_key = (start_key, end_key)

920

if index == -1:

921

# first range parsed is always the beginning.

922

self._parsed_byte_map.insert(index, new_value)

923

self._parsed_key_map.insert(index, new_key)

924

return

925

# four cases:

926

# new region

927

# extend lower region

928

# extend higher region

929

# combine two regions

930

if (index + 1 < len(self._parsed_byte_map) and

931

self._parsed_byte_map[index][1] == start and

932

self._parsed_byte_map[index + 1][0] == end):

933

# combine two regions

934

self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],

935

self._parsed_byte_map[index + 1][1])

936

self._parsed_key_map[index] = (self._parsed_key_map[index][0],

937

self._parsed_key_map[index + 1][1])

938

del self._parsed_byte_map[index + 1]

939

del self._parsed_key_map[index + 1]

940

elif self._parsed_byte_map[index][1] == start:

941

# extend the lower entry

942

self._parsed_byte_map[index] = (

943

self._parsed_byte_map[index][0], end)

944

self._parsed_key_map[index] = (

945

self._parsed_key_map[index][0], end_key)

946

elif (index + 1 < len(self._parsed_byte_map) and

947

self._parsed_byte_map[index + 1][0] == end):

948

# extend the higher entry

949

self._parsed_byte_map[index + 1] = (

950

start, self._parsed_byte_map[index + 1][1])

951

self._parsed_key_map[index + 1] = (

952

start_key, self._parsed_key_map[index + 1][1])

953

else:

954

# new entry

955

self._parsed_byte_map.insert(index + 1, new_value)

956

self._parsed_key_map.insert(index + 1, new_key)

957

958

def _read_and_parse(self, readv_ranges):

959

"""Read the the ranges and parse the resulting data.

960

961

:param readv_ranges: A prepared readv range list.

962

"""

963

if readv_ranges:

964

readv_data = self._transport.readv(self._name, readv_ranges, True,

965

self._size)

966

# parse

967

for offset, data in readv_data:

968

if self._bisect_nodes is None:

969

# this must be the start

970

if not (offset == 0):

971

raise AssertionError()

972

offset, data = self._parse_header_from_bytes(data)

973

# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))

974

self._parse_region(offset, data)

975

976

def _signature(self):
    """Return the file signature used by this index type.

    :return: The module-level _SIGNATURE value.
    """
    return _SIGNATURE

979

980

def validate(self):
    """Validate that everything in the index can be accessed.

    Any error raised while iterating the entries propagates to the caller.
    """
    # iter_all_entries validates completely at the moment, so exhausting
    # it (and discarding what it yields) is all that is needed.
    for _entry in self.iter_all_entries():
        pass

985

986

987

class CombinedGraphIndex(object):
    """A GraphIndex made up from smaller GraphIndices.

    The backing indices must implement GraphIndex, and are presumed to be
    static data.

    A query is answered by asking the first index, then the second and so
    on, so the order of the indices can influence performance
    significantly. For example, if one index is on local disk and a second
    on a remote server, the local disk index should come first in the list.
    """

    def __init__(self, indices):
        """Create a CombinedGraphIndex backed by indices.

        :param indices: An ordered list of indices to query for data. The
            list object itself is kept (not copied).
        """
        self._indices = indices

    def __repr__(self):
        children = ', '.join([repr(child) for child in self._indices])
        return "%s(%s)" % (self.__class__.__name__, children)

    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revision_ids):
        """See graph._StackedParentsProvider.get_parents.

        Deprecated thunk from the graph.Graph.get_parents api onto
        get_parent_map.

        :param revision_ids: An iterable of graph keys for this graph. It
            is walked twice, once by get_parent_map and once here.
        :return: A list of parent details for each key in revision_ids.
            Each parent details will be one of:
             * None when the key was missing
             * (NULL_REVISION,) when the key has no parents.
             * (parent_key, parent_key...) otherwise.
        """
        found = self.get_parent_map(revision_ids)
        return [found.get(revision_id, None) for revision_id in revision_ids]

    def get_parent_map(self, keys):
        """See graph._StackedParentsProvider.get_parent_map.

        :param keys: An iterable of keys to look up.
        :return: A dict mapping each key that was found to its parents (the
            first reference list of the node), or to (NULL_REVISION,) when
            that list is empty. NULL_REVISION itself, when requested, maps
            to an empty list. Missing keys are absent from the dict.
        """
        wanted = set(keys)
        parent_map = {}
        if NULL_REVISION in wanted:
            # NULL_REVISION is never stored in an index; answer it directly.
            wanted.discard(NULL_REVISION)
            parent_map[NULL_REVISION] = []
        for index, key, value, refs in self.iter_entries(wanted):
            parent_map[key] = refs[0] or (NULL_REVISION,)
        return parent_map

    def insert_index(self, pos, index):
        """Insert a new index in the list of indices to query.

        :param pos: The position to insert the index.
        :param index: The index to insert.
        """
        self._indices.insert(pos, index)

    def iter_all_entries(self):
        """Iterate over all keys within the index.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once (the first index holding a key
        wins).

        :return: An iterable of the nodes yielded by the child indices; the
            key is element 1 of each node. There is no defined order for the
            result iteration - it will be in the most efficient order for
            the index.
        """
        reported = set()
        for child in self._indices:
            for node in child.iter_all_entries():
                key = node[1]
                if key in reported:
                    continue
                yield node
                reported.add(key)

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of the nodes yielded by the child indices; the
            key is element 1 of each node. There is no defined order for the
            result iteration - it will be in the most efficient order for
            the index.
        """
        outstanding = set(keys)
        for child in self._indices:
            if not outstanding:
                # Everything asked for has been found; skip the remaining
                # indices.
                return
            for node in child.iter_entries(outstanding):
                outstanding.remove(node[1])
                yield node

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys
        ('foo', 'bar'), ('foobar', 'gam') and do a prefix search for
        ('foo', None) then only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key,
            but with the last N elements 'None' rather than a regular
            bytestring. The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional
            keys will be returned, and every match that is in the index
            will be returned.
        """
        prefixes = set(keys)
        if not prefixes:
            return
        reported = set()
        for child in self._indices:
            for node in child.iter_entries_prefix(prefixes):
                key = node[1]
                if key not in reported:
                    reported.add(key)
                    yield node

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For CombinedGraphIndex this is approximated by the sum of the keys
        of the child indices. As child indices may have duplicate keys this
        can have a maximum error of the number of child indices * largest
        number of keys in any index.
        """
        return sum(child.key_count() for child in self._indices)

    def validate(self):
        """Validate that everything in the index can be accessed."""
        for child in self._indices:
            child.validate()

1133

1134

1135

class InMemoryGraphIndex(GraphIndexBuilder):
    """A GraphIndex which operates entirely out of memory and is mutable.

    This is designed to allow the accumulation of GraphIndex entries during
    a single write operation, where the accumulated entries need to be
    immediately available - for example via a CombinedGraphIndex.
    """

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add
            when this index has reference lists, otherwise an iterable of
            (key, value) entries.
        """
        if self.reference_lists:
            for (key, value, node_refs) in nodes:
                self.add_node(key, value, node_refs)
        else:
            for (key, value) in nodes:
                self.add_node(key, value)

    def iter_all_entries(self):
        """Iterate over all keys within the index.

        Entries flagged as absent in self._nodes are skipped.

        :return: An iterable of (index, key, value, reference_lists) when
            this index has reference lists, otherwise (index, key, value).
            There is no defined order for the result iteration - it will be
            in the most efficient order for the index (in this case
            dictionary hash order).
        """
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                "iter_all_entries scales with size of history.")
        if self.reference_lists:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield self, key, value, references
        else:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield self, key, value

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        Keys that are not in the index, or that are flagged absent, are
        silently skipped.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists) when
            this index has reference lists, otherwise (index, key, value).
            There is no defined order for the result iteration - it will be
            in the most efficient order for the index (keys iteration order
            in this case).
        """
        keys = set(keys)
        if self.reference_lists:
            for key in keys.intersection(self._keys):
                node = self._nodes[key]
                if not node[0]:
                    yield self, key, node[2], node[1]
        else:
            for key in keys.intersection(self._keys):
                node = self._nodes[key]
                if not node[0]:
                    yield self, key, node[2]

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys
        ('foo', 'bar'), ('foobar', 'gam') and do a prefix search for
        ('foo', None) then only the former key is returned.

        A prefix that matches nothing yields nothing. This now holds for
        single-element keys as well, which used to raise KeyError.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key,
            but with the last N elements 'None' rather than a regular
            bytestring. The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional
            keys will be returned, and every match that is in the index
            will be returned.
        :raises errors.BadIndexKey: If a prefix starts with None or does not
            have the key length of this index.
        """
        # XXX: Too much duplication with the GraphIndex class; consider
        # finding a good place to pull out the actual common logic.
        keys = set(keys)
        if not keys:
            return
        single_element_keys = self._key_length == 1
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            if single_element_keys:
                # With one element a prefix is a complete key, so this is a
                # plain lookup. A key that is not present simply matches
                # nothing, as in the multi-element path below.
                node = self._nodes.get(key)
                if node is None or node[0]:
                    continue
                if self.reference_lists:
                    yield self, key, node[2], node[1]
                else:
                    yield self, key, node[2]
                continue
            # find what it refers to:
            key_dict = self._nodes_by_key
            elements = list(key)
            # find the subdict to return
            try:
                while elements and elements[0] is not None:
                    key_dict = key_dict[elements[0]]
                    elements.pop(0)
            except KeyError:
                # a non-existent lookup.
                continue
            if not elements:
                # Every element was given, so key_dict is now the node.
                yield (self, ) + key_dict
                continue
            # Trailing None elements remain: walk the whole subtree below
            # the prefix and yield every node stored in it.
            dicts = [key_dict]
            while dicts:
                key_dict = dicts.pop(-1)
                # can't be empty or would not exist
                sample = key_dict.itervalues().next()
                if isinstance(sample, dict):
                    # another level of nesting: push the sub-dicts
                    dicts.extend(key_dict.itervalues())
                else:
                    # leaf level: the values are the nodes themselves
                    for value in key_dict.itervalues():
                        yield (self, ) + value

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For InMemoryGraphIndex the estimate is exact.
        """
        return len(self._keys)

    def validate(self):
        """In memory index's have no known corruption at the moment."""

1273

1274

1275

class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.

    Queries against this will emit queries against the adapted Graph with
    the prefix added, queries for all items use iter_entries_prefix. The
    returned nodes will have their keys and node references adjusted to
    remove the prefix. Finally, an add_nodes_callback can be supplied - when
    called the nodes and references being added will have prefix prepended.
    """

    def __init__(self, adapted, prefix, missing_key_length,
                 add_nodes_callback=None):
        """Construct an adapter against adapted with prefix.

        :param adapted: The index that queries are forwarded to.
        :param prefix: A tuple of key elements prepended to every key and
            reference on the way in, and stripped on the way out.
        :param missing_key_length: How many None elements to append to
            prefix to form the prefix key used by iter_all_entries.
        :param add_nodes_callback: Callable given the list of translated
            nodes by add_nodes. add_nodes fails if this is left as None.
        """
        self.adapted = adapted
        self.prefix_key = prefix + (None,) * missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add,
            or of (key, value) entries for an index without reference lists.
        """
        # save nodes in case its an iterator
        nodes = tuple(nodes)
        prefix = self.prefix
        translated_nodes = []
        try:
            # node_refs is a tuple of tuples of keys, so rebuild it with the
            # prefix added to every referenced key.
            for (key, value, node_refs) in nodes:
                adjusted_references = tuple(
                    tuple(prefix + ref_node for ref_node in ref_list)
                    for ref_list in node_refs)
                translated_nodes.append(
                    (prefix + key, value, adjusted_references))
        except ValueError:
            # Unpacking into three names failed, so these are (key, value)
            # entries for an index without reference lists.
            # XXX: TODO add an explicit interface for getting the reference
            # list status, to handle this bit of user-friendliness in the
            # API more explicitly.
            translated_nodes = [(prefix + key, value)
                                for (key, value) in nodes]
        self.add_nodes_callback(translated_nodes)

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        """
        self.add_nodes(((key, value, references), ))

    def _strip_prefix(self, an_iter):
        """Strip prefix data from nodes and return it.

        :param an_iter: An iterable of nodes from the adapted index, either
            (index, key, value, reference_lists) or, for an index without
            reference lists, (index, key, value).
        :return: A generator of the same nodes with the prefix removed from
            the key and from every referenced key.
        :raises errors.BadIndexData: If a key or a referenced key does not
            start with the prefix.
        """
        prefix = self.prefix
        prefix_len = self.prefix_len
        for node in an_iter:
            # cross checks
            if node[1][:prefix_len] != prefix:
                raise errors.BadIndexData(self)
            stripped_key = node[1][prefix_len:]
            if len(node) < 4:
                # The adapted index has no reference lists, so there is
                # nothing further to check or strip.
                yield node[0], stripped_key, node[2]
                continue
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:prefix_len] != prefix:
                        raise errors.BadIndexData(self)
            yield node[0], stripped_key, node[2], (
                tuple(tuple(ref_node[prefix_len:] for ref_node in ref_list)
                      for ref_list in node[3]))

    def iter_all_entries(self):
        """Iterate over all keys within the index.

        iter_all_entries is implemented against the adapted index using
        iter_entries_prefix.

        :return: An iterable of (index, key, value, reference_lists), with
            the prefix stripped. The order is whatever the adapted index's
            iter_entries_prefix produces.
        """
        return self._strip_prefix(
            self.adapted.iter_entries_prefix([self.prefix_key]))

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved (without
            the prefix).
        :return: An iterable of (index, key, value, reference_lists), with
            the prefix stripped. The order is whatever the adapted index's
            iter_entries produces.
        """
        return self._strip_prefix(self.adapted.iter_entries(
            self.prefix + key for key in keys))

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys
        ('foo', 'bar'), ('foobar', 'gam') and do a prefix search for
        ('foo', None) then only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key,
            but with the last N elements 'None' rather than a regular
            bytestring. The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional
            keys will be returned, and every match that is in the index
            will be returned.
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix(
            self.prefix + key for key in keys))

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For GraphIndexPrefixAdapter this is relatively expensive - key
        iteration with the prefix is done.
        """
        # Count while iterating instead of building a throwaway list.
        return sum(1 for _node in self.iter_all_entries())

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()

Older »